Vendor import of llvm trunk r300422:
https://llvm.org/svn/llvm-project/llvm/trunk@300422
This commit is contained in:
parent
31bbf64f3a
commit
71d5a2540a
6
.gitignore
vendored
6
.gitignore
vendored
@ -21,6 +21,9 @@
|
||||
#OS X specific files.
|
||||
.DS_store
|
||||
|
||||
# Nested build directory
|
||||
/build
|
||||
|
||||
#==============================================================================#
|
||||
# Explicit files to ignore (only matches one).
|
||||
#==============================================================================#
|
||||
@ -62,8 +65,9 @@ tools/polly
|
||||
tools/avrlit
|
||||
# Sphinx build tree, if building in-source dir.
|
||||
docs/_build
|
||||
# VSCode config files.
|
||||
# VS2017 and VSCode config files.
|
||||
.vscode
|
||||
.vs
|
||||
|
||||
#==============================================================================#
|
||||
# Files created in tree by the Go bindings.
|
||||
|
@ -20,7 +20,7 @@ if(POLICY CMP0057)
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED LLVM_VERSION_MAJOR)
|
||||
set(LLVM_VERSION_MAJOR 4)
|
||||
set(LLVM_VERSION_MAJOR 5)
|
||||
endif()
|
||||
if(NOT DEFINED LLVM_VERSION_MINOR)
|
||||
set(LLVM_VERSION_MINOR 0)
|
||||
@ -29,7 +29,7 @@ if(NOT DEFINED LLVM_VERSION_PATCH)
|
||||
set(LLVM_VERSION_PATCH 0)
|
||||
endif()
|
||||
if(NOT DEFINED LLVM_VERSION_SUFFIX)
|
||||
set(LLVM_VERSION_SUFFIX "")
|
||||
set(LLVM_VERSION_SUFFIX svn)
|
||||
endif()
|
||||
|
||||
if (POLICY CMP0048)
|
||||
@ -56,17 +56,20 @@ endif()
|
||||
|
||||
# This should only apply if you are both on an Apple host, and targeting Apple.
|
||||
if(CMAKE_HOST_APPLE AND APPLE)
|
||||
if(NOT CMAKE_XCRUN)
|
||||
find_program(CMAKE_XCRUN NAMES xcrun)
|
||||
endif()
|
||||
if(CMAKE_XCRUN)
|
||||
execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
|
||||
OUTPUT_VARIABLE CMAKE_LIBTOOL
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
endif()
|
||||
# If CMAKE_LIBTOOL is not already set, try to find libtool: ask xcrun first,
# then fall back to find_program.
if(NOT CMAKE_LIBTOOL)
  if(NOT CMAKE_XCRUN)
    find_program(CMAKE_XCRUN NAMES xcrun)
  endif()
  if(CMAKE_XCRUN)
    execute_process(COMMAND ${CMAKE_XCRUN} -find libtool
                    OUTPUT_VARIABLE CMAKE_LIBTOOL
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()

  # if(EXISTS ...) takes a path and does not dereference a bare variable name,
  # so the value has to be expanded explicitly. Quoting keeps the condition
  # well-formed when xcrun produced no output.
  if(NOT CMAKE_LIBTOOL OR NOT EXISTS "${CMAKE_LIBTOOL}")
    find_program(CMAKE_LIBTOOL NAMES libtool)
  endif()
endif()
|
||||
|
||||
get_property(languages GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
@ -132,18 +135,6 @@ foreach(proj ${LLVM_ENABLE_PROJECTS})
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
# The following only works with the Ninja generator in CMake >= 3.0.
|
||||
set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
|
||||
"Define the maximum number of concurrent compilation jobs.")
|
||||
if(LLVM_PARALLEL_COMPILE_JOBS)
|
||||
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
|
||||
message(WARNING "Job pooling is only available with Ninja generators.")
|
||||
else()
|
||||
set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
|
||||
set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Build llvm with ccache if the package is present
|
||||
set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
|
||||
if(LLVM_CCACHE_BUILD)
|
||||
@ -178,21 +169,12 @@ if(LLVM_DEPENDENCY_DEBUGGING)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" OFF)
|
||||
option(LLVM_BUILD_GLOBAL_ISEL "Experimental: Build GlobalISel" ON)
|
||||
if(LLVM_BUILD_GLOBAL_ISEL)
|
||||
add_definitions(-DLLVM_BUILD_GLOBAL_ISEL)
|
||||
endif()
|
||||
|
||||
set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
|
||||
"Define the maximum number of concurrent link jobs.")
|
||||
if(LLVM_PARALLEL_LINK_JOBS)
|
||||
if(NOT CMAKE_MAKE_PROGRAM MATCHES "ninja")
|
||||
message(WARNING "Job pooling is only available with Ninja generators.")
|
||||
else()
|
||||
set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
|
||||
set(CMAKE_JOB_POOL_LINK link_job_pool)
|
||||
endif()
|
||||
endif()
|
||||
option(LLVM_ENABLE_DAGISEL_COV "Debug: Prints tablegen patterns that were used for selecting" OFF)
|
||||
|
||||
# Add path for custom modules
|
||||
set(CMAKE_MODULE_PATH
|
||||
@ -385,8 +367,6 @@ set(LLVM_TARGETS_TO_BUILD
|
||||
${LLVM_EXPERIMENTAL_TARGETS_TO_BUILD})
|
||||
list(REMOVE_DUPLICATES LLVM_TARGETS_TO_BUILD)
|
||||
|
||||
include(AddLLVMDefinitions)
|
||||
|
||||
option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON)
|
||||
option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
|
||||
option(LLVM_ENABLE_MODULES "Compile with C++ modules enabled." OFF)
|
||||
@ -414,9 +394,6 @@ option(LLVM_ENABLE_EXPENSIVE_CHECKS "Enable expensive checks" OFF)
|
||||
set(LLVM_ABI_BREAKING_CHECKS "WITH_ASSERTS" CACHE STRING
|
||||
"Enable abi-breaking checks. Can be WITH_ASSERTS, FORCE_ON or FORCE_OFF.")
|
||||
|
||||
option(LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
|
||||
"Disable abi-breaking checks mismatch detection at link-tim." OFF)
|
||||
|
||||
option(LLVM_FORCE_USE_OLD_HOST_TOOLCHAIN
|
||||
"Set to ON to force using an old, unsupported host toolchain." OFF)
|
||||
|
||||
@ -506,6 +483,10 @@ option(LLVM_INCLUDE_UTILS "Generate build targets for the LLVM utils." ON)
|
||||
option(LLVM_BUILD_UTILS
|
||||
"Build LLVM utility binaries. If OFF, just generate build targets." ON)
|
||||
|
||||
option(LLVM_INCLUDE_RUNTIMES "Generate build targets for the LLVM runtimes." ON)
|
||||
option(LLVM_BUILD_RUNTIMES
|
||||
"Build the LLVM runtimes. If OFF, just generate build targets." ON)
|
||||
|
||||
option(LLVM_BUILD_RUNTIME
|
||||
"Build the LLVM runtime libraries." ON)
|
||||
option(LLVM_BUILD_EXAMPLES
|
||||
@ -641,7 +622,7 @@ endif (LLVM_USE_OPROFILE)
|
||||
|
||||
message(STATUS "Constructing LLVMBuild project information")
|
||||
execute_process(
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${LLVMBUILDTOOL}
|
||||
COMMAND ${PYTHON_EXECUTABLE} -B ${LLVMBUILDTOOL}
|
||||
--native-target "${LLVM_NATIVE_ARCH}"
|
||||
--enable-targets "${LLVM_TARGETS_TO_BUILD}"
|
||||
--enable-optional-components "${LLVMOPTIONALCOMPONENTS}"
|
||||
@ -737,6 +718,30 @@ configure_file(
|
||||
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
|
||||
${LLVM_INCLUDE_DIR}/llvm/Support/DataTypes.h)
|
||||
|
||||
# Add target for generating source rpm package.
#
# LLVM_SRPM_USER_BINARY_SPECFILE is the (user-overridable) spec template; it is
# configured into LLVM_SRPM_BINARY_SPECFILE in the build tree, and the srpm
# target uses LLVM_SRPM_DIR as the rpmbuild top directory.
set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in
    CACHE FILEPATH ".spec file to use for srpm generation")
set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec)
set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm")

# SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs.
# DUMMY_VAR contains a version string which we don't care about.
add_version_info_from_vcs(DUMMY_VAR)
# LLVM_RPM_SPEC_REVISION is left unset here when neither revision is available.
if(SVN_REVISION)
  set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}")
elseif(GIT_COMMIT)
  set(LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}")
endif()

configure_file(
  ${LLVM_SRPM_USER_BINARY_SPECFILE}
  ${LLVM_SRPM_BINARY_SPECFILE} @ONLY)

# The "_topdir <dir>" macro definition must reach rpmbuild as ONE argument.
# CMake does not treat single quotes as quoting, so it is passed as a
# double-quoted CMake argument and VERBATIM makes the escaping
# platform-independent. CMAKE_CPACK_COMMAND is the cpack that ships with the
# running CMake.
add_custom_target(srpm
  COMMAND "${CMAKE_CPACK_COMMAND}" -G TGZ --config CPackSourceConfig.cmake -B "${LLVM_SRPM_DIR}/SOURCES"
  COMMAND rpmbuild -bs --define "_topdir ${LLVM_SRPM_DIR}" "${LLVM_SRPM_BINARY_SPECFILE}"
  VERBATIM)
|
||||
|
||||
|
||||
# They are not referenced. See set_output_directory().
|
||||
set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/bin )
|
||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} )
|
||||
@ -861,7 +866,9 @@ if( LLVM_INCLUDE_TOOLS )
|
||||
add_subdirectory(tools)
|
||||
endif()
|
||||
|
||||
add_subdirectory(runtimes)
|
||||
if( LLVM_INCLUDE_RUNTIMES )
|
||||
add_subdirectory(runtimes)
|
||||
endif()
|
||||
|
||||
if( LLVM_INCLUDE_EXAMPLES )
|
||||
add_subdirectory(examples)
|
||||
@ -978,3 +985,8 @@ if(LLVM_DISTRIBUTION_COMPONENTS)
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# This allows us to deploy the Universal CRT DLLs by passing -DCMAKE_INSTALL_UCRT_LIBRARIES=ON to CMake
|
||||
if (MSVC)
|
||||
include(InstallRequiredSystemLibraries)
|
||||
endif()
|
||||
|
@ -5,12 +5,9 @@ what goes in or not.
|
||||
|
||||
The list is sorted by surname and formatted to allow easy grepping and
|
||||
beautification by scripts. The fields are: name (N), email (E), web-address
|
||||
(W), PGP key ID and fingerprint (P), description (D), and snail-mail address
|
||||
(S). Each entry should contain at least the (N), (E) and (D) fields.
|
||||
|
||||
N: Joe Abbey
|
||||
E: jabbey@arxan.com
|
||||
D: LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
|
||||
(W), PGP key ID and fingerprint (P), description (D), snail-mail address
|
||||
(S) and IRC handle (I). Each entry should contain at least the (N), (E) and
|
||||
(D) fields.
|
||||
|
||||
N: Justin Bogner
|
||||
E: mail@justinbogner.com
|
||||
@ -21,6 +18,11 @@ N: Alex Bradbury
|
||||
E: asb@lowrisc.org
|
||||
D: RISC-V backend (lib/Target/RISCV/*)
|
||||
|
||||
N: Matthias Braun
|
||||
E: matze@braunis.de
|
||||
I: MatzeB
|
||||
D: Instruction Scheduling
|
||||
|
||||
N: Chandler Carruth
|
||||
E: chandlerc@gmail.com
|
||||
E: chandlerc@google.com
|
||||
@ -34,6 +36,10 @@ N: Eric Christopher
|
||||
E: echristo@gmail.com
|
||||
D: Debug Information, inline assembly
|
||||
|
||||
N: Andrey Churbanov
|
||||
E: andrey.churbanov@intel.com
|
||||
D: OpenMP runtime library
|
||||
|
||||
N: Greg Clayton
|
||||
E: gclayton@apple.com
|
||||
D: LLDB
|
||||
@ -48,7 +54,7 @@ D: libc++
|
||||
|
||||
N: Peter Collingbourne
|
||||
E: peter@pcc.me.uk
|
||||
D: llgo, libLTO (lib/LTO/* tools/lto/*)
|
||||
D: llgo, libLTO (lib/LTO/* tools/lto/*), LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
|
||||
|
||||
N: Quentin Colombet
|
||||
E: qcolombet@apple.com
|
||||
@ -96,7 +102,7 @@ D: MCJIT, RuntimeDyld and JIT event listeners, Orcish Warchief
|
||||
|
||||
N: Teresa Johnson
|
||||
E: tejohnson@google.com
|
||||
D: Gold plugin (tools/gold/*)
|
||||
D: Gold plugin (tools/gold/*) and IR Linker
|
||||
|
||||
N: Galina Kistanova
|
||||
E: gkistanova@gmail.com
|
||||
@ -132,7 +138,7 @@ E: david.majnemer@gmail.com
|
||||
D: IR Constant Folder, InstCombine
|
||||
|
||||
N: Dylan McKay
|
||||
E: dylanmckay34@gmail.com
|
||||
E: me@dylanmckay.io
|
||||
D: AVR Backend
|
||||
|
||||
N: Tim Northover
|
||||
@ -180,9 +186,8 @@ E: alexei.starovoitov@gmail.com
|
||||
D: BPF backend
|
||||
|
||||
N: Tom Stellard
|
||||
E: thomas.stellard@amd.com
|
||||
E: mesa-dev@lists.freedesktop.org
|
||||
D: Release manager for the 3.5 and 3.6 branches, R600 Backend, libclc
|
||||
E: tstellar@redhat.com
|
||||
D: Stable release management (x.y.[1-9] releases), AMDGPU Backend, libclc
|
||||
|
||||
N: Evgeniy Stepanov
|
||||
E: eugenis@google.com
|
||||
@ -192,18 +197,10 @@ N: Craig Topper
|
||||
E: craig.topper@gmail.com
|
||||
D: X86 Backend
|
||||
|
||||
N: Andrew Trick
|
||||
E: atrick@apple.com
|
||||
D: Instruction Scheduling
|
||||
|
||||
N: Ulrich Weigand
|
||||
E: uweigand@de.ibm.com
|
||||
D: SystemZ Backend
|
||||
|
||||
N: Teresa Johnson
|
||||
E: tejohnson@google.com
|
||||
D: IR Linker
|
||||
|
||||
N: Hans Wennborg
|
||||
E: hans@chromium.org
|
||||
D: Release management (x.y.0 releases)
|
||||
@ -211,7 +208,3 @@ D: Release management (x.y.0 releases)
|
||||
N: whitequark
|
||||
E: whitequark@whitequark.org
|
||||
D: OCaml bindings
|
||||
|
||||
N: Andrey Churbanov
|
||||
E: andrey.churbanov@intel.com
|
||||
D: OpenMP runtime library
|
||||
|
@ -457,6 +457,10 @@ N: Adam Treat
|
||||
E: manyoso@yahoo.com
|
||||
D: C++ bugs filed, and C++ front-end bug fixes.
|
||||
|
||||
N: Andrew Trick
|
||||
E: atrick@apple.com
|
||||
D: Instruction Scheduling, ...
|
||||
|
||||
N: Lauro Ramos Venancio
|
||||
E: lauro.venancio@indt.org.br
|
||||
D: ARM backend improvements
|
||||
|
@ -15,3 +15,4 @@ documentation setup.
|
||||
|
||||
If you are writing a package for LLVM, see docs/Packaging.rst for our
|
||||
suggestions.
|
||||
|
||||
|
@ -51,7 +51,7 @@ E: diana.picus@linaro.org
|
||||
T: AArch64
|
||||
O: Linux
|
||||
|
||||
N: Vasileios Kalintiris
|
||||
E: Vasileios.Kalintiris@imgtec.com
|
||||
N: Simon Dardis
|
||||
E: simon.dardis@imgtec.com
|
||||
T: MIPS
|
||||
O: Linux
|
||||
|
@ -119,7 +119,8 @@ LLVMMetadataRef LLVMDIBuilderCreatePointerType(LLVMDIBuilderRef Dref,
|
||||
const char *Name) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createPointerType(unwrap<DIType>(PointeeType), SizeInBits,
|
||||
AlignInBits, Name));
|
||||
AlignInBits, /* DWARFAddressSpace */ None,
|
||||
Name));
|
||||
}
|
||||
|
||||
LLVMMetadataRef
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "IRBindings.h"
|
||||
#include "llvm/IR/Attributes.h"
|
||||
#include "llvm/IR/DebugLoc.h"
|
||||
#include "llvm/IR/DebugInfoMetadata.h"
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/LLVMContext.h"
|
||||
@ -71,6 +72,18 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
|
||||
InlinedAt ? unwrap<MDNode>(InlinedAt) : nullptr));
|
||||
}
|
||||
|
||||
// Returns the builder's current debug location as a plain struct: line,
// column, scope, and an inlined-at reference. The inlined-at field is null
// when the location has no inlined-at entry; otherwise it is the raw
// inlined-at node of that entry.
LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref) {
  const auto &CurLoc = unwrap(Bref)->getCurrentDebugLocation();

  LLVMMetadataRef InlinedAtRef = nullptr;
  if (const auto *IA = CurLoc.getInlinedAt())
    InlinedAtRef = wrap(IA->getRawInlinedAt());

  LLVMDebugLocMetadata Result;
  Result.Line = CurLoc.getLine();
  Result.Col = CurLoc.getCol();
  Result.Scope = wrap(CurLoc.getScope());
  Result.InlinedAt = InlinedAtRef;
  return Result;
}
|
||||
|
||||
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) {
|
||||
unwrap<Function>(Func)->setSubprogram(unwrap<DISubprogram>(SP));
|
||||
}
|
||||
|
@ -27,6 +27,12 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct LLVMOpaqueMetadata *LLVMMetadataRef;
|
||||
/* Debug location returned by value from LLVMGetCurrentDebugLocation2: a line
   and column plus the scope and inlined-at metadata references. */
struct LLVMDebugLocMetadata{
|
||||
unsigned Line;
|
||||
unsigned Col;
|
||||
LLVMMetadataRef Scope;
|
||||
LLVMMetadataRef InlinedAt;
|
||||
};
|
||||
|
||||
LLVMMetadataRef LLVMConstantAsMetadata(LLVMValueRef Val);
|
||||
|
||||
@ -46,6 +52,8 @@ void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
|
||||
unsigned Col, LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef InlinedAt);
|
||||
|
||||
struct LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref);
|
||||
|
||||
void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -1226,9 +1226,23 @@ func (b Builder) InsertWithName(instr Value, name string) {
|
||||
func (b Builder) Dispose() { C.LLVMDisposeBuilder(b.C) }
|
||||
|
||||
// Metadata
|
||||
type DebugLoc struct {
|
||||
Line, Col uint
|
||||
Scope Metadata
|
||||
InlinedAt Metadata
|
||||
}
|
||||
func (b Builder) SetCurrentDebugLocation(line, col uint, scope, inlinedAt Metadata) {
|
||||
C.LLVMSetCurrentDebugLocation2(b.C, C.unsigned(line), C.unsigned(col), scope.C, inlinedAt.C)
|
||||
}
|
||||
// Get current debug location. Please do not call this function until setting debug location with SetCurrentDebugLocation()
|
||||
func (b Builder) GetCurrentDebugLocation() (loc DebugLoc) {
|
||||
md := C.LLVMGetCurrentDebugLocation2(b.C)
|
||||
loc.Line = uint(md.Line)
|
||||
loc.Col = uint(md.Col)
|
||||
loc.Scope = Metadata{C: md.Scope}
|
||||
loc.InlinedAt = Metadata{C: md.InlinedAt}
|
||||
return
|
||||
}
|
||||
// SetInstDebugLocation passes v to LLVMSetInstDebugLocation for this builder.
func (b Builder) SetInstDebugLocation(v Value) {
	C.LLVMSetInstDebugLocation(b.C, v.C)
}
|
||||
func (b Builder) InsertDeclare(module Module, storage Value, md Value) Value {
|
||||
f := module.NamedFunction("llvm.dbg.declare")
|
||||
|
@ -95,3 +95,42 @@ func TestAttributes(t *testing.T) {
|
||||
testAttribute(t, name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDebugLoc(t *testing.T) {
|
||||
mod := NewModule("")
|
||||
defer mod.Dispose()
|
||||
|
||||
ctx := mod.Context()
|
||||
|
||||
b := ctx.NewBuilder()
|
||||
defer b.Dispose()
|
||||
|
||||
d := NewDIBuilder(mod)
|
||||
defer func() {
|
||||
d.Destroy()
|
||||
}()
|
||||
file := d.CreateFile("dummy_file", "dummy_dir")
|
||||
voidInfo := d.CreateBasicType(DIBasicType{Name: "void"})
|
||||
typeInfo := d.CreateSubroutineType(DISubroutineType{file, []Metadata{voidInfo}})
|
||||
scope := d.CreateFunction(file, DIFunction{
|
||||
Name: "foo",
|
||||
LinkageName: "foo",
|
||||
Line: 10,
|
||||
ScopeLine: 10,
|
||||
Type: typeInfo,
|
||||
File: file,
|
||||
IsDefinition: true,
|
||||
})
|
||||
|
||||
b.SetCurrentDebugLocation(10, 20, scope, Metadata{})
|
||||
loc := b.GetCurrentDebugLocation()
|
||||
if loc.Line != 10 {
|
||||
t.Errorf("Got line %d, though wanted 10", loc.Line)
|
||||
}
|
||||
if loc.Col != 20 {
|
||||
t.Errorf("Got column %d, though wanted 20", loc.Col)
|
||||
}
|
||||
if loc.Scope.C != scope.C {
|
||||
t.Errorf("Got metadata %v as scope, though wanted %v", loc.Scope.C, scope.C)
|
||||
}
|
||||
}
|
||||
|
@ -43,6 +43,26 @@ func (pmb PassManagerBuilder) PopulateFunc(pm PassManager) {
|
||||
C.LLVMPassManagerBuilderPopulateFunctionPassManager(pmb.C, pm.C)
|
||||
}
|
||||
|
||||
func (pmb PassManagerBuilder) PopulateLTOPassManager(pm PassManager, internalize bool, runInliner bool) {
|
||||
C.LLVMPassManagerBuilderPopulateLTOPassManager(pmb.C, pm.C, boolToLLVMBool(internalize), boolToLLVMBool(runInliner))
|
||||
}
|
||||
|
||||
func (pmb PassManagerBuilder) Dispose() {
|
||||
C.LLVMPassManagerBuilderDispose(pmb.C)
|
||||
}
|
||||
|
||||
func (pmb PassManagerBuilder) SetDisableUnitAtATime(val bool) {
|
||||
C.LLVMPassManagerBuilderSetDisableUnitAtATime(pmb.C, boolToLLVMBool(val))
|
||||
}
|
||||
|
||||
func (pmb PassManagerBuilder) SetDisableUnrollLoops(val bool) {
|
||||
C.LLVMPassManagerBuilderSetDisableUnrollLoops(pmb.C, boolToLLVMBool(val))
|
||||
}
|
||||
|
||||
func (pmb PassManagerBuilder) SetDisableSimplifyLibCalls(val bool) {
|
||||
C.LLVMPassManagerBuilderSetDisableSimplifyLibCalls(pmb.C, boolToLLVMBool(val))
|
||||
}
|
||||
|
||||
func (pmb PassManagerBuilder) UseInlinerWithThreshold(threshold uint) {
|
||||
C.LLVMPassManagerBuilderUseInlinerWithThreshold(pmb.C, C.uint(threshold))
|
||||
}
|
||||
|
@ -46,7 +46,6 @@ endfunction()
|
||||
check_include_file(dirent.h HAVE_DIRENT_H)
|
||||
check_include_file(dlfcn.h HAVE_DLFCN_H)
|
||||
check_include_file(errno.h HAVE_ERRNO_H)
|
||||
check_include_file(execinfo.h HAVE_EXECINFO_H)
|
||||
check_include_file(fcntl.h HAVE_FCNTL_H)
|
||||
check_include_file(inttypes.h HAVE_INTTYPES_H)
|
||||
check_include_file(link.h HAVE_LINK_H)
|
||||
@ -88,6 +87,15 @@ if(APPLE)
|
||||
HAVE_CRASHREPORTER_INFO)
|
||||
endif()
|
||||
|
||||
# On Linux, probe for linux/magic.h; when it is missing, probe for the two
# split headers instead. The system name is expanded inside quotes so the
# comparison stays well-formed if the variable is empty and the expanded value
# is not dereferenced a second time as a variable name.
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
  check_include_file(linux/magic.h HAVE_LINUX_MAGIC_H)
  if(NOT HAVE_LINUX_MAGIC_H)
    # older kernels use split files
    check_include_file(linux/nfs_fs.h HAVE_LINUX_NFS_FS_H)
    check_include_file(linux/smb.h HAVE_LINUX_SMB_H)
  endif()
endif()
|
||||
|
||||
# library checks
|
||||
if( NOT PURE_WINDOWS )
|
||||
check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD)
|
||||
@ -115,7 +123,7 @@ if(HAVE_LIBPTHREAD)
|
||||
set(CMAKE_THREAD_PREFER_PTHREAD TRUE)
|
||||
set(THREADS_HAVE_PTHREAD_ARG Off)
|
||||
find_package(Threads REQUIRED)
|
||||
set(PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
|
||||
set(LLVM_PTHREAD_LIB ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
|
||||
# Don't look for these libraries on Windows. Also don't look for them if we're
|
||||
@ -156,7 +164,9 @@ endif()
|
||||
|
||||
# function checks
|
||||
check_symbol_exists(arc4random "stdlib.h" HAVE_DECL_ARC4RANDOM)
|
||||
check_symbol_exists(backtrace "execinfo.h" HAVE_BACKTRACE)
|
||||
find_package(Backtrace)
|
||||
set(HAVE_BACKTRACE ${Backtrace_FOUND})
|
||||
set(BACKTRACE_HEADER ${Backtrace_HEADER})
|
||||
check_symbol_exists(_Unwind_Backtrace "unwind.h" HAVE__UNWIND_BACKTRACE)
|
||||
check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE)
|
||||
check_symbol_exists(sysconf unistd.h HAVE_SYSCONF)
|
||||
@ -227,6 +237,7 @@ if( HAVE_DLFCN_H )
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
|
||||
endif()
|
||||
check_symbol_exists(dlopen dlfcn.h HAVE_DLOPEN)
|
||||
check_symbol_exists(dladdr dlfcn.h HAVE_DLADDR)
|
||||
if( HAVE_LIBDL )
|
||||
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl)
|
||||
endif()
|
||||
@ -234,7 +245,15 @@ endif()
|
||||
|
||||
check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
|
||||
if( LLVM_USING_GLIBC )
|
||||
add_llvm_definitions( -D_GNU_SOURCE )
|
||||
add_definitions( -D_GNU_SOURCE )
|
||||
endif()
|
||||
# This check requires _GNU_SOURCE
|
||||
if(HAVE_LIBPTHREAD)
|
||||
check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
|
||||
check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
|
||||
elseif(PTHREAD_IN_LIBC)
|
||||
check_library_exists(c pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
|
||||
check_library_exists(c pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
|
||||
endif()
|
||||
|
||||
set(headers "sys/types.h")
|
||||
@ -489,8 +508,6 @@ if (LLVM_ENABLE_ZLIB )
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(LLVM_PREFIX ${CMAKE_INSTALL_PREFIX})
|
||||
|
||||
if (LLVM_ENABLE_DOXYGEN)
|
||||
message(STATUS "Doxygen enabled.")
|
||||
find_package(Doxygen REQUIRED)
|
||||
@ -547,6 +564,9 @@ set(LLVM_BINUTILS_INCDIR "" CACHE PATH
|
||||
"PATH to binutils/include containing plugin-api.h for gold plugin.")
|
||||
|
||||
if(CMAKE_HOST_APPLE AND APPLE)
|
||||
if(NOT CMAKE_XCRUN)
|
||||
find_program(CMAKE_XCRUN NAMES xcrun)
|
||||
endif()
|
||||
if(CMAKE_XCRUN)
|
||||
execute_process(COMMAND ${CMAKE_XCRUN} -find ld
|
||||
OUTPUT_VARIABLE LD64_EXECUTABLE
|
||||
|
@ -718,11 +718,11 @@ macro(add_llvm_executable name)
|
||||
if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO)
|
||||
llvm_externalize_debuginfo(${name})
|
||||
endif()
|
||||
if (PTHREAD_LIB)
|
||||
if (LLVM_PTHREAD_LIB)
|
||||
# libpthreads overrides some standard library symbols, so main
|
||||
# executable must be linked with it in order to provide consistent
|
||||
# API for all shared libraries loaded by this executable.
|
||||
target_link_libraries(${name} ${PTHREAD_LIB})
|
||||
target_link_libraries(${name} ${LLVM_PTHREAD_LIB})
|
||||
endif()
|
||||
endmacro(add_llvm_executable name)
|
||||
|
||||
@ -1027,7 +1027,7 @@ function(add_unittest test_suite test_name)
|
||||
# libpthreads overrides some standard library symbols, so main
|
||||
# executable must be linked with it in order to provide consistent
|
||||
# API for all shared libraries loaded by this executable.
|
||||
target_link_libraries(${test_name} gtest_main gtest ${PTHREAD_LIB})
|
||||
target_link_libraries(${test_name} gtest_main gtest ${LLVM_PTHREAD_LIB})
|
||||
|
||||
add_dependencies(${test_suite} ${test_name})
|
||||
get_target_property(test_suite_folder ${test_suite} FOLDER)
|
||||
@ -1387,7 +1387,11 @@ function(llvm_externalize_debuginfo name)
|
||||
endif()
|
||||
|
||||
if(NOT LLVM_EXTERNALIZE_DEBUGINFO_SKIP_STRIP)
|
||||
set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>)
|
||||
if(APPLE)
|
||||
set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>)
|
||||
else()
|
||||
set(strip_command COMMAND strip -gx $<TARGET_FILE:${name}>)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
@ -1403,7 +1407,11 @@ function(llvm_externalize_debuginfo name)
|
||||
${strip_command}
|
||||
)
|
||||
else()
|
||||
message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!")
|
||||
add_custom_command(TARGET ${name} POST_BUILD
|
||||
COMMAND objcopy --only-keep-debug $<TARGET_FILE:${name}> $<TARGET_FILE:${name}>.debug
|
||||
${strip_command} -R .gnu_debuglink
|
||||
COMMAND objcopy --add-gnu-debuglink=$<TARGET_FILE:${name}>.debug $<TARGET_FILE:${name}>
|
||||
)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
|
@ -48,10 +48,15 @@ function (add_sphinx_target builder project)
|
||||
# Handle installation
|
||||
if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
|
||||
if (builder STREQUAL man)
|
||||
if (CMAKE_INSTALL_MANDIR)
|
||||
set(INSTALL_MANDIR ${CMAKE_INSTALL_MANDIR}/)
|
||||
else()
|
||||
set(INSTALL_MANDIR share/man/)
|
||||
endif()
|
||||
# FIXME: We might not ship all the tools that these man pages describe
|
||||
install(DIRECTORY "${SPHINX_BUILD_DIR}/" # Slash indicates contents of
|
||||
COMPONENT "${project}-sphinx-man"
|
||||
DESTINATION share/man/man1)
|
||||
DESTINATION ${INSTALL_MANDIR}man1)
|
||||
|
||||
elseif (builder STREQUAL html)
|
||||
string(TOUPPER "${project}" project_upper)
|
||||
|
@ -8,12 +8,41 @@ string(TOUPPER "${CMAKE_BUILD_TYPE}" uppercase_CMAKE_BUILD_TYPE)
|
||||
|
||||
include(CheckCompilerVersion)
|
||||
include(HandleLLVMStdlib)
|
||||
include(AddLLVMDefinitions)
|
||||
include(CheckCCompilerFlag)
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
# LINKER_IS_LLD_LINK is TRUE when CMAKE_LINKER matches lld-link.exe, or when
# targeting WIN32 with LLVM_USE_LINKER set to "lld"; otherwise it is FALSE.
set(LINKER_IS_LLD_LINK FALSE)
if(CMAKE_LINKER MATCHES "lld-link.exe" OR (WIN32 AND LLVM_USE_LINKER STREQUAL "lld"))
  set(LINKER_IS_LLD_LINK TRUE)
endif()
|
||||
|
||||
if (CMAKE_LINKER MATCHES "lld-link.exe")
|
||||
# Ninja Job Pool support
# The following only works with the Ninja generator in CMake >= 3.0.
#
# When LLVM_PARALLEL_COMPILE_JOBS is set and the make program is ninja, a
# compile job pool of that size is registered and selected; with any other
# make program only a warning is printed.
set(LLVM_PARALLEL_COMPILE_JOBS "" CACHE STRING
  "Define the maximum number of concurrent compilation jobs.")
if(LLVM_PARALLEL_COMPILE_JOBS AND CMAKE_MAKE_PROGRAM MATCHES "ninja")
  set_property(GLOBAL APPEND PROPERTY JOB_POOLS compile_job_pool=${LLVM_PARALLEL_COMPILE_JOBS})
  set(CMAKE_JOB_POOL_COMPILE compile_job_pool)
elseif(LLVM_PARALLEL_COMPILE_JOBS)
  message(WARNING "Job pooling is only available with Ninja generators.")
endif()
|
||||
|
||||
# When LLVM_PARALLEL_LINK_JOBS is set and the make program is ninja, a link job
# pool of that size is registered and selected; with any other make program
# only a warning is printed.
set(LLVM_PARALLEL_LINK_JOBS "" CACHE STRING
  "Define the maximum number of concurrent link jobs.")
if(LLVM_PARALLEL_LINK_JOBS AND CMAKE_MAKE_PROGRAM MATCHES "ninja")
  set_property(GLOBAL APPEND PROPERTY JOB_POOLS link_job_pool=${LLVM_PARALLEL_LINK_JOBS})
  set(CMAKE_JOB_POOL_LINK link_job_pool)
elseif(LLVM_PARALLEL_LINK_JOBS)
  message(WARNING "Job pooling is only available with Ninja generators.")
endif()
|
||||
|
||||
|
||||
if (LINKER_IS_LLD_LINK)
|
||||
# Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. Adding
|
||||
# manifests with mt.exe breaks LLD's symbol tables and takes as much time as
|
||||
# the link. See PR24476.
|
||||
@ -147,9 +176,19 @@ function(add_flag_or_print_warning flag name)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
if(LLVM_ENABLE_LLD)
|
||||
check_cxx_compiler_flag("-fuse-ld=lld" CXX_SUPPORTS_LLD)
|
||||
append_if(CXX_SUPPORTS_LLD "-fuse-ld=lld"
|
||||
# LLVM_ENABLE_LLD selects lld by setting LLVM_USE_LINKER; setting both options
# at once is rejected with a fatal error.
if(LLVM_ENABLE_LLD AND LLVM_USE_LINKER)
  message(FATAL_ERROR "LLVM_ENABLE_LLD and LLVM_USE_LINKER can't be set at the same time")
endif()
if(LLVM_ENABLE_LLD)
  set(LLVM_USE_LINKER "lld")
endif()
|
||||
|
||||
# When LLVM_USE_LINKER is set, verify that the host compiler accepts
# -fuse-ld=<linker>, then append the flag to the executable, module and
# shared-library linker flags; an unsupported linker is a fatal error.
if(LLVM_USE_LINKER)
  check_cxx_compiler_flag("-fuse-ld=${LLVM_USE_LINKER}" CXX_SUPPORTS_CUSTOM_LINKER)
  if(CXX_SUPPORTS_CUSTOM_LINKER)
    append("-fuse-ld=${LLVM_USE_LINKER}"
      CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
  else()
    message(FATAL_ERROR "Host compiler does not support '-fuse-ld=${LLVM_USE_LINKER}'")
  endif()
endif()
|
||||
|
||||
@ -213,10 +252,10 @@ if( MSVC_IDE )
|
||||
"Number of parallel compiler jobs. 0 means use all processors. Default is 0.")
|
||||
if( NOT LLVM_COMPILER_JOBS STREQUAL "1" )
|
||||
if( LLVM_COMPILER_JOBS STREQUAL "0" )
|
||||
add_llvm_definitions( /MP )
|
||||
add_definitions( /MP )
|
||||
else()
|
||||
message(STATUS "Number of parallel compiler jobs set to " ${LLVM_COMPILER_JOBS})
|
||||
add_llvm_definitions( /MP${LLVM_COMPILER_JOBS} )
|
||||
add_definitions( /MP${LLVM_COMPILER_JOBS} )
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Parallel compilation disabled")
|
||||
@ -245,17 +284,17 @@ if( MSVC )
|
||||
if( CMAKE_CXX_COMPILER_VERSION VERSION_LESS 19.0 )
|
||||
# For MSVC 2013, disable iterator null pointer checking in debug mode,
|
||||
# especially so std::equal(nullptr, nullptr, nullptr) will not assert.
|
||||
add_llvm_definitions("-D_DEBUG_POINTER_IMPL=")
|
||||
add_definitions("-D_DEBUG_POINTER_IMPL=")
|
||||
endif()
|
||||
|
||||
include(ChooseMSVCCRT)
|
||||
|
||||
if( MSVC11 )
|
||||
add_llvm_definitions(-D_VARIADIC_MAX=10)
|
||||
add_definitions(-D_VARIADIC_MAX=10)
|
||||
endif()
|
||||
|
||||
# Add definitions that make MSVC much less annoying.
|
||||
add_llvm_definitions(
|
||||
add_definitions(
|
||||
# For some reason MS wants to deprecate a bunch of standard functions...
|
||||
-D_CRT_SECURE_NO_DEPRECATE
|
||||
-D_CRT_SECURE_NO_WARNINGS
|
||||
@ -266,94 +305,15 @@ if( MSVC )
|
||||
)
|
||||
|
||||
# Tell MSVC to use the Unicode version of the Win32 APIs instead of ANSI.
|
||||
add_llvm_definitions(
|
||||
add_definitions(
|
||||
-DUNICODE
|
||||
-D_UNICODE
|
||||
)
|
||||
|
||||
set(msvc_warning_flags
|
||||
# Disabled warnings.
|
||||
-wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline)
|
||||
-wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned'
|
||||
-wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored'
|
||||
-wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data'
|
||||
-wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used'
|
||||
-wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data'
|
||||
-wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception'
|
||||
-wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized'
|
||||
-wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized'
|
||||
-wd4355 # Suppress ''this' : used in base member initializer list'
|
||||
-wd4456 # Suppress 'declaration of 'var' hides local variable'
|
||||
-wd4457 # Suppress 'declaration of 'var' hides function parameter'
|
||||
-wd4458 # Suppress 'declaration of 'var' hides class member'
|
||||
-wd4459 # Suppress 'declaration of 'var' hides global declaration'
|
||||
-wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated'
|
||||
-wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible'
|
||||
-wd4722 # Suppress 'function' : destructor never returns, potential memory leak
|
||||
-wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
|
||||
-wd4100 # Suppress 'unreferenced formal parameter'
|
||||
-wd4127 # Suppress 'conditional expression is constant'
|
||||
-wd4512 # Suppress 'assignment operator could not be generated'
|
||||
-wd4505 # Suppress 'unreferenced local function has been removed'
|
||||
-wd4610 # Suppress '<class> can never be instantiated'
|
||||
-wd4510 # Suppress 'default constructor could not be generated'
|
||||
-wd4702 # Suppress 'unreachable code'
|
||||
-wd4245 # Suppress 'signed/unsigned mismatch'
|
||||
-wd4706 # Suppress 'assignment within conditional expression'
|
||||
-wd4310 # Suppress 'cast truncates constant value'
|
||||
-wd4701 # Suppress 'potentially uninitialized local variable'
|
||||
-wd4703 # Suppress 'potentially uninitialized local pointer variable'
|
||||
-wd4389 # Suppress 'signed/unsigned mismatch'
|
||||
-wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable'
|
||||
-wd4805 # Suppress 'unsafe mix of type <type> and type <type> in operation'
|
||||
-wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer'
|
||||
-wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed'
|
||||
-wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared'
|
||||
# C4592 is disabled because of false positives in Visual Studio 2015
|
||||
# Update 1. Re-evaluate the usefulness of this diagnostic with Update 2.
|
||||
-wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation)
|
||||
-wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size'
|
||||
|
||||
# Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't
|
||||
# support the 'aligned' attribute in the way that clang sources require (for
|
||||
# any code that uses the LLVM_ALIGNAS macro), so this must be disabled to
|
||||
# avoid unwanted alignment warnings.
|
||||
# When we switch to requiring a version of MSVC that supports the 'alignas'
|
||||
# specifier (MSVC 2015?) this warning can be re-enabled.
|
||||
-wd4324 # Suppress 'structure was padded due to __declspec(align())'
|
||||
|
||||
# Promoted warnings.
|
||||
-w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning.
|
||||
|
||||
# Promoted warnings to errors.
|
||||
-we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error.
|
||||
)
|
||||
|
||||
# Enable warnings
|
||||
if (LLVM_ENABLE_WARNINGS)
|
||||
# Put /W4 in front of all the -we flags. cl.exe doesn't care, but for
|
||||
# clang-cl having /W4 after the -we flags will re-enable the warnings
|
||||
# disabled by -we.
|
||||
set(msvc_warning_flags "/W4 ${msvc_warning_flags}")
|
||||
# CMake appends /W3 by default, and having /W3 followed by /W4 will result in
|
||||
# cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is
|
||||
# a command line warning and not a compiler warning, it cannot be suppressed except
|
||||
# by fixing the command line.
|
||||
string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
|
||||
if (LLVM_ENABLE_PEDANTIC)
|
||||
# No MSVC equivalent available
|
||||
endif (LLVM_ENABLE_PEDANTIC)
|
||||
endif (LLVM_ENABLE_WARNINGS)
|
||||
if (LLVM_ENABLE_WERROR)
|
||||
append("/WX" msvc_warning_flags)
|
||||
append("/WX" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif (LLVM_ENABLE_WERROR)
|
||||
|
||||
foreach(flag ${msvc_warning_flags})
|
||||
append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endforeach(flag)
|
||||
|
||||
append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
|
||||
# /Zc:strictStrings is incompatible with VS12's (Visual Studio 2013's)
|
||||
@ -373,11 +333,13 @@ if( MSVC )
|
||||
# "Enforce type conversion rules".
|
||||
append("/Zc:rvalueCast" CMAKE_CXX_FLAGS)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND NOT LLVM_ENABLE_LTO)
|
||||
# clang-cl and cl by default produce non-deterministic binaries because
|
||||
# link.exe /incremental requires a timestamp in the .obj file. clang-cl
|
||||
# has the flag /Brepro to force deterministic binaries. We want to pass that
|
||||
# whenever you're building with clang unless you're passing /incremental.
|
||||
# whenever you're building with clang unless you're passing /incremental
|
||||
# or using LTO (/Brepro with LTO would result in a warning about the flag
|
||||
# being unused, because we're not generating object files).
|
||||
# This checks CMAKE_CXX_COMPILER_ID in addition to check_cxx_compiler_flag()
|
||||
# because cl.exe does not emit an error on flags it doesn't understand,
|
||||
# letting check_cxx_compiler_flag() claim it understands all flags.
|
||||
@ -401,63 +363,6 @@ if( MSVC )
|
||||
endif()
|
||||
|
||||
elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
|
||||
if (LLVM_ENABLE_WARNINGS)
|
||||
append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append("-Wcast-qual" CMAKE_CXX_FLAGS)
|
||||
|
||||
# Turn off missing field initializer warnings for gcc to avoid noise from
|
||||
# false positives with empty {}. Turn them on otherwise (they're off by
|
||||
# default for clang).
|
||||
check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
|
||||
if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
|
||||
if (CMAKE_COMPILER_IS_GNUCXX)
|
||||
append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
else()
|
||||
append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
append_if(LLVM_ENABLE_PEDANTIC "-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append_if(LLVM_ENABLE_PEDANTIC "-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
|
||||
append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS)
|
||||
append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
|
||||
|
||||
# Check if -Wnon-virtual-dtor warns even though the class is marked final.
|
||||
# If it does, don't add it. So it won't be added on clang 3.4 and older.
|
||||
# This also catches cases when -Wnon-virtual-dtor isn't supported by
|
||||
# the compiler at all. This flag is not activated for gcc since it will
|
||||
# incorrectly identify a protected non-virtual base when there is a friend
|
||||
# declaration.
|
||||
if (NOT CMAKE_COMPILER_IS_GNUCXX)
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor")
|
||||
CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();};
|
||||
class derived final : public base { public: ~derived();};
|
||||
int main() { return 0; }"
|
||||
CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR)
|
||||
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
||||
append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR
|
||||
"-Wnon-virtual-dtor" CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
# Enable -Wdelete-non-virtual-dtor if available.
|
||||
add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG)
|
||||
|
||||
# Check if -Wcomment is OK with an // comment ending with '\' if the next
|
||||
# line is also a // comment.
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment")
|
||||
CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}"
|
||||
C_WCOMMENT_ALLOWS_LINE_WRAP)
|
||||
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
||||
if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP)
|
||||
append("-Wno-comment" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
# Enable -Wstring-conversion to catch misuse of string literals.
|
||||
add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG)
|
||||
endif (LLVM_ENABLE_WARNINGS)
|
||||
append_if(LLVM_ENABLE_WERROR "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
add_flag_if_supported("-Werror=date-time" WERROR_DATE_TIME)
|
||||
if (LLVM_ENABLE_CXX1Y)
|
||||
@ -511,6 +416,155 @@ elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE )
|
||||
endif(LLVM_ENABLE_MODULES)
|
||||
endif( MSVC )
|
||||
|
||||
if (MSVC AND NOT CLANG_CL)
|
||||
set(msvc_warning_flags
|
||||
# Disabled warnings.
|
||||
-wd4141 # Suppress ''modifier' : used more than once' (because of __forceinline combined with inline)
|
||||
-wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned'
|
||||
-wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored'
|
||||
-wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data'
|
||||
-wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used'
|
||||
-wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data'
|
||||
-wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception'
|
||||
-wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized'
|
||||
-wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized'
|
||||
-wd4355 # Suppress ''this' : used in base member initializer list'
|
||||
-wd4456 # Suppress 'declaration of 'var' hides local variable'
|
||||
-wd4457 # Suppress 'declaration of 'var' hides function parameter'
|
||||
-wd4458 # Suppress 'declaration of 'var' hides class member'
|
||||
-wd4459 # Suppress 'declaration of 'var' hides global declaration'
|
||||
-wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated'
|
||||
-wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible'
|
||||
-wd4722 # Suppress 'function' : destructor never returns, potential memory leak
|
||||
-wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)'
|
||||
-wd4100 # Suppress 'unreferenced formal parameter'
|
||||
-wd4127 # Suppress 'conditional expression is constant'
|
||||
-wd4512 # Suppress 'assignment operator could not be generated'
|
||||
-wd4505 # Suppress 'unreferenced local function has been removed'
|
||||
-wd4610 # Suppress '<class> can never be instantiated'
|
||||
-wd4510 # Suppress 'default constructor could not be generated'
|
||||
-wd4702 # Suppress 'unreachable code'
|
||||
-wd4245 # Suppress 'signed/unsigned mismatch'
|
||||
-wd4706 # Suppress 'assignment within conditional expression'
|
||||
-wd4310 # Suppress 'cast truncates constant value'
|
||||
-wd4701 # Suppress 'potentially uninitialized local variable'
|
||||
-wd4703 # Suppress 'potentially uninitialized local pointer variable'
|
||||
-wd4389 # Suppress 'signed/unsigned mismatch'
|
||||
-wd4611 # Suppress 'interaction between '_setjmp' and C++ object destruction is non-portable'
|
||||
-wd4805 # Suppress 'unsafe mix of type <type> and type <type> in operation'
|
||||
-wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer'
|
||||
-wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed'
|
||||
-wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared'
|
||||
# C4592 is disabled because of false positives in Visual Studio 2015
|
||||
# Update 1. Re-evaluate the usefulness of this diagnostic with Update 2.
|
||||
-wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation)
|
||||
-wd4319 # Suppress ''operator' : zero extending 'type' to 'type' of greater size'
|
||||
|
||||
# Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't
|
||||
# support the 'aligned' attribute in the way that clang sources require (for
|
||||
# any code that uses the LLVM_ALIGNAS macro), so this must be disabled to
|
||||
# avoid unwanted alignment warnings.
|
||||
# When we switch to requiring a version of MSVC that supports the 'alignas'
|
||||
# specifier (MSVC 2015?) this warning can be re-enabled.
|
||||
-wd4324 # Suppress 'structure was padded due to __declspec(align())'
|
||||
|
||||
# Promoted warnings.
|
||||
-w14062 # Promote 'enumerator in switch of enum is not handled' to level 1 warning.
|
||||
|
||||
# Promoted warnings to errors.
|
||||
-we4238 # Promote 'nonstandard extension used : class rvalue used as lvalue' to error.
|
||||
)
|
||||
|
||||
# Enable warnings
|
||||
if (LLVM_ENABLE_WARNINGS)
|
||||
# Put /W4 in front of all the -we flags. cl.exe doesn't care, but for
|
||||
# clang-cl having /W4 after the -we flags will re-enable the warnings
|
||||
# disabled by -we.
|
||||
set(msvc_warning_flags "/W4 ${msvc_warning_flags}")
|
||||
# CMake appends /W3 by default, and having /W3 followed by /W4 will result in
|
||||
# cl : Command line warning D9025 : overriding '/W3' with '/W4'. Since this is
|
||||
# a command line warning and not a compiler warning, it cannot be suppressed except
|
||||
# by fixing the command line.
|
||||
string(REGEX REPLACE " /W[0-4]" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
string(REGEX REPLACE " /W[0-4]" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
|
||||
|
||||
if (LLVM_ENABLE_PEDANTIC)
|
||||
# No MSVC equivalent available
|
||||
endif (LLVM_ENABLE_PEDANTIC)
|
||||
endif (LLVM_ENABLE_WARNINGS)
|
||||
|
||||
foreach(flag ${msvc_warning_flags})
|
||||
append("${flag}" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endforeach(flag)
|
||||
endif (MSVC AND NOT CLANG_CL)
|
||||
|
||||
if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
|
||||
append("-Wall -W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append("-Wcast-qual" CMAKE_CXX_FLAGS)
|
||||
|
||||
# Turn off missing field initializer warnings for gcc to avoid noise from
|
||||
# false positives with empty {}. Turn them on otherwise (they're off by
|
||||
# default for clang).
|
||||
check_cxx_compiler_flag("-Wmissing-field-initializers" CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
|
||||
if (CXX_SUPPORTS_MISSING_FIELD_INITIALIZERS_FLAG)
|
||||
if (CMAKE_COMPILER_IS_GNUCXX)
|
||||
append("-Wno-missing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
else()
|
||||
append("-Wmissing-field-initializers" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE)
|
||||
append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
|
||||
append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS)
|
||||
append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
|
||||
|
||||
# Check if -Wnon-virtual-dtor warns even though the class is marked final.
|
||||
# If it does, don't add it. So it won't be added on clang 3.4 and older.
|
||||
# This also catches cases when -Wnon-virtual-dtor isn't supported by
|
||||
# the compiler at all. This flag is not activated for gcc since it will
|
||||
# incorrectly identify a protected non-virtual base when there is a friend
|
||||
# declaration. Don't activate this in general on Windows as this warning has
|
||||
# too many false positives on COM-style classes, which are destroyed with
|
||||
# Release() (PR32286).
|
||||
if (NOT CMAKE_COMPILER_IS_GNUCXX AND NOT WIN32)
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -std=c++11 -Werror=non-virtual-dtor")
|
||||
CHECK_CXX_SOURCE_COMPILES("class base {public: virtual void anchor();protected: ~base();};
|
||||
class derived final : public base { public: ~derived();};
|
||||
int main() { return 0; }"
|
||||
CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR)
|
||||
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
||||
append_if(CXX_WONT_WARN_ON_FINAL_NONVIRTUALDTOR
|
||||
"-Wnon-virtual-dtor" CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
# Enable -Wdelete-non-virtual-dtor if available.
|
||||
add_flag_if_supported("-Wdelete-non-virtual-dtor" DELETE_NON_VIRTUAL_DTOR_FLAG)
|
||||
|
||||
# Check if -Wcomment is OK with an // comment ending with '\' if the next
|
||||
# line is also a // comment.
|
||||
set(OLD_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
|
||||
set(CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS} -Werror -Wcomment")
|
||||
CHECK_C_SOURCE_COMPILES("// \\\\\\n//\\nint main() {return 0;}"
|
||||
C_WCOMMENT_ALLOWS_LINE_WRAP)
|
||||
set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
|
||||
if (NOT C_WCOMMENT_ALLOWS_LINE_WRAP)
|
||||
append("-Wno-comment" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
# Enable -Wstring-conversion to catch misuse of string literals.
|
||||
add_flag_if_supported("-Wstring-conversion" STRING_CONVERSION_FLAG)
|
||||
endif (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
|
||||
|
||||
# With LLVM_ENABLE_WARNINGS turned off, pass -w to GCC-compatible compilers
# for both C and C++ sources.
if (NOT LLVM_ENABLE_WARNINGS AND LLVM_COMPILER_IS_GCC_COMPATIBLE)
  append("-w"
    CMAKE_C_FLAGS
    CMAKE_CXX_FLAGS)
endif()
|
||||
|
||||
macro(append_common_sanitizer_flags)
|
||||
if (NOT MSVC)
|
||||
# Append -fno-omit-frame-pointer and turn on debug info to get better
|
||||
@ -527,7 +581,7 @@ macro(append_common_sanitizer_flags)
|
||||
elseif (CLANG_CL)
|
||||
# Keep frame pointers around.
|
||||
append("/Oy-" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
if (CMAKE_LINKER MATCHES "lld-link.exe")
|
||||
if (LINKER_IS_LLD_LINK)
|
||||
# Use DWARF debug info with LLD.
|
||||
append("-gdwarf" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
else()
|
||||
@ -555,8 +609,11 @@ if(LLVM_USE_SANITIZER)
|
||||
append_common_sanitizer_flags()
|
||||
append("-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append("-fsanitize-blacklist=${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
set(BLACKLIST_FILE "${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt")
|
||||
if (EXISTS "${BLACKLIST_FILE}")
|
||||
append("-fsanitize-blacklist=${BLACKLIST_FILE}"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
elseif (LLVM_USE_SANITIZER STREQUAL "Thread")
|
||||
append_common_sanitizer_flags()
|
||||
append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
@ -578,6 +635,10 @@ if(LLVM_USE_SANITIZER)
|
||||
else()
|
||||
message(FATAL_ERROR "LLVM_USE_SANITIZER is not supported on this platform.")
|
||||
endif()
|
||||
if (LLVM_USE_SANITIZER MATCHES "(Undefined;)?Address(;Undefined)?")
|
||||
add_flag_if_supported("-fsanitize-address-use-after-scope"
|
||||
FSANITIZE_USE_AFTER_SCOPE_FLAG)
|
||||
endif()
|
||||
if (LLVM_USE_SANITIZE_COVERAGE)
|
||||
append("-fsanitize-coverage=trace-pc-guard,indirect-calls,trace-cmp" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
@ -588,9 +649,9 @@ if(LLVM_USE_SPLIT_DWARF)
|
||||
add_definitions("-gsplit-dwarf")
|
||||
endif()
|
||||
|
||||
add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
|
||||
add_llvm_definitions( -D__STDC_FORMAT_MACROS )
|
||||
add_llvm_definitions( -D__STDC_LIMIT_MACROS )
|
||||
add_definitions( -D__STDC_CONSTANT_MACROS )
|
||||
add_definitions( -D__STDC_FORMAT_MACROS )
|
||||
add_definitions( -D__STDC_LIMIT_MACROS )
|
||||
|
||||
# clang doesn't print colored diagnostics when invoked from Ninja
|
||||
if (UNIX AND
|
||||
@ -658,20 +719,38 @@ append_if(LLVM_BUILD_INSTRUMENTED_COVERAGE "-fprofile-instr-generate='${LLVM_PRO
|
||||
|
||||
set(LLVM_ENABLE_LTO OFF CACHE STRING "Build LLVM with LTO. May be specified as Thin or Full to use a particular kind of LTO")
|
||||
string(TOUPPER "${LLVM_ENABLE_LTO}" uppercase_LLVM_ENABLE_LTO)
|
||||
if(LLVM_ENABLE_LTO AND LLVM_ON_WIN32 AND NOT LINKER_IS_LLD_LINK)
|
||||
message(FATAL_ERROR "When compiling for Windows, LLVM_ENABLE_LTO requires using lld as the linker (point CMAKE_LINKER at lld-link.exe)")
|
||||
endif()
|
||||
if(uppercase_LLVM_ENABLE_LTO STREQUAL "THIN")
|
||||
append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
# On darwin, enable the lto cache. This improves initial build time a little
|
||||
# since we re-link a lot of the same objects, and significantly improves
|
||||
# incremental build time.
|
||||
append_if(APPLE "-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache"
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
append("-flto=thin" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
|
||||
if(NOT LINKER_IS_LLD_LINK)
|
||||
append("-flto=thin" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
endif()
|
||||
# If the linker supports it, enable the lto cache. This improves initial build
|
||||
# time a little since we re-link a lot of the same objects, and significantly
|
||||
# improves incremental build time.
|
||||
# FIXME: We should move all this logic into the clang driver.
|
||||
if(APPLE)
|
||||
append("-Wl,-cache_path_lto,${PROJECT_BINARY_DIR}/lto.cache"
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
elseif(UNIX AND LLVM_USE_LINKER STREQUAL "lld")
|
||||
append("-Wl,--thinlto-cache-dir=${PROJECT_BINARY_DIR}/lto.cache"
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
elseif(LLVM_USE_LINKER STREQUAL "gold")
|
||||
append("-Wl,--plugin-opt,cache-dir=${PROJECT_BINARY_DIR}/lto.cache"
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
endif()
|
||||
elseif(uppercase_LLVM_ENABLE_LTO STREQUAL "FULL")
|
||||
append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
append("-flto=full" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
|
||||
if(NOT LINKER_IS_LLD_LINK)
|
||||
append("-flto=full" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
endif()
|
||||
elseif(LLVM_ENABLE_LTO)
|
||||
append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
append("-flto" CMAKE_CXX_FLAGS CMAKE_C_FLAGS)
|
||||
if(NOT LINKER_IS_LLD_LINK)
|
||||
append("-flto" CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# This option makes utils/extract_symbols.py be used to determine the list of
|
||||
@ -698,3 +777,16 @@ if(WIN32 OR CYGWIN)
|
||||
else()
|
||||
set(LLVM_ENABLE_PLUGINS ON)
|
||||
endif()
|
||||
|
||||
# get_compile_definitions()
#
# Reads the COMPILE_DEFINITIONS directory property of ${CMAKE_SOURCE_DIR} and
# publishes it to the caller's scope as LLVM_DEFINITIONS: one string made of
# space-separated "-D<definition>" entries, or the empty string when the
# property holds no definitions.
function(get_compile_definitions)
  get_directory_property(top_dir_definitions DIRECTORY "${CMAKE_SOURCE_DIR}" COMPILE_DEFINITIONS)
  # A function scope starts with a copy of the caller's variables, so the
  # accumulator is initialised explicitly; otherwise a `result' variable that
  # happens to exist in the caller would leak into LLVM_DEFINITIONS.
  set(result "")
  set(separator "")
  foreach(definition ${top_dir_definitions})
    string(APPEND result "${separator}-D${definition}")
    # Every entry after the first is preceded by a single space.
    set(separator " ")
  endforeach()
  set(LLVM_DEFINITIONS "${result}" PARENT_SCOPE)
endfunction()
|
||||
get_compile_definitions()
|
||||
|
@ -8,27 +8,61 @@ function(link_system_libs target)
|
||||
message(AUTHOR_WARNING "link_system_libs no longer needed")
|
||||
endfunction()
|
||||
|
||||
|
||||
# is_llvm_target_library(
|
||||
# library
|
||||
# Name of the LLVM library to check
|
||||
# return_var
|
||||
# Output variable name
|
||||
# ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS
|
||||
# ALL_TARGETS - default looks at the full list of known targets
|
||||
# INCLUDED_TARGETS - looks only at targets being configured
|
||||
# OMITTED_TARGETS - looks only at targets that are not being configured
|
||||
# )
|
||||
function(is_llvm_target_library library return_var)
|
||||
cmake_parse_arguments(ARG "ALL_TARGETS;INCLUDED_TARGETS;OMITTED_TARGETS" "" "" ${ARGN})
|
||||
# Sets variable `return_var' to ON if `library' corresponds to a
|
||||
# LLVM supported target. To OFF if it doesn't.
|
||||
set(${return_var} OFF PARENT_SCOPE)
|
||||
string(TOUPPER "${library}" capitalized_lib)
|
||||
string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
|
||||
if(ARG_INCLUDED_TARGETS)
|
||||
string(TOUPPER "${LLVM_TARGETS_TO_BUILD}" targets)
|
||||
elseif(ARG_OMITTED_TARGETS)
|
||||
set(omitted_targets ${LLVM_ALL_TARGETS})
|
||||
list(REMOVE_ITEM omitted_targets ${LLVM_TARGETS_TO_BUILD})
|
||||
string(TOUPPER "${omitted_targets}" targets)
|
||||
else()
|
||||
string(TOUPPER "${LLVM_ALL_TARGETS}" targets)
|
||||
endif()
|
||||
foreach(t ${targets})
|
||||
if( capitalized_lib STREQUAL t OR
|
||||
capitalized_lib STREQUAL "LLVM${t}" OR
|
||||
capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR
|
||||
capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR
|
||||
capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR
|
||||
capitalized_lib STREQUAL "LLVM${t}DISASSEMBLER" OR
|
||||
capitalized_lib STREQUAL "LLVM${t}INFO" )
|
||||
capitalized_lib STREQUAL "${t}" OR
|
||||
capitalized_lib STREQUAL "${t}DESC" OR
|
||||
capitalized_lib STREQUAL "${t}CODEGEN" OR
|
||||
capitalized_lib STREQUAL "${t}ASMPARSER" OR
|
||||
capitalized_lib STREQUAL "${t}ASMPRINTER" OR
|
||||
capitalized_lib STREQUAL "${t}DISASSEMBLER" OR
|
||||
capitalized_lib STREQUAL "${t}INFO" OR
|
||||
capitalized_lib STREQUAL "${t}UTILS" )
|
||||
set(${return_var} ON PARENT_SCOPE)
|
||||
break()
|
||||
endif()
|
||||
endforeach()
|
||||
endfunction(is_llvm_target_library)
|
||||
|
||||
# is_llvm_target_specifier(
#   library
#     Name of the component to check
#   return_var
#     Output variable name
#   [ALL_TARGETS | INCLUDED_TARGETS | OMITTED_TARGETS]
#     Forwarded unchanged to is_llvm_target_library()
# )
#
# Sets `return_var' in the caller's scope to ON if `library' is either
# accepted by is_llvm_target_library() or is one of the target-dependent
# keywords listed below; sets it to OFF otherwise.
function(is_llvm_target_specifier library return_var)
  # is_llvm_target_library() reports through PARENT_SCOPE, i.e. into *this*
  # function's scope. Capture its verdict in a local variable and forward the
  # final answer explicitly, so the caller always receives a fresh ON/OFF
  # value instead of whatever its variable held before the call.
  is_llvm_target_library("${library}" is_specifier ${ARGN})
  if(NOT is_specifier)
    string(TOUPPER "${library}" capitalized_lib)
    if( capitalized_lib STREQUAL "ALLTARGETSASMPARSERS" OR
        capitalized_lib STREQUAL "ALLTARGETSDESCS" OR
        capitalized_lib STREQUAL "ALLTARGETSDISASSEMBLERS" OR
        capitalized_lib STREQUAL "ALLTARGETSINFOS" OR
        capitalized_lib STREQUAL "NATIVE" OR
        capitalized_lib STREQUAL "NATIVECODEGEN" )
      set(is_specifier ON)
    endif()
  endif()
  set(${return_var} ${is_specifier} PARENT_SCOPE)
endfunction()
|
||||
|
||||
macro(llvm_config executable)
|
||||
cmake_parse_arguments(ARG "USE_SHARED" "" "" ${ARGN})
|
||||
@ -93,6 +127,21 @@ function(llvm_map_components_to_libnames out_libs)
|
||||
endif()
|
||||
string(TOUPPER "${LLVM_AVAILABLE_LIBS}" capitalized_libs)
|
||||
|
||||
get_property(LLVM_TARGETS_CONFIGURED GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED)
|
||||
|
||||
# Generally in our build system we avoid order-dependence. Unfortunately since
|
||||
# not all targets create the same set of libraries we actually need to ensure
|
||||
# that all build targets associated with a target are added before we can
|
||||
# process target dependencies.
|
||||
if(NOT LLVM_TARGETS_CONFIGURED)
|
||||
foreach(c ${link_components})
|
||||
is_llvm_target_specifier(${c} iltl_result ALL_TARGETS)
|
||||
if(iltl_result)
|
||||
message(FATAL_ERROR "Specified target library before target registration is complete.")
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
# Expand some keywords:
|
||||
list(FIND LLVM_TARGETS_TO_BUILD "${LLVM_NATIVE_ARCH}" have_native_backend)
|
||||
list(FIND link_components "engine" engine_required)
|
||||
@ -141,6 +190,12 @@ function(llvm_map_components_to_libnames out_libs)
|
||||
if( TARGET LLVM${c}Disassembler )
|
||||
list(APPEND expanded_components "LLVM${c}Disassembler")
|
||||
endif()
|
||||
if( TARGET LLVM${c}Info )
|
||||
list(APPEND expanded_components "LLVM${c}Info")
|
||||
endif()
|
||||
if( TARGET LLVM${c}Utils )
|
||||
list(APPEND expanded_components "LLVM${c}Utils")
|
||||
endif()
|
||||
elseif( c STREQUAL "native" )
|
||||
# already processed
|
||||
elseif( c STREQUAL "nativecodegen" )
|
||||
@ -198,9 +253,16 @@ function(llvm_map_components_to_libnames out_libs)
|
||||
list(FIND capitalized_libs LLVM${capitalized} lib_idx)
|
||||
if( lib_idx LESS 0 )
|
||||
# The component is unknown. Maybe it is an omitted target?
|
||||
is_llvm_target_library(${c} iltl_result)
|
||||
if( NOT iltl_result )
|
||||
message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.")
|
||||
is_llvm_target_library(${c} iltl_result OMITTED_TARGETS)
|
||||
if(iltl_result)
|
||||
# A missing library to a directly referenced omitted target would be bad.
|
||||
message(FATAL_ERROR "Library '${c}' is a direct reference to a target library for an omitted target.")
|
||||
else()
|
||||
# If it is not an omitted target we should assume it is a component
|
||||
# that hasn't yet been processed by CMake. Missing components will
|
||||
# cause errors later in the configuration, so we can safely assume
|
||||
# that this is valid here.
|
||||
list(APPEND expanded_components LLVM${c})
|
||||
endif()
|
||||
else( lib_idx LESS 0 )
|
||||
list(GET LLVM_AVAILABLE_LIBS ${lib_idx} canonical_lib)
|
||||
|
@ -45,6 +45,10 @@ set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@)
|
||||
|
||||
set(LLVM_BUILD_32_BITS @LLVM_BUILD_32_BITS@)
|
||||
|
||||
if (NOT "@LLVM_PTHREAD_LIB@" STREQUAL "")
|
||||
set(LLVM_PTHREAD_LIB "@LLVM_PTHREAD_LIB@")
|
||||
endif()
|
||||
|
||||
set(LLVM_ENABLE_PLUGINS @LLVM_ENABLE_PLUGINS@)
|
||||
set(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS @LLVM_EXPORT_SYMBOLS_FOR_PLUGINS@)
|
||||
set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@)
|
||||
@ -75,4 +79,5 @@ if(NOT TARGET LLVMSupport)
|
||||
@llvm_config_include_buildtree_only_exports@
|
||||
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY LLVM_TARGETS_CONFIGURED On)
|
||||
include(${LLVM_CMAKE_DIR}/LLVM-Config.cmake)
|
||||
|
@ -23,6 +23,13 @@ function(tablegen project ofn)
|
||||
set(LLVM_TARGET_DEFINITIONS_ABSOLUTE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/${LLVM_TARGET_DEFINITIONS})
|
||||
endif()
|
||||
if (LLVM_ENABLE_DAGISEL_COV)
|
||||
list(FIND ARGN "-gen-dag-isel" idx)
|
||||
if( NOT idx EQUAL -1 )
|
||||
list(APPEND LLVM_TABLEGEN_FLAGS "-instrument-coverage")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${ofn}.tmp
|
||||
# Generate tablegen output in a temporary file.
|
||||
COMMAND ${${project}_TABLEGEN_EXE} ${ARGN} -I ${CMAKE_CURRENT_SOURCE_DIR}
|
||||
@ -92,7 +99,7 @@ macro(add_tablegen target project)
|
||||
set(LLVM_ENABLE_OBJLIB ON)
|
||||
endif()
|
||||
|
||||
add_llvm_utility(${target} ${ARGN})
|
||||
add_llvm_executable(${target} DISABLE_LLVM_LINK_LLVM_DYLIB ${ARGN})
|
||||
set(LLVM_LINK_COMPONENTS ${${target}_OLD_LLVM_LINK_COMPONENTS})
|
||||
|
||||
set(${project}_TABLEGEN "${target}" CACHE
|
||||
|
@ -25,60 +25,64 @@ function(add_version_info_from_vcs VERS)
|
||||
set(LLVM_REPOSITORY ${Project_WC_URL} PARENT_SCOPE)
|
||||
endif()
|
||||
endif()
|
||||
elseif( EXISTS ${SOURCE_DIR}/.git )
|
||||
set(result "${result}git")
|
||||
# Try to get a ref-id
|
||||
if( EXISTS ${SOURCE_DIR}/.git/svn )
|
||||
find_program(git_executable NAMES git git.exe git.cmd)
|
||||
if( git_executable )
|
||||
set(is_git_svn_rev_exact false)
|
||||
execute_process(COMMAND
|
||||
${git_executable} svn info
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
TIMEOUT 5
|
||||
RESULT_VARIABLE git_result
|
||||
OUTPUT_VARIABLE git_output)
|
||||
if( git_result EQUAL 0 )
|
||||
string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output})
|
||||
if(svn_url)
|
||||
set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE)
|
||||
endif()
|
||||
else()
|
||||
find_program(git_executable NAMES git git.exe git.cmd)
|
||||
|
||||
string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*"
|
||||
"\\2" git_svn_rev_number "${git_output}")
|
||||
set(SVN_REVISION ${git_svn_rev_number} PARENT_SCOPE)
|
||||
set(git_svn_rev "-svn-${git_svn_rev}")
|
||||
|
||||
# Determine if the HEAD points directly at a subversion revision.
|
||||
execute_process(COMMAND ${git_executable} svn find-rev HEAD
|
||||
if( git_executable )
|
||||
# Run from a subdirectory to force git to print an absolute path.
|
||||
execute_process(COMMAND ${git_executable} rev-parse --git-dir
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}/cmake
|
||||
RESULT_VARIABLE git_result
|
||||
OUTPUT_VARIABLE git_dir)
|
||||
if(git_result EQUAL 0)
|
||||
# Try to get a ref-id
|
||||
string(STRIP "${git_dir}" git_dir)
|
||||
set(result "${result}git")
|
||||
if( EXISTS ${git_dir}/svn )
|
||||
# Get the repository URL
|
||||
execute_process(COMMAND
|
||||
${git_executable} svn info
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
TIMEOUT 5
|
||||
RESULT_VARIABLE git_result
|
||||
OUTPUT_VARIABLE git_output)
|
||||
if( git_result EQUAL 0 )
|
||||
string(STRIP "${git_output}" git_head_svn_rev_number)
|
||||
if( git_head_svn_rev_number EQUAL git_svn_rev_number )
|
||||
set(is_git_svn_rev_exact true)
|
||||
string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output})
|
||||
if(svn_url)
|
||||
set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE)
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
set(git_svn_rev "")
|
||||
|
||||
# Get the svn revision number for this git commit if one exists.
|
||||
execute_process(COMMAND ${git_executable} svn find-rev HEAD
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
TIMEOUT 5
|
||||
RESULT_VARIABLE git_result
|
||||
OUTPUT_VARIABLE git_head_svn_rev_number
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
if( git_result EQUAL 0 AND git_output)
|
||||
set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE)
|
||||
set(git_svn_rev "-svn-${git_head_svn_rev_number}")
|
||||
else()
|
||||
set(git_svn_rev "")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Get the git ref id
|
||||
execute_process(COMMAND
|
||||
${git_executable} rev-parse --short HEAD
|
||||
WORKING_DIRECTORY ${SOURCE_DIR}
|
||||
TIMEOUT 5
|
||||
RESULT_VARIABLE git_result
|
||||
OUTPUT_VARIABLE git_output)
|
||||
OUTPUT_VARIABLE git_ref_id
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if( git_result EQUAL 0 AND NOT is_git_svn_rev_exact )
|
||||
string(STRIP "${git_output}" git_ref_id)
|
||||
if( git_result EQUAL 0 )
|
||||
set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
|
||||
set(result "${result}${git_svn_rev}-${git_ref_id}")
|
||||
else()
|
||||
set(result "${result}${git_svn_rev}")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
@ -4,6 +4,7 @@ SET(CMAKE_SYSTEM_NAME Darwin)
|
||||
SET(CMAKE_SYSTEM_VERSION 13)
|
||||
SET(CMAKE_CXX_COMPILER_WORKS True)
|
||||
SET(CMAKE_C_COMPILER_WORKS True)
|
||||
SET(IOS True)
|
||||
|
||||
if(NOT CMAKE_OSX_SYSROOT)
|
||||
execute_process(COMMAND xcodebuild -version -sdk iphoneos Path
|
||||
|
@ -19,20 +19,73 @@ Address Spaces
|
||||
|
||||
The AMDGPU back-end uses the following address space mapping:
|
||||
|
||||
============= ============================================
|
||||
Address Space Memory Space
|
||||
============= ============================================
|
||||
0 Private
|
||||
1 Global
|
||||
2 Constant
|
||||
3 Local
|
||||
4 Generic (Flat)
|
||||
5 Region
|
||||
============= ============================================
|
||||
================== =================== ==============
|
||||
LLVM Address Space DWARF Address Space Memory Space
|
||||
================== =================== ==============
|
||||
0 1 Private
|
||||
1 N/A Global
|
||||
2 N/A Constant
|
||||
3 2 Local
|
||||
4 N/A Generic (Flat)
|
||||
5 N/A Region
|
||||
================== =================== ==============
|
||||
|
||||
The terminology in the table, aside from the region memory space, is from the
|
||||
OpenCL standard.
|
||||
|
||||
LLVM Address Space is used throughout LLVM (for example, in LLVM IR). DWARF
|
||||
Address Space is emitted in DWARF, and is used by tools, such as a debugger,
|
||||
profiler and others.
|
||||
|
||||
Trap Handler ABI
|
||||
----------------
|
||||
The OS element of the target triple controls the trap handler behavior.
|
||||
|
||||
HSA OS
|
||||
^^^^^^
|
||||
For code objects generated by AMDGPU back-end for the HSA OS, the runtime
|
||||
installs a trap handler that supports the s_trap instruction with the following
|
||||
usage:
|
||||
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|Usage |Code Sequence|Trap Handler Inputs|Description |
|
||||
+==============+=============+===================+============================+
|
||||
|reserved |s_trap 0x00 | |Reserved by hardware. |
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|HSA debugtrap |s_trap 0x01 |SGPR0-1: queue_ptr |Reserved for HSA debugtrap |
|
||||
|(arg) | |VGPR0: arg |intrinsic (not implemented).|
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|llvm.trap |s_trap 0x02 |SGPR0-1: queue_ptr |Causes dispatch to be |
|
||||
| | | |terminated and its |
|
||||
| | | |associated queue put into |
|
||||
| | | |the error state. |
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|llvm.debugtrap| s_trap 0x03 |SGPR0-1: queue_ptr |If debugger not installed |
|
||||
| | | |handled same as llvm.trap. |
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|debugger |s_trap 0x07 | |Reserved for debugger |
|
||||
|breakpoint | | |breakpoints. |
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|debugger |s_trap 0x08 | |Reserved for debugger. |
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|debugger |s_trap 0xfe | |Reserved for debugger. |
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|debugger |s_trap 0xff | |Reserved for debugger. |
|
||||
+--------------+-------------+-------------------+----------------------------+
|
||||
|
||||
Non-HSA OS
|
||||
^^^^^^^^^^
|
||||
For code objects generated by AMDGPU back-end for non-HSA OS, the runtime does
|
||||
not install a trap handler. The llvm.trap and llvm.debugtrap instructions are
|
||||
handled as follows:
|
||||
|
||||
=============== ============= ===============================================
|
||||
Usage Code Sequence Description
|
||||
=============== ============= ===============================================
|
||||
llvm.trap s_endpgm Causes wavefront to be terminated.
|
||||
llvm.debugtrap s_nop No operation. Compiler warning generated that
|
||||
there is no trap handler installed.
|
||||
=============== ============= ===============================================
|
||||
|
||||
Assembler
|
||||
=========
|
||||
@ -204,7 +257,7 @@ SOPP Instruction Examples
|
||||
For full list of supported instructions, refer to "SOPP Instructions" in ISA Manual.
|
||||
|
||||
Unless otherwise mentioned, little verification is performed on the operands
|
||||
of SOPP Instrucitons, so it is up to the programmer to be familiar with the
|
||||
of SOPP Instructions, so it is up to the programmer to be familiar with the
|
||||
range or acceptable values.
|
||||
|
||||
Vector ALU Instruction Examples
|
||||
|
@ -136,7 +136,7 @@ be overlapping in some way, but do not start at the same address.
|
||||
|
||||
The ``MustAlias`` response may only be returned if the two memory objects are
|
||||
guaranteed to always start at exactly the same location. A ``MustAlias``
|
||||
response implies that the pointers compare equal.
|
||||
response does not imply that the pointers compare equal.
|
||||
|
||||
The ``getModRefInfo`` methods
|
||||
-----------------------------
|
||||
|
@ -839,16 +839,6 @@ fields are
|
||||
* *unnamed_addr*: If present, an encoding of the
|
||||
:ref:`unnamed_addr<bcunnamedaddr>` attribute of this alias
|
||||
|
||||
MODULE_CODE_PURGEVALS Record
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
``[PURGEVALS, numvals]``
|
||||
|
||||
The ``PURGEVALS`` record (code 10) resets the module-level value list to the
|
||||
size given by the single operand value. Module-level value list items are added
|
||||
by ``GLOBALVAR``, ``FUNCTION``, and ``ALIAS`` records. After a ``PURGEVALS``
|
||||
record is seen, new value indices will start from the given *numvals* value.
|
||||
|
||||
.. _MODULE_CODE_GCNAME:
|
||||
|
||||
MODULE_CODE_GCNAME Record
|
||||
|
@ -123,11 +123,11 @@ To allow comparing different functions during inter-procedural analysis and
|
||||
optimization, ``MD_prof`` nodes can also be assigned to a function definition.
|
||||
The first operand is a string indicating the name of the associated counter.
|
||||
|
||||
Currently, one counter is supported: "function_entry_count". This is a 64-bit
|
||||
counter that indicates the number of times that this function was invoked (in
|
||||
the case of instrumentation-based profiles). In the case of sampling-based
|
||||
profiles, this counter is an approximation of how many times the function was
|
||||
invoked.
|
||||
Currently, one counter is supported: "function_entry_count". The second operand
|
||||
is a 64-bit counter that indicates the number of times that this function was
|
||||
invoked (in the case of instrumentation-based profiles). In the case of
|
||||
sampling-based profiles, this operand is an approximation of how many times
|
||||
the function was invoked.
|
||||
|
||||
For example, in the code below, the instrumentation for function foo()
|
||||
indicates that it was called 2,590 times at runtime.
|
||||
@ -138,3 +138,13 @@ indicates that it was called 2,590 times at runtime.
|
||||
ret i32 0
|
||||
}
|
||||
!1 = !{!"function_entry_count", i64 2590}
|
||||
|
||||
If "function_entry_count" has more than 2 operands, the later operands are
|
||||
the GUIDs of the functions that need to be imported by ThinLTO. This is only
|
||||
set by sampling based profile. It is needed because the sampling based profile
|
||||
was collected on a binary that had already imported and inlined these functions,
|
||||
and we need to ensure the IR matches in the ThinLTO backends for profile
|
||||
annotation. The reason why we cannot annotate this on the callsite is that it
|
||||
can only go down 1 level in the call chain. For the cases where
|
||||
foo_in_a_cc()->bar_in_b_cc()->baz_in_c_cc(), we will need to go down 2 levels
|
||||
in the call chain to import both bar_in_b_cc and baz_in_c_cc.
|
||||
|
@ -382,6 +382,18 @@ LLVM-specific variables
|
||||
lines, enabling link-time optimization. Possible values are ``Off``,
|
||||
``On``, ``Thin`` and ``Full``. Defaults to OFF.
|
||||
|
||||
**LLVM_USE_LINKER**:STRING
|
||||
Add ``-fuse-ld={name}`` to the link invocation. The possible values depend on
|
||||
your compiler, for clang the value can be an absolute path to your custom
|
||||
linker, otherwise clang will prefix the name with ``ld.`` and apply its usual
|
||||
search. For example to link LLVM with the Gold linker, cmake can be invoked
|
||||
with ``-DLLVM_USE_LINKER=gold``.
|
||||
|
||||
**LLVM_ENABLE_LLD**:BOOL
|
||||
This option is equivalent to ``-DLLVM_USE_LINKER=lld``, except during a 2-stage
|
||||
build where a dependency is added from the first stage to the second ensuring
|
||||
that lld is built before stage2 begins.
|
||||
|
||||
**LLVM_PARALLEL_COMPILE_JOBS**:STRING
|
||||
Define the maximum number of concurrent compilation jobs.
|
||||
|
||||
@ -457,6 +469,8 @@ LLVM-specific variables
|
||||
|
||||
**SPHINX_EXECUTABLE**:STRING
|
||||
The path to the ``sphinx-build`` executable detected by CMake.
|
||||
For installation instructions, see
|
||||
http://www.sphinx-doc.org/en/latest/install.html
|
||||
|
||||
**SPHINX_OUTPUT_HTML**:BOOL
|
||||
If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) then the targets for
|
||||
|
@ -1005,7 +1005,7 @@ The TableGen DAG instruction selector generator reads the instruction patterns
|
||||
in the ``.td`` file and automatically builds parts of the pattern matching code
|
||||
for your target. It has the following strengths:
|
||||
|
||||
* At compiler-compiler time, it analyzes your instruction patterns and tells you
|
||||
* At compiler-compile time, it analyzes your instruction patterns and tells you
|
||||
if your patterns make sense or not.
|
||||
|
||||
* It can handle arbitrary constraints on operands for the pattern match. In
|
||||
@ -1026,7 +1026,7 @@ for your target. It has the following strengths:
|
||||
|
||||
* Targets can define their own (and rely on built-in) "pattern fragments".
|
||||
Pattern fragments are chunks of reusable patterns that get inlined into your
|
||||
patterns during compiler-compiler time. For example, the integer "``(not
|
||||
patterns during compiler-compile time. For example, the integer "``(not
|
||||
x)``" operation is actually defined as a pattern fragment that expands as
|
||||
"``(xor x, -1)``", since the SelectionDAG does not have a native '``not``'
|
||||
operation. Targets can define their own short-hand fragments as they see fit.
|
||||
|
@ -77,6 +77,15 @@ OPTIONS
|
||||
-verify``. With this option FileCheck will verify that input does not contain
|
||||
warnings not covered by any ``CHECK:`` patterns.
|
||||
|
||||
.. option:: --enable-var-scope
|
||||
|
||||
Enables scope for regex variables.
|
||||
|
||||
Variables with names that start with ``$`` are considered global and
|
||||
remain set throughout the file.
|
||||
|
||||
All other variables get undefined after each encountered ``CHECK-LABEL``.
|
||||
|
||||
.. option:: -version
|
||||
|
||||
Show the version number of this program.
|
||||
@ -344,6 +353,9 @@ matched by the directive cannot also be matched by any other check present in
|
||||
other unique identifiers. Conceptually, the presence of ``CHECK-LABEL`` divides
|
||||
the input stream into separate blocks, each of which is processed independently,
|
||||
preventing a ``CHECK:`` directive in one block matching a line in another block.
|
||||
If ``--enable-var-scope`` is in effect, all local variables are cleared at the
|
||||
beginning of the block.
|
||||
|
||||
For example,
|
||||
|
||||
.. code-block:: llvm
|
||||
@ -436,6 +448,13 @@ were defined on. For example:
|
||||
Can be useful if you want the operands of ``op`` to be the same register,
|
||||
and don't care exactly which register it is.
|
||||
|
||||
If ``--enable-var-scope`` is in effect, variables with names that
|
||||
start with ``$`` are considered to be global. All other variables are
|
||||
local. All local variables get undefined at the beginning of each
|
||||
CHECK-LABEL block. Global variables are not affected by CHECK-LABEL.
|
||||
This makes it easier to ensure that individual tests are not affected
|
||||
by variables set in preceding tests.
|
||||
|
||||
FileCheck Expressions
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
@ -56,7 +56,7 @@ GENERAL OPTIONS
|
||||
Search for :file:`{NAME}.cfg` and :file:`{NAME}.site.cfg` when searching for
|
||||
test suites, instead of :file:`lit.cfg` and :file:`lit.site.cfg`.
|
||||
|
||||
.. option:: -D NAME, -D NAME=VALUE, --param NAME, --param NAME=VALUE
|
||||
.. option:: -D NAME[=VALUE], --param NAME[=VALUE]
|
||||
|
||||
Add a user defined parameter ``NAME`` with the given ``VALUE`` (or the empty
|
||||
string if not given). The meaning and use of these parameters is test suite
|
||||
@ -152,6 +152,23 @@ SELECTION OPTIONS
|
||||
|
||||
Run the tests in a random order.
|
||||
|
||||
.. option:: --num-shards=M
|
||||
|
||||
Divide the set of selected tests into ``M`` equal-sized subsets or
|
||||
"shards", and run only one of them. Must be used with the
|
||||
``--run-shard=N`` option, which selects the shard to run. The environment
|
||||
variable ``LIT_NUM_SHARDS`` can also be used in place of this
|
||||
option. These two options provide a coarse mechanism for partitioning large
|
||||
testsuites, for parallel execution on separate machines (say in a large
|
||||
testing farm).
|
||||
|
||||
.. option:: --run-shard=N
|
||||
|
||||
Select which shard to run, assuming the ``--num-shards=M`` option was
|
||||
provided. The two options must be used together, and the value of ``N``
|
||||
must be in the range ``1..M``. The environment variable
|
||||
``LIT_RUN_SHARD`` can also be used in place of this option.
|
||||
|
||||
ADDITIONAL OPTIONS
|
||||
------------------
|
||||
|
||||
@ -362,7 +379,7 @@ PRE-DEFINED SUBSTITUTIONS
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
:program:`lit` provides various patterns that can be used with the RUN command.
|
||||
These are defined in TestRunner.py.
|
||||
These are defined in TestRunner.py. The base set of substitutions are:
|
||||
|
||||
========== ==============
|
||||
Macro Substitution
|
||||
@ -374,17 +391,13 @@ These are defined in TestRunner.py.
|
||||
%t temporary file name unique to the test
|
||||
%T temporary directory unique to the test
|
||||
%% %
|
||||
%/s same as %s but replace all / with \\
|
||||
%/S same as %S but replace all / with \\
|
||||
%/p same as %p but replace all / with \\
|
||||
%/t same as %t but replace all / with \\
|
||||
%/T same as %T but replace all / with \\
|
||||
========== ==============
|
||||
|
||||
Further substitution patterns might be defined by each test module.
|
||||
See the modules :ref:`local-configuration-files`.
|
||||
Other substitutions are provided that are variations on this base set and
|
||||
further substitution patterns can be defined by each test module. See the
|
||||
modules :ref:`local-configuration-files`.
|
||||
|
||||
More information on the testing infrastucture can be found in the
|
||||
More detailed information on substitutions can be found in the
|
||||
:doc:`../TestingGuide`.
|
||||
|
||||
TEST RUN OUTPUT FORMAT
|
||||
|
@ -322,6 +322,10 @@ OPTIONS
|
||||
universal binary or to use an architecture that does not match a
|
||||
non-universal binary.
|
||||
|
||||
.. option:: -show-functions
|
||||
|
||||
Show coverage summaries for each function.
|
||||
|
||||
.. program:: llvm-cov export
|
||||
|
||||
.. _llvm-cov-export:
|
||||
|
@ -196,6 +196,10 @@ OPTIONS
|
||||
|
||||
Specify that the input profile is a sample-based profile.
|
||||
|
||||
.. option:: -memop-sizes
|
||||
|
||||
Show the profiled sizes of the memory intrinsic calls for shown functions.
|
||||
|
||||
EXIT STATUS
|
||||
-----------
|
||||
|
||||
|
@ -89,7 +89,7 @@ and 6 after which the coroutine will be destroyed.
|
||||
|
||||
The LLVM IR for this coroutine looks like this:
|
||||
|
||||
.. code-block:: none
|
||||
.. code-block:: llvm
|
||||
|
||||
define i8* @f(i32 %n) {
|
||||
entry:
|
||||
@ -110,7 +110,7 @@ The LLVM IR for this coroutine looks like this:
|
||||
call void @free(i8* %mem)
|
||||
br label %suspend
|
||||
suspend:
|
||||
call void @llvm.coro.end(i8* %hdl, i1 false)
|
||||
%unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
|
||||
ret i8* %hdl
|
||||
}
|
||||
|
||||
@ -156,7 +156,7 @@ We also store addresses of the resume and destroy functions so that the
|
||||
when its identity cannot be determined statically at compile time. For our
|
||||
example, the coroutine frame will be:
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
%f.frame = type { void (%f.frame*)*, void (%f.frame*)*, i32 }
|
||||
|
||||
@ -164,7 +164,7 @@ After resume and destroy parts are outlined, function `f` will contain only the
|
||||
code responsible for creation and initialization of the coroutine frame and
|
||||
execution of the coroutine until a suspend point is reached:
|
||||
|
||||
.. code-block:: none
|
||||
.. code-block:: llvm
|
||||
|
||||
define i8* @f(i32 %n) {
|
||||
entry:
|
||||
@ -224,7 +224,7 @@ In the entry block, we will call `coro.alloc`_ intrinsic that will return `true`
|
||||
when dynamic allocation is required, and `false` if dynamic allocation is
|
||||
elided.
|
||||
|
||||
.. code-block:: none
|
||||
.. code-block:: llvm
|
||||
|
||||
entry:
|
||||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
|
||||
@ -242,7 +242,7 @@ In the cleanup block, we will make freeing the coroutine frame conditional on
|
||||
`coro.free`_ intrinsic. If allocation is elided, `coro.free`_ returns `null`
|
||||
thus skipping the deallocation code:
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %hdl)
|
||||
@ -286,7 +286,7 @@ Let's consider the coroutine that has more than one suspend point:
|
||||
Matching LLVM code would look like (with the rest of the code remaining the same
|
||||
as the code in the previous section):
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
loop:
|
||||
%n.addr = phi i32 [ %n, %entry ], [ %inc, %loop.resume ]
|
||||
@ -383,17 +383,17 @@ point when coroutine should be ready for resumption (namely, when a resume index
|
||||
should be stored in the coroutine frame, so that it can be resumed at the
|
||||
correct resume point):
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
if.true:
|
||||
%save1 = call token @llvm.coro.save(i8* %hdl)
|
||||
call void async_op1(i8* %hdl)
|
||||
call void @async_op1(i8* %hdl)
|
||||
%suspend1 = call i8 @llvm.coro.suspend(token %save1, i1 false)
|
||||
switch i8 %suspend1, label %suspend [i8 0, label %resume1
|
||||
i8 1, label %cleanup]
|
||||
if.false:
|
||||
%save2 = call token @llvm.coro.save(i8* %hdl)
|
||||
call void async_op2(i8* %hdl)
|
||||
call void @async_op2(i8* %hdl)
|
||||
%suspend2 = call i8 @llvm.coro.suspend(token %save2, i1 false)
|
||||
switch i8 %suspend2, label %suspend [i8 0, label %resume2
|
||||
i8 1, label %cleanup]
|
||||
@ -411,7 +411,7 @@ be used to communicate with the coroutine. This distinguished alloca is called
|
||||
The following coroutine designates a 32 bit integer `promise` and uses it to
|
||||
store the current value produced by a coroutine.
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
define i8* @f(i32 %n) {
|
||||
entry:
|
||||
@ -440,7 +440,7 @@ store the current value produced by a coroutine.
|
||||
call void @free(i8* %mem)
|
||||
br label %suspend
|
||||
suspend:
|
||||
call void @llvm.coro.end(i8* %hdl, i1 false)
|
||||
%unused = call i1 @llvm.coro.end(i8* %hdl, i1 false)
|
||||
ret i8* %hdl
|
||||
}
|
||||
|
||||
@ -692,7 +692,7 @@ a coroutine user are responsible to makes sure there is no data races.
|
||||
Example:
|
||||
""""""""
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
define i8* @f(i32 %n) {
|
||||
entry:
|
||||
@ -812,7 +812,7 @@ pointer that was returned by prior `coro.begin` call.
|
||||
Example (custom deallocation function):
|
||||
"""""""""""""""""""""""""""""""""""""""
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %frame)
|
||||
@ -827,7 +827,7 @@ Example (custom deallocation function):
|
||||
Example (standard deallocation functions):
|
||||
""""""""""""""""""""""""""""""""""""""""""
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
cleanup:
|
||||
%mem = call i8* @llvm.coro.free(token %id, i8* %frame)
|
||||
@ -864,7 +864,7 @@ when possible.
|
||||
Example:
|
||||
""""""""
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
entry:
|
||||
%id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null)
|
||||
@ -955,41 +955,90 @@ A frontend should emit exactly one `coro.id` intrinsic per coroutine.
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
::
|
||||
|
||||
declare void @llvm.coro.end(i8* <handle>, i1 <unwind>)
|
||||
declare i1 @llvm.coro.end(i8* <handle>, i1 <unwind>)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.coro.end``' marks the point where execution of the resume part of
|
||||
the coroutine should end and control returns back to the caller.
|
||||
the coroutine should end and control should return to the caller.
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The first argument should refer to the coroutine handle of the enclosing coroutine.
|
||||
The first argument should refer to the coroutine handle of the enclosing
|
||||
coroutine. A frontend is allowed to supply null as the first parameter, in this
|
||||
case `coro-early` pass will replace the null with an appropriate coroutine
|
||||
handle value.
|
||||
|
||||
The second argument should be `true` if this coro.end is in the block that is
|
||||
part of the unwind sequence leaving the coroutine body due to exception prior to
|
||||
the first reaching any suspend points, and `false` otherwise.
|
||||
part of the unwind sequence leaving the coroutine body due to an exception and
|
||||
`false` otherwise.
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
The `coro.end`_ intrinsic is a no-op during an initial invocation of the
|
||||
coroutine. When the coroutine resumes, the intrinsic marks the point when
|
||||
coroutine need to return control back to the caller.
|
||||
The purpose of this intrinsic is to allow frontends to mark the cleanup and
|
||||
other code that is only relevant during the initial invocation of the coroutine
|
||||
and should not be present in resume and destroy parts.
|
||||
|
||||
This intrinsic is removed by the CoroSplit pass when a coroutine is split into
|
||||
the start, resume and destroy parts. In start part, the intrinsic is removed,
|
||||
in resume and destroy parts, it is replaced with `ret void` instructions and
|
||||
This intrinsic is lowered when a coroutine is split into
|
||||
the start, resume and destroy parts. In the start part, it is a no-op,
|
||||
in resume and destroy parts, it is replaced with `ret void` instruction and
|
||||
the rest of the block containing `coro.end` instruction is discarded.
|
||||
|
||||
In landing pads it is replaced with an appropriate instruction to unwind to
|
||||
caller.
|
||||
caller. The handling of coro.end differs depending on whether the target is
|
||||
using landingpad or WinEH exception model.
|
||||
|
||||
A frontend is allowed to supply null as the first parameter, in this case
|
||||
`coro-early` pass will replace the null with an appropriate coroutine handle
|
||||
value.
|
||||
For landingpad based exception model, it is expected that frontend uses the
|
||||
`coro.end`_ intrinsic as follows:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
ehcleanup:
|
||||
%InResumePart = call i1 @llvm.coro.end(i8* null, i1 true)
|
||||
br i1 %InResumePart, label %eh.resume, label %cleanup.cont
|
||||
|
||||
cleanup.cont:
|
||||
; rest of the cleanup
|
||||
|
||||
eh.resume:
|
||||
%exn = load i8*, i8** %exn.slot, align 8
|
||||
%sel = load i32, i32* %ehselector.slot, align 4
|
||||
%lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
|
||||
%lpad.val29 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
|
||||
resume { i8*, i32 } %lpad.val29
|
||||
|
||||
The `CoroSplit` pass replaces `coro.end` with ``True`` in the resume functions,
|
||||
thus leading to immediate unwind to the caller, whereas in start function it
|
||||
is replaced with ``False``, thus allowing to proceed to the rest of the cleanup
|
||||
code that is only needed during initial invocation of the coroutine.
|
||||
|
||||
For Windows Exception handling model, a frontend should attach a funclet bundle
|
||||
referring to an enclosing cleanuppad as follows:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
ehcleanup:
|
||||
%tok = cleanuppad within none []
|
||||
%unused = call i1 @llvm.coro.end(i8* null, i1 true) [ "funclet"(token %tok) ]
|
||||
cleanupret from %tok unwind label %RestOfTheCleanup
|
||||
|
||||
The `CoroSplit` pass, if the funclet bundle is present, will insert
|
||||
``cleanupret from %tok unwind to caller`` before
|
||||
the `coro.end`_ intrinsic and will remove the rest of the block.
|
||||
|
||||
The following table summarizes the handling of `coro.end`_ intrinsic.
|
||||
|
||||
+--------------------------+-------------------+-------------------------------+
|
||||
| | In Start Function | In Resume/Destroy Functions |
|
||||
+--------------------------+-------------------+-------------------------------+
|
||||
|unwind=false | nothing |``ret void`` |
|
||||
+------------+-------------+-------------------+-------------------------------+
|
||||
| | WinEH | nothing |``cleanupret unwind to caller``|
|
||||
|unwind=true +-------------+-------------------+-------------------------------+
|
||||
| | Landingpad | nothing | nothing |
|
||||
+------------+-------------+-------------------+-------------------------------+
|
||||
|
||||
.. _coro.suspend:
|
||||
.. _suspend points:
|
||||
@ -1025,7 +1074,7 @@ basic blocks.
|
||||
Example (normal suspend point):
|
||||
"""""""""""""""""""""""""""""""
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
%0 = call i8 @llvm.coro.suspend(token none, i1 false)
|
||||
switch i8 %0, label %suspend [i8 0, label %resume
|
||||
@ -1034,7 +1083,7 @@ Example (normal suspend point):
|
||||
Example (final suspend point):
|
||||
""""""""""""""""""""""""""""""
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
while.end:
|
||||
%s.final = call i8 @llvm.coro.suspend(token none, i1 true)
|
||||
@ -1095,10 +1144,10 @@ In such a case, a coroutine should be ready for resumption prior to a call to
|
||||
a different thread possibly prior to `async_op` call returning control back
|
||||
to the coroutine:
|
||||
|
||||
.. code-block:: text
|
||||
.. code-block:: llvm
|
||||
|
||||
%save1 = call token @llvm.coro.save(i8* %hdl)
|
||||
call void async_op1(i8* %hdl)
|
||||
call void @async_op1(i8* %hdl)
|
||||
%suspend1 = call i8 @llvm.coro.suspend(token %save1, i1 false)
|
||||
switch i8 %suspend1, label %suspend [i8 0, label %resume1
|
||||
i8 1, label %cleanup]
|
||||
|
@ -21,7 +21,7 @@ to know how it works under the hood. A prior knowledge of how Clang's profile
|
||||
guided optimization works is useful, but not required.
|
||||
|
||||
We start by showing how to use LLVM and Clang for code coverage analysis,
|
||||
then we briefly desribe LLVM's code coverage mapping format and the
|
||||
then we briefly describe LLVM's code coverage mapping format and the
|
||||
way that Clang and LLVM's code coverage tool work with this format. After
|
||||
the basics are down, more advanced features of the coverage mapping format
|
||||
are discussed - such as the data structures, LLVM IR representation and
|
||||
|
@ -62,7 +62,7 @@ way to see what other people are interested in and watching the flow of the
|
||||
project as a whole.
|
||||
|
||||
We recommend that active developers register an email account with `LLVM
|
||||
Bugzilla <http://llvm.org/bugs/>`_ and preferably subscribe to the `llvm-bugs
|
||||
Bugzilla <https://bugs.llvm.org/>`_ and preferably subscribe to the `llvm-bugs
|
||||
<http://lists.llvm.org/mailman/listinfo/llvm-bugs>`_ email list to keep track
|
||||
of bugs and enhancements occurring in LLVM. We really appreciate people who are
|
||||
proactive at catching incoming bugs in their components and dealing with them
|
||||
@ -261,7 +261,7 @@ the future that the change is responsible for. For example:
|
||||
* The changes should not cause performance or correctness regressions in code
|
||||
compiled by LLVM on all applicable targets.
|
||||
|
||||
* You are expected to address any `Bugzilla bugs <http://llvm.org/bugs/>`_ that
|
||||
* You are expected to address any `Bugzilla bugs <https://bugs.llvm.org/>`_ that
|
||||
result from your change.
|
||||
|
||||
We prefer for this to be handled before submission but understand that it isn't
|
||||
|
@ -204,9 +204,49 @@ For example, the following code creates two sections named ``.text``.
|
||||
The unique number is not present in the resulting object at all. It is just used
|
||||
in the assembler to differentiate the sections.
|
||||
|
||||
The 'o' flag is mapped to SHF_LINK_ORDER. If it is present, a symbol
|
||||
must be given that identifies the section to be placed in the
|
||||
.sh_link.
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
.section .foo,"a",@progbits
|
||||
.Ltmp:
|
||||
.section .bar,"ao",@progbits,.Ltmp
|
||||
|
||||
which is equivalent to just
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
.section .foo,"a",@progbits
|
||||
.section .bar,"ao",@progbits,.foo
|
||||
|
||||
|
||||
Target Specific Behaviour
|
||||
=========================
|
||||
|
||||
X86
|
||||
---
|
||||
|
||||
Relocations
|
||||
^^^^^^^^^^^
|
||||
|
||||
``@ABS8`` can be applied to symbols which appear as immediate operands to
|
||||
instructions that have an 8-bit immediate form for that operand. It causes
|
||||
the assembler to use the 8-bit form and an 8-bit relocation (e.g. ``R_386_8``
|
||||
or ``R_X86_64_8``) for the symbol.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
cmpq $foo@ABS8, %rdi
|
||||
|
||||
This causes the assembler to select the form of the 64-bit ``cmpq`` instruction
|
||||
that takes an 8-bit immediate operand that is sign extended to 64 bits, as
|
||||
opposed to ``cmpq $foo, %rdi`` which takes a 32-bit immediate operand. This
|
||||
is also not the same as ``cmpb $foo, %dil``, which is an 8-bit comparison.
|
||||
|
||||
Windows on ARM
|
||||
--------------
|
||||
|
||||
|
@ -47,12 +47,18 @@ The format of this section is
|
||||
uint32 : NumFaultingPCs
|
||||
uint32 : Reserved (expected to be 0)
|
||||
FunctionFaultInfo[NumFaultingPCs] {
|
||||
uint32 : FaultKind = FaultMaps::FaultingLoad (only legal value currently)
|
||||
uint32 : FaultKind
|
||||
uint32 : FaultingPCOffset
|
||||
uint32 : HandlerPCOffset
|
||||
}
|
||||
}
|
||||
|
||||
FaultKind describes the reason for the expected fault. Currently three kinds
|
||||
of faults are supported:
|
||||
|
||||
1. ``FaultMaps::FaultingLoad`` - fault due to load from memory.
|
||||
2. ``FaultMaps::FaultingLoadStore`` - fault due to instruction load and store.
|
||||
3. ``FaultMaps::FaultingStore`` - fault due to store to memory.
|
||||
|
||||
The ``ImplicitNullChecks`` pass
|
||||
===============================
|
||||
|
@ -52,6 +52,18 @@ Here's the short story for getting up and running quickly with LLVM:
|
||||
* ``cd llvm/tools``
|
||||
* ``svn co http://llvm.org/svn/llvm-project/cfe/trunk clang``
|
||||
|
||||
#. Checkout LLD linker **[Optional]**:
|
||||
|
||||
* ``cd where-you-want-llvm-to-live``
|
||||
* ``cd llvm/tools``
|
||||
* ``svn co http://llvm.org/svn/llvm-project/lld/trunk lld``
|
||||
|
||||
#. Checkout Polly Loop Optimizer **[Optional]**:
|
||||
|
||||
* ``cd where-you-want-llvm-to-live``
|
||||
* ``cd llvm/tools``
|
||||
* ``svn co http://llvm.org/svn/llvm-project/polly/trunk polly``
|
||||
|
||||
#. Checkout Compiler-RT (required to build the sanitizers) **[Optional]**:
|
||||
|
||||
* ``cd where-you-want-llvm-to-live``
|
||||
@ -719,10 +731,10 @@ Or a combination of multiple projects:
|
||||
|
||||
% cd $TOP_LEVEL_DIR
|
||||
% mkdir clang-build && cd clang-build
|
||||
% cmake -GNinja ../llvm-project/llvm -DLLVM_ENABLE_PROJECTS="clang;libcxx;compiler-rt"
|
||||
% cmake -GNinja ../llvm-project/llvm -DLLVM_ENABLE_PROJECTS="clang;libcxx;libcxxabi"
|
||||
|
||||
A helper script is provided in `llvm/utils/git-svn/git-llvm`. After you add it
|
||||
to your path, you can push committed changes upstream with `git llvm push`.
|
||||
A helper script is provided in ``llvm/utils/git-svn/git-llvm``. After you add it
|
||||
to your path, you can push committed changes upstream with ``git llvm push``.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
@ -731,10 +743,22 @@ to your path, you can push committed changes upstream with `git llvm push`.
|
||||
|
||||
While this is using SVN under the hood, it does not require any interaction from
|
||||
you with git-svn.
|
||||
After a few minutes, `git pull` should get back the changes as they were
|
||||
commited. Note that a current limitation is that `git` does not directly record
|
||||
file rename, and thus it is propagated to SVN as a combination of delete-add
|
||||
instead of a file rename.
|
||||
After a few minutes, ``git pull`` should get back the changes as they were
|
||||
committed. Note that a current limitation is that ``git`` does not directly
|
||||
record file rename, and thus it is propagated to SVN as a combination of
|
||||
delete-add instead of a file rename.
|
||||
|
||||
The SVN revision of each monorepo commit can be found in the commit notes. git
|
||||
does not fetch notes by default. The following commands will fetch the notes and
|
||||
configure git to fetch future notes. Use ``git notes show $commit`` to look up
|
||||
the SVN revision of a git commit. The notes show up in ``git log``, and searching
|
||||
the log is currently the recommended way to look up the git commit for a given
|
||||
SVN revision.
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
% git config --add remote.origin.fetch +refs/notes/commits:refs/notes/commits
|
||||
% git fetch
|
||||
|
||||
If you are using `arc` to interact with Phabricator, you need to manually put it
|
||||
at the root of the checkout:
|
||||
@ -793,7 +817,8 @@ used by people developing LLVM.
|
||||
+-------------------------+----------------------------------------------------+
|
||||
| LLVM_ENABLE_SPHINX | Build sphinx-based documentation from the source |
|
||||
| | code. This is disabled by default because it is |
|
||||
| | slow and generates a lot of output. |
|
||||
| | slow and generates a lot of output. Sphinx version |
|
||||
| | 1.5 or later recommended. |
|
||||
+-------------------------+----------------------------------------------------+
|
||||
| LLVM_BUILD_LLVM_DYLIB | Generate libLLVM.so. This library contains a |
|
||||
| | default set of LLVM components that can be |
|
||||
@ -1138,7 +1163,7 @@ the `Command Guide <CommandGuide/index.html>`_.
|
||||
``llc``
|
||||
|
||||
``llc`` is the LLVM backend compiler, which translates LLVM bitcode to a
|
||||
native code assembly file or to C code (with the ``-march=c`` option).
|
||||
native code assembly file.
|
||||
|
||||
``opt``
|
||||
|
||||
|
@ -358,41 +358,6 @@ existing patterns (as any pattern we can select is by definition legal).
|
||||
Expanding that to describe legalization actions is a much larger but
|
||||
potentially useful project.
|
||||
|
||||
.. _milegalizer-scalar-narrow:
|
||||
|
||||
Scalar narrow types
|
||||
^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In the AArch64 port, we currently mark as legal operations on narrow integer
|
||||
types that have a legal equivalent in a wider type.
|
||||
|
||||
For example, this:
|
||||
|
||||
%2(GPR,s8) = G_ADD %0, %1
|
||||
|
||||
is selected to a 32-bit instruction:
|
||||
|
||||
%2(GPR32) = ADDWrr %0, %1
|
||||
|
||||
This avoids unnecessarily legalizing operations that can be seen as legal:
|
||||
8-bit additions are supported, but happen to have a 32-bit result with the high
|
||||
24 bits undefined.
|
||||
|
||||
``TODO``:
|
||||
This has implications regarding vreg classes (as narrow values can now be
|
||||
represented by wider vregs) and should be investigated further.
|
||||
|
||||
``TODO``:
|
||||
In particular, s1 comparison results can be represented as wider values in
|
||||
different ways.
|
||||
SelectionDAG has the notion of BooleanContents, which allows targets to choose
|
||||
what true and false are when in a larger register:
|
||||
|
||||
* ``ZeroOrOne`` --- if only 0 and 1 are valid bools, even in a larger register.
|
||||
* ``ZeroOrMinusOne`` --- if -1 is true (common for vector instructions,
|
||||
where compares produce -1).
|
||||
* ``Undefined`` --- if only the low bit is relevant in determining truth.
|
||||
|
||||
.. _milegalizer-non-power-of-2:
|
||||
|
||||
Non-power of 2 types
|
||||
|
@ -6,9 +6,19 @@ Introduction
|
||||
============
|
||||
|
||||
This document contains information about adding a build configuration and
|
||||
buildslave to private slave builder to LLVM Buildbot Infrastructure
|
||||
`<http://lab.llvm.org:8011>`_.
|
||||
buildslave to private slave builder to LLVM Buildbot Infrastructure.
|
||||
|
||||
Buildmasters
|
||||
============
|
||||
|
||||
There are two buildmasters running.
|
||||
|
||||
* The main buildmaster at `<http://lab.llvm.org:8011>`_. All builders attached
|
||||
to this machine will notify commit authors every time they break the build.
|
||||
* The staging buildbot at `<http://lab.llvm.org:8014>`_. All builders attached
|
||||
to this machine will be completely silent by default when the build is broken.
|
||||
Builders for experimental backends should generally be attached to this
|
||||
buildmaster.
|
||||
|
||||
Steps To Add Builder To LLVM Buildbot
|
||||
=====================================
|
||||
@ -73,6 +83,11 @@ Here are the steps you can follow to do so:
|
||||
* slaves are added to ``buildbot/osuosl/master/config/slaves.py``
|
||||
* builders are added to ``buildbot/osuosl/master/config/builders.py``
|
||||
|
||||
It is possible to whitelist email addresses to unconditionally receive notifications
|
||||
on build failure; for this you'll need to add an ``InformativeMailNotifier`` to
|
||||
``buildbot/osuosl/master/config/status.py``. This is particularly useful for the
|
||||
staging buildmaster which is silent otherwise.
|
||||
|
||||
#. Send the buildslave access name and the access password directly to
|
||||
`Galina Kistanova <mailto:gkistanova@gmail.com>`_, and wait till she
|
||||
will let you know that your changes are applied and buildmaster is
|
||||
|
@ -19,7 +19,7 @@ section to narrow down the bug so that the person who fixes it will be able
|
||||
to find the problem more easily.
|
||||
|
||||
Once you have a reduced test-case, go to `the LLVM Bug Tracking System
|
||||
<http://llvm.org/bugs/enter_bug.cgi>`_ and fill out the form with the
|
||||
<https://bugs.llvm.org/enter_bug.cgi>`_ and fill out the form with the
|
||||
necessary details (note that you don't need to pick a category, just use
|
||||
the "new-bugs" category if you're not sure). The bug description should
|
||||
contain the following information:
|
||||
|
@ -38,36 +38,35 @@ Because attributes are no longer represented as a bit mask, you will need to
|
||||
convert any code which does treat them as a bit mask to use the new query
|
||||
methods on the Attribute class.
|
||||
|
||||
``AttributeSet``
|
||||
================
|
||||
``AttributeList``
|
||||
=================
|
||||
|
||||
The ``AttributeSet`` class replaces the old ``AttributeList`` class. The
|
||||
``AttributeSet`` stores a collection of Attribute objects for each kind of
|
||||
object that may have an attribute associated with it: the function as a
|
||||
whole, the return type, or the function's parameters. A function's attributes
|
||||
are at index ``AttributeSet::FunctionIndex``; the return type's attributes are
|
||||
at index ``AttributeSet::ReturnIndex``; and the function's parameters'
|
||||
attributes are at indices 1, ..., n (where 'n' is the number of parameters).
|
||||
Most methods on the ``AttributeSet`` class take an index parameter.
|
||||
The ``AttributeList`` stores a collection of Attribute objects for each kind of
|
||||
object that may have an attribute associated with it: the function as a whole,
|
||||
the return type, or the function's parameters. A function's attributes are at
|
||||
index ``AttributeList::FunctionIndex``; the return type's attributes are at
|
||||
index ``AttributeList::ReturnIndex``; and the function's parameters' attributes
|
||||
are at indices 1, ..., n (where 'n' is the number of parameters). Most methods
|
||||
on the ``AttributeList`` class take an index parameter.
|
||||
|
||||
An ``AttributeSet`` is also a uniqued and immutable object. You create an
|
||||
``AttributeSet`` through the ``AttributeSet::get`` methods. You can add and
|
||||
remove attributes, which result in the creation of a new ``AttributeSet``.
|
||||
An ``AttributeList`` is also a uniqued and immutable object. You create an
|
||||
``AttributeList`` through the ``AttributeList::get`` methods. You can add and
|
||||
remove attributes, which result in the creation of a new ``AttributeList``.
|
||||
|
||||
An ``AttributeSet`` object is designed to be passed around by value.
|
||||
An ``AttributeList`` object is designed to be passed around by value.
|
||||
|
||||
Note: It is advised that you do *not* use the ``AttributeSet`` "introspection"
|
||||
Note: It is advised that you do *not* use the ``AttributeList`` "introspection"
|
||||
methods (e.g. ``Raw``, ``getRawPointer``, etc.). These methods break
|
||||
encapsulation, and may be removed in a future release (i.e. LLVM 4.0).
|
||||
|
||||
``AttrBuilder``
|
||||
===============
|
||||
|
||||
Lastly, we have a "builder" class to help create the ``AttributeSet`` object
|
||||
Lastly, we have a "builder" class to help create the ``AttributeList`` object
|
||||
without having to create several different intermediate uniqued
|
||||
``AttributeSet`` objects. The ``AttrBuilder`` class allows you to add and
|
||||
``AttributeList`` objects. The ``AttrBuilder`` class allows you to add and
|
||||
remove attributes at will. The attributes won't be uniqued until you call the
|
||||
appropriate ``AttributeSet::get`` method.
|
||||
appropriate ``AttributeList::get`` method.
|
||||
|
||||
An ``AttrBuilder`` object is *not* designed to be passed around by value. It
|
||||
should be passed by reference.
|
||||
|
@ -54,7 +54,7 @@ handled by another build system (See: :doc:`CMake <CMake>`).
|
||||
The build system implementation will load the relevant contents of the
|
||||
LLVMBuild files and use that to drive the actual project build.
|
||||
Typically, the build system will only need to load this information at
|
||||
"configure" time, and use it to generative native information. Build
|
||||
"configure" time, and use it to generate native information. Build
|
||||
systems will also handle automatically reconfiguring their information
|
||||
when the contents of the ``LLVMBuild.txt`` files change.
|
||||
|
||||
|
725
docs/LangRef.rst
725
docs/LangRef.rst
File diff suppressed because it is too large
Load Diff
@ -182,7 +182,7 @@ P
|
||||
|
||||
**PR**
|
||||
Problem report. A bug filed on `the LLVM Bug Tracking System
|
||||
<http://llvm.org/bugs/enter_bug.cgi>`_.
|
||||
<https://bugs.llvm.org/enter_bug.cgi>`_.
|
||||
|
||||
**PRE**
|
||||
Partial Redundancy Elimination
|
||||
|
@ -768,10 +768,12 @@ Trophies
|
||||
|
||||
* LLVM: `Clang <https://llvm.org/bugs/show_bug.cgi?id=23057>`_, `Clang-format <https://llvm.org/bugs/show_bug.cgi?id=23052>`_, `libc++ <https://llvm.org/bugs/show_bug.cgi?id=24411>`_, `llvm-as <https://llvm.org/bugs/show_bug.cgi?id=24639>`_, `Demangler <https://bugs.chromium.org/p/chromium/issues/detail?id=606626>`_, Disassembler: http://reviews.llvm.org/rL247405, http://reviews.llvm.org/rL247414, http://reviews.llvm.org/rL247416, http://reviews.llvm.org/rL247417, http://reviews.llvm.org/rL247420, http://reviews.llvm.org/rL247422.
|
||||
|
||||
* Tensorflow: `[1] <https://github.com/tensorflow/tensorflow/commit/7231d01fcb2cd9ef9ffbfea03b724892c8a4026e>`__
|
||||
* Tensorflow: `[1] <https://da-data.blogspot.com/2017/01/finding-bugs-in-tensorflow-with.html>`__
|
||||
|
||||
* Ffmpeg: `[1] <https://github.com/FFmpeg/FFmpeg/commit/c92f55847a3d9cd12db60bfcd0831ff7f089c37c>`__ `[2] <https://github.com/FFmpeg/FFmpeg/commit/25ab1a65f3acb5ec67b53fb7a2463a7368f1ad16>`__ `[3] <https://github.com/FFmpeg/FFmpeg/commit/85d23e5cbc9ad6835eef870a5b4247de78febe56>`__ `[4] <https://github.com/FFmpeg/FFmpeg/commit/04bd1b38ee6b8df410d0ab8d4949546b6c4af26a>`__
|
||||
|
||||
* `Wireshark <https://bugs.wireshark.org/bugzilla/buglist.cgi?bug_status=UNCONFIRMED&bug_status=CONFIRMED&bug_status=IN_PROGRESS&bug_status=INCOMPLETE&bug_status=RESOLVED&bug_status=VERIFIED&f0=OP&f1=OP&f2=product&f3=component&f4=alias&f5=short_desc&f7=content&f8=CP&f9=CP&j1=OR&o2=substring&o3=substring&o4=substring&o5=substring&o6=substring&o7=matches&order=bug_id%20DESC&query_format=advanced&v2=libfuzzer&v3=libfuzzer&v4=libfuzzer&v5=libfuzzer&v6=libfuzzer&v7=%22libfuzzer%22>`_
|
||||
|
||||
.. _pcre2: http://www.pcre.org/
|
||||
.. _AFL: http://lcamtuf.coredump.cx/afl/
|
||||
.. _Radamsa: https://github.com/aoh/radamsa
|
||||
|
@ -39,37 +39,85 @@ MIR Testing Guide
|
||||
You can use the MIR format for testing in two different ways:
|
||||
|
||||
- You can write MIR tests that invoke a single code generation pass using the
|
||||
``run-pass`` option in llc.
|
||||
``-run-pass`` option in llc.
|
||||
|
||||
- You can use llc's ``stop-after`` option with existing or new LLVM assembly
|
||||
- You can use llc's ``-stop-after`` option with existing or new LLVM assembly
|
||||
tests and check the MIR output of a specific code generation pass.
|
||||
|
||||
Testing Individual Code Generation Passes
|
||||
-----------------------------------------
|
||||
|
||||
The ``run-pass`` option in llc allows you to create MIR tests that invoke
|
||||
just a single code generation pass. When this option is used, llc will parse
|
||||
an input MIR file, run the specified code generation pass, and print the
|
||||
resulting MIR to the standard output stream.
|
||||
The ``-run-pass`` option in llc allows you to create MIR tests that invoke just
|
||||
a single code generation pass. When this option is used, llc will parse an
|
||||
input MIR file, run the specified code generation pass(es), and output the
|
||||
resulting MIR code.
|
||||
|
||||
You can generate an input MIR file for the test by using the ``stop-after``
|
||||
option in llc. For example, if you would like to write a test for the
|
||||
post register allocation pseudo instruction expansion pass, you can specify
|
||||
the machine copy propagation pass in the ``stop-after`` option, as it runs
|
||||
just before the pass that we are trying to test:
|
||||
You can generate an input MIR file for the test by using the ``-stop-after`` or
|
||||
``-stop-before`` option in llc. For example, if you would like to write a test
|
||||
for the post register allocation pseudo instruction expansion pass, you can
|
||||
specify the machine copy propagation pass in the ``-stop-after`` option, as it
|
||||
runs just before the pass that we are trying to test:
|
||||
|
||||
``llc -stop-after machine-cp bug-trigger.ll > test.mir``
|
||||
``llc -stop-after=machine-cp bug-trigger.ll > test.mir``
|
||||
|
||||
After generating the input MIR file, you'll have to add a run line that uses
|
||||
the ``-run-pass`` option to it. In order to test the post register allocation
|
||||
pseudo instruction expansion pass on X86-64, a run line like the one shown
|
||||
below can be used:
|
||||
|
||||
``# RUN: llc -run-pass postrapseudos -march=x86-64 %s -o /dev/null | FileCheck %s``
|
||||
``# RUN: llc -o - %s -mtriple=x86_64-- -run-pass=postrapseudos | FileCheck %s``
|
||||
|
||||
The MIR files are target dependent, so they have to be placed in the target
|
||||
specific test directories. They also need to specify a target triple or a
|
||||
target architecture either in the run line or in the embedded LLVM IR module.
|
||||
specific test directories (``test/CodeGen/TARGETNAME``). They also need to
|
||||
specify a target triple or a target architecture either in the run line or in
|
||||
the embedded LLVM IR module.
|
||||
|
||||
Simplifying MIR files
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The MIR code coming out of ``-stop-after``/``-stop-before`` is very verbose;
|
||||
Tests are more accessible and future proof when simplified:
|
||||
|
||||
- Machine function attributes often have default values or the test works just
|
||||
as well with default values. Typical candidates for this are: `alignment:`,
|
||||
`exposesReturnsTwice`, `legalized`, `regBankSelected`, `selected`.
|
||||
The whole `frameInfo` section is often unnecessary if there is no special
|
||||
frame usage in the function. `tracksRegLiveness` on the other hand is often
|
||||
necessary for some passes that care about block livein lists.
|
||||
|
||||
- The (global) `liveins:` list is typically only interesting for early
|
||||
instruction selection passes and can be removed when testing later passes.
|
||||
The per-block `liveins:` on the other hand are necessary if
|
||||
`tracksRegLiveness` is true.
|
||||
|
||||
- Branch probability data in block `successors:` lists can be dropped if the
|
||||
test doesn't depend on it. Example:
|
||||
`successors: %bb.1(0x40000000), %bb.2(0x40000000)` can be replaced with
|
||||
`successors: %bb.1, %bb.2`.
|
||||
|
||||
- MIR code contains a whole IR module. This is necessary because there are
|
||||
no equivalents in MIR for global variables, references to external functions,
|
||||
function attributes, metadata, debug info. Instead some MIR data references
|
||||
the IR constructs. You can often remove them if the test doesn't depend on
|
||||
them.
|
||||
|
||||
- Alias Analysis is performed on IR values. These are referenced by memory
|
||||
operands in MIR. Example: `:: (load 8 from %ir.foobar, !alias.scope !9)`.
|
||||
If the test doesn't depend on (good) alias analysis the references can be
|
||||
dropped: `:: (load 8)`
|
||||
|
||||
- MIR blocks can reference IR blocks for debug printing, profile information
|
||||
or debug locations. Example: `bb.42.myblock` in MIR references the IR block
|
||||
`myblock`. It is usually possible to drop the `.myblock` reference and simply
|
||||
use `bb.42`.
|
||||
|
||||
- If there are no memory operands or blocks referencing the IR then the
|
||||
IR function can be replaced by a parameterless dummy function like
|
||||
`define void @func() { ret void }`.
|
||||
|
||||
- It is possible to drop the whole IR section of the MIR file if it only
|
||||
contains dummy functions (see above). The .mir loader will create the
|
||||
IR functions automatically in this case.
|
||||
|
||||
Limitations
|
||||
-----------
|
||||
|
@ -289,7 +289,7 @@ code often follows a pattern:
|
||||
return my_function_precise(a);
|
||||
}
|
||||
|
||||
The default value for all unspecified reflection parameters is zero.
|
||||
The default value for all unspecified reflection parameters is zero.
|
||||
|
||||
The ``NVVMReflect`` pass should be executed early in the optimization
|
||||
pipeline, immediately after the link stage. The ``internalize`` pass is also
|
||||
@ -326,6 +326,16 @@ often leave behind dead code of the form:
|
||||
Therefore, it is recommended that ``NVVMReflect`` is executed early in the
|
||||
optimization pipeline before dead-code elimination.
|
||||
|
||||
The NVPTX TargetMachine knows how to schedule ``NVVMReflect`` at the beginning
|
||||
of your pass manager; just use the following code when setting up your pass
|
||||
manager:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
std::unique_ptr<TargetMachine> TM = ...;
|
||||
PassManagerBuilder PMBuilder(...);
|
||||
if (TM)
|
||||
TM->adjustPassManager(PMBuilder);
|
||||
|
||||
Reflection Parameters
|
||||
---------------------
|
||||
@ -339,35 +349,17 @@ Flag Description
|
||||
``__CUDA_FTZ=[0,1]`` Use optimized code paths that flush subnormals to zero
|
||||
==================== ======================================================
|
||||
|
||||
The value of this flag is determined by the "nvvm-reflect-ftz" module flag.
|
||||
The following sets the ftz flag to 1.
|
||||
|
||||
Invoking NVVMReflect
|
||||
--------------------
|
||||
|
||||
To ensure that all dead code caused by the reflection pass is eliminated, it
|
||||
is recommended that the reflection pass is executed early in the LLVM IR
|
||||
optimization pipeline. The pass takes an optional mapping of reflection
|
||||
parameter name to an integer value. This mapping can be specified as either a
|
||||
command-line option to ``opt`` or as an LLVM ``StringMap<int>`` object when
|
||||
programmatically creating a pass pipeline.
|
||||
|
||||
With ``opt``:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
# opt -nvvm-reflect -nvvm-reflect-list=<var>=<value>,<var>=<value> module.bc -o module.reflect.bc
|
||||
|
||||
|
||||
With programmatic pass pipeline:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
extern FunctionPass *llvm::createNVVMReflectPass(const StringMap<int>& Mapping);
|
||||
|
||||
StringMap<int> ReflectParams;
|
||||
ReflectParams["__CUDA_FTZ"] = 1;
|
||||
Passes.add(createNVVMReflectPass(ReflectParams));
|
||||
.. code-block:: llvm
|
||||
|
||||
!llvm.module.flags = !{!0}
|
||||
!0 = !{i32 4, !"nvvm-reflect-ftz", i32 1}
|
||||
|
||||
(``i32 4`` indicates that the value set here overrides the value in another
|
||||
module we link with. See the `LangRef <LangRef.html#module-flags-metadata>`_
|
||||
for details.)
|
||||
|
||||
Executing PTX
|
||||
=============
|
||||
|
@ -60,11 +60,14 @@ like this:
|
||||
clang -O2 -mllvm -opt-bisect-limit=256 my_file.c
|
||||
|
||||
The -opt-bisect-limit option may also be applied to link-time optimizations by
|
||||
using a prefix to indicate that this is a plug-in option for the linker. The
|
||||
using a prefix to indicate that this is a plug-in option for the linker. The
|
||||
following syntax will set a bisect limit for LTO transformations:
|
||||
|
||||
::
|
||||
|
||||
# When using lld, or ld64 (macOS)
|
||||
clang -flto -Wl,-mllvm,-opt-bisect-limit=256 my_file.o my_other_file.o
|
||||
# When using Gold
|
||||
clang -flto -Wl,-plugin-opt,-opt-bisect-limit=256 my_file.o my_other_file.o
|
||||
|
||||
LTO passes are run by a library instance invoked by the linker. Therefore any
|
||||
@ -186,12 +189,5 @@ Adding Finer Granularity
|
||||
|
||||
Once the pass in which an incorrect transformation is performed has been
|
||||
determined, it may be useful to perform further analysis in order to determine
|
||||
which specific transformation is causing the problem. Ideally all passes
|
||||
would be instrumented to allow skipping of individual transformations. This
|
||||
functionality is available through the OptBisect object but it is impractical
|
||||
to proactively instrument every existing pass. It is hoped that as developers
|
||||
find that they need a pass to be instrumented they will add the instrumentation
|
||||
and contribute it back to the LLVM source base.
|
||||
|
||||
Helper functions will be added to simplify this level of instrumentation, but
|
||||
this work is not yet completed. For more information, contact Andy Kaylor.
|
||||
which specific transformation is causing the problem. Debug counters
|
||||
can be used for this purpose.
|
||||
|
@ -32,7 +32,7 @@ to know when working in the LLVM infrastructure, and the second describes the
|
||||
Core LLVM classes. In the future this manual will be extended with information
|
||||
describing how to use extension libraries, such as dominator information, CFG
|
||||
traversal routines, and useful utilities like the ``InstVisitor`` (`doxygen
|
||||
<http://llvm.org/doxygen/InstVisitor_8h-source.html>`__) template.
|
||||
<http://llvm.org/doxygen/InstVisitor_8h_source.html>`__) template.
|
||||
|
||||
.. _general:
|
||||
|
||||
@ -108,7 +108,7 @@ they don't have some drawbacks (primarily stemming from the fact that
|
||||
``dynamic_cast<>`` only works on classes that have a v-table). Because they are
|
||||
used so often, you must know what they do and how they work. All of these
|
||||
templates are defined in the ``llvm/Support/Casting.h`` (`doxygen
|
||||
<http://llvm.org/doxygen/Casting_8h-source.html>`__) file (note that you very
|
||||
<http://llvm.org/doxygen/Casting_8h_source.html>`__) file (note that you very
|
||||
rarely have to include this file directly).
|
||||
|
||||
``isa<>``:
|
||||
@ -225,7 +225,7 @@ and clients can call it using any one of:
|
||||
Similarly, APIs which need to return a string may return a ``StringRef``
|
||||
instance, which can be used directly or converted to an ``std::string`` using
|
||||
the ``str`` member function. See ``llvm/ADT/StringRef.h`` (`doxygen
|
||||
<http://llvm.org/doxygen/classllvm_1_1StringRef_8h-source.html>`__) for more
|
||||
<http://llvm.org/doxygen/StringRef_8h_source.html>`__) for more
|
||||
information.
|
||||
|
||||
You should rarely use the ``StringRef`` class directly, because it contains
|
||||
@ -482,7 +482,7 @@ that inherits from the ErrorInfo utility, E.g.:
|
||||
}
|
||||
};
|
||||
|
||||
char FileExists::ID; // This should be declared in the C++ file.
|
||||
char BadFileFormat::ID; // This should be declared in the C++ file.
|
||||
|
||||
Error printFormattedFile(StringRef Path) {
|
||||
if (<check for valid format>)
|
||||
@ -564,18 +564,18 @@ the boolean conversion operator):
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
if (auto Err = canFail(...))
|
||||
if (auto Err = mayFail(...))
|
||||
return Err; // Failure value - move error to caller.
|
||||
|
||||
// Safe to continue: Err was checked.
|
||||
|
||||
In contrast, the following code will always cause an abort, even if ``canFail``
|
||||
In contrast, the following code will always cause an abort, even if ``mayFail``
|
||||
returns a success value:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
canFail();
|
||||
// Program will always abort here, even if canFail() returns Success, since
|
||||
mayFail();
|
||||
// Program will always abort here, even if mayFail() returns Success, since
|
||||
// the value is not checked.
|
||||
|
||||
Failure values are considered checked once a handler for the error type has
|
||||
@ -633,6 +633,12 @@ exiting with an error code, the :ref:`ExitOnError <err_exitonerr>` utility
|
||||
may be a better choice than handleErrors, as it simplifies control flow when
|
||||
calling fallible functions.
|
||||
|
||||
In situations where it is known that a particular call to a fallible function
|
||||
will always succeed (for example, a call to a function that can only fail on a
|
||||
subset of inputs with an input that is known to be safe) the
|
||||
:ref:`cantFail <err_cantfail>` functions can be used to remove the error type,
|
||||
simplifying control flow.
|
||||
|
||||
StringError
|
||||
"""""""""""
|
||||
|
||||
@ -765,6 +771,43 @@ mapping can also be supplied from ``Error`` values to exit codes using the
|
||||
Use ``ExitOnError`` in your tool code where possible as it can greatly improve
|
||||
readability.
|
||||
|
||||
.. _err_cantfail:
|
||||
|
||||
Using cantFail to simplify safe callsites
|
||||
"""""""""""""""""""""""""""""""""""""""""
|
||||
|
||||
Some functions may only fail for a subset of their inputs. For such functions
|
||||
call-sites using known-safe inputs can assume that the result will be a success
|
||||
value.
|
||||
|
||||
The cantFail functions encapsulate this by wrapping an assertion that their
|
||||
argument is a success value and, in the case of Expected<T>, unwrapping the
|
||||
T value from the Expected<T> argument:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
Error mayFail(int X);
|
||||
Expected<int> mayFail2(int X);
|
||||
|
||||
void foo() {
|
||||
cantFail(mayFail(KnownSafeValue));
|
||||
int Y = cantFail(mayFail2(KnownSafeValue));
|
||||
...
|
||||
}
|
||||
|
||||
Like the ExitOnError utility, cantFail simplifies control flow. Their treatment
|
||||
of error cases is very different however: Where ExitOnError is guaranteed to
|
||||
terminate the program on an error input, cantFail simply asserts that the result
|
||||
is success. In debug builds this will result in an assertion failure if an error
|
||||
is encountered. In release builds the behavior of cantFail for failure values is
|
||||
undefined. As such, care must be taken in the use of cantFail: clients must be
|
||||
certain that a cantFail wrapped call really can not fail under any
|
||||
circumstances.
|
||||
|
||||
Use of the cantFail functions should be rare in library code, but they are
|
||||
likely to be of more use in tool and unit-test code where inputs and/or
|
||||
mocked-up classes or functions may be known to be safe.
|
||||
|
||||
Fallible constructors
|
||||
"""""""""""""""""""""
|
||||
|
||||
@ -864,7 +907,7 @@ completing the walk over the archive they could use the ``joinErrors`` utility:
|
||||
|
||||
The ``joinErrors`` routine builds a special error type called ``ErrorList``,
|
||||
which holds a list of user defined errors. The ``handleErrors`` routine
|
||||
recognizes this type and will attempt to handle each of the contained erorrs in
|
||||
recognizes this type and will attempt to handle each of the contained errors in
|
||||
order. If all contained errors can be handled, ``handleErrors`` will return
|
||||
``Error::success()``, otherwise ``handleErrors`` will concatenate the remaining
|
||||
errors and return the resulting ``ErrorList``.
|
||||
@ -931,7 +974,7 @@ The ``function_ref`` class template
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``function_ref``
|
||||
(`doxygen <http://llvm.org/docs/doxygen/html/classllvm_1_1function__ref_3_01Ret_07Params_8_8_8_08_4.html>`__) class
|
||||
(`doxygen <http://llvm.org/doxygen/classllvm_1_1function__ref_3_01Ret_07Params_8_8_8_08_4.html>`__) class
|
||||
template represents a reference to a callable object, templated over the type
|
||||
of the callable. This is a good choice for passing a callback to a function,
|
||||
if you don't need to hold onto the callback after the function returns. In this
|
||||
@ -981,7 +1024,7 @@ you don't want them to always be noisy. A standard compromise is to comment
|
||||
them out, allowing you to enable them if you need them in the future.
|
||||
|
||||
The ``llvm/Support/Debug.h`` (`doxygen
|
||||
<http://llvm.org/doxygen/Debug_8h-source.html>`__) file provides a macro named
|
||||
<http://llvm.org/doxygen/Debug_8h_source.html>`__) file provides a macro named
|
||||
``DEBUG()`` that is a much nicer solution to this problem. Basically, you can
|
||||
put arbitrary code into the argument of the ``DEBUG`` macro, and it is only
|
||||
executed if '``opt``' (or any other tool) is run with the '``-debug``' command
|
||||
@ -1078,7 +1121,7 @@ The ``Statistic`` class & ``-stats`` option
|
||||
-------------------------------------------
|
||||
|
||||
The ``llvm/ADT/Statistic.h`` (`doxygen
|
||||
<http://llvm.org/doxygen/Statistic_8h-source.html>`__) file provides a class
|
||||
<http://llvm.org/doxygen/Statistic_8h_source.html>`__) file provides a class
|
||||
named ``Statistic`` that is used as a unified way to keep track of what the LLVM
|
||||
compiler is doing and how effective various optimizations are. It is useful to
|
||||
see what optimizations are contributing to making a particular program run
|
||||
@ -1094,23 +1137,23 @@ uniform manner with the rest of the passes being executed.
|
||||
There are many examples of ``Statistic`` uses, but the basics of using it are as
|
||||
follows:
|
||||
|
||||
#. Define your statistic like this:
|
||||
Define your statistic like this:
|
||||
|
||||
.. code-block:: c++
|
||||
.. code-block:: c++
|
||||
|
||||
#define DEBUG_TYPE "mypassname" // This goes before any #includes.
|
||||
STATISTIC(NumXForms, "The # of times I did stuff");
|
||||
#define DEBUG_TYPE "mypassname" // This goes before any #includes.
|
||||
STATISTIC(NumXForms, "The # of times I did stuff");
|
||||
|
||||
The ``STATISTIC`` macro defines a static variable, whose name is specified by
|
||||
the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and
|
||||
the description is taken from the second argument. The variable defined
|
||||
("NumXForms" in this case) acts like an unsigned integer.
|
||||
The ``STATISTIC`` macro defines a static variable, whose name is specified by
|
||||
the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and
|
||||
the description is taken from the second argument. The variable defined
|
||||
("NumXForms" in this case) acts like an unsigned integer.
|
||||
|
||||
#. Whenever you make a transformation, bump the counter:
|
||||
Whenever you make a transformation, bump the counter:
|
||||
|
||||
.. code-block:: c++
|
||||
.. code-block:: c++
|
||||
|
||||
++NumXForms; // I did stuff!
|
||||
++NumXForms; // I did stuff!
|
||||
|
||||
That's all you have to do. To get '``opt``' to print out the statistics
|
||||
gathered, use the '``-stats``' option:
|
||||
@ -1158,6 +1201,71 @@ Obviously, with so many optimizations, having a unified framework for this stuff
|
||||
is very nice. Making your pass fit well into the framework makes it more
|
||||
maintainable and useful.
|
||||
|
||||
.. _DebugCounters:
|
||||
|
||||
Adding debug counters to aid in debugging your code
|
||||
---------------------------------------------------
|
||||
|
||||
Sometimes, when writing new passes, or trying to track down bugs, it
|
||||
is useful to be able to control whether certain things in your pass
|
||||
happen or not. For example, there are times the minimization tooling
|
||||
can only easily give you large testcases. You would like to narrow
|
||||
your bug down to a specific transformation happening or not happening,
|
||||
automatically, using bisection. This is where debug counters help.
|
||||
They provide a framework for making parts of your code only execute a
|
||||
certain number of times.
|
||||
|
||||
The ``llvm/Support/DebugCounter.h`` (`doxygen
|
||||
<http://llvm.org/doxygen/DebugCounter_8h_source.html>`__) file
|
||||
provides a class named ``DebugCounter`` that can be used to create
|
||||
command line counter options that control execution of parts of your code.
|
||||
|
||||
Define your DebugCounter like this:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
DEBUG_COUNTER(DeleteAnInstruction, "passname-delete-instruction",
|
||||
"Controls which instructions get deleted");
|
||||
|
||||
The ``DEBUG_COUNTER`` macro defines a static variable, whose name
|
||||
is specified by the first argument. The name of the counter
|
||||
(which is used on the command line) is specified by the second
|
||||
argument, and the description used in the help is specified by the
|
||||
third argument.
|
||||
|
||||
Whatever code you want to control, use ``DebugCounter::shouldExecute`` to control it.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
if (DebugCounter::shouldExecute(DeleteAnInstruction))
|
||||
I->eraseFromParent();
|
||||
|
||||
That's all you have to do. Now, using opt, you can control when this code triggers using
|
||||
the '``--debug-counter``' option. There are two counters provided, ``skip`` and ``count``.
|
||||
``skip`` is the number of times to skip execution of the codepath. ``count`` is the number
|
||||
of times, once we are done skipping, to execute the codepath.
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
$ opt --debug-counter=passname-delete-instruction-skip=1,passname-delete-instruction-count=2 -passname
|
||||
|
||||
This will skip the above code the first time we hit it, then execute it twice, then skip the rest of the executions.
|
||||
|
||||
So if executed on the following code:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
%1 = add i32 %a, %b
|
||||
%2 = add i32 %a, %b
|
||||
%3 = add i32 %a, %b
|
||||
%4 = add i32 %a, %b
|
||||
|
||||
It would delete instructions ``%2`` and ``%3``.
|
||||
|
||||
A utility is provided in ``utils/bisect-skip-count`` to binary search
|
||||
skip and count arguments. It can be used to automatically minimize the
|
||||
skip and count for a debug-counter variable.
|
||||
|
||||
.. _ViewGraph:
|
||||
|
||||
Viewing graphs while debugging code
|
||||
@ -2257,18 +2365,12 @@ of a ``BasicBlock`` and the number of ``Instruction``\ s it contains:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
// func is a pointer to a Function instance
|
||||
for (Function::iterator i = func->begin(), e = func->end(); i != e; ++i)
|
||||
Function &Func = ...
|
||||
for (BasicBlock &BB : Func)
|
||||
// Print out the name of the basic block if it has one, and then the
|
||||
// number of instructions that it contains
|
||||
errs() << "Basic block (name=" << i->getName() << ") has "
|
||||
<< i->size() << " instructions.\n";
|
||||
|
||||
Note that i can be used as if it were a pointer for the purposes of invoking
|
||||
member functions of the ``Instruction`` class. This is because the indirection
|
||||
operator is overloaded for the iterator classes. In the above code, the
|
||||
expression ``i->size()`` is exactly equivalent to ``(*i).size()`` just like
|
||||
you'd expect.
|
||||
errs() << "Basic block (name=" << BB.getName() << ") has "
|
||||
<< BB.size() << " instructions.\n";
|
||||
|
||||
.. _iterate_basicblock:
|
||||
|
||||
@ -2281,17 +2383,17 @@ a code snippet that prints out each instruction in a ``BasicBlock``:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
// blk is a pointer to a BasicBlock instance
|
||||
for (BasicBlock::iterator i = blk->begin(), e = blk->end(); i != e; ++i)
|
||||
BasicBlock& BB = ...
|
||||
for (Instruction &I : BB)
|
||||
// The next statement works since operator<<(ostream&,...)
|
||||
// is overloaded for Instruction&
|
||||
errs() << *i << "\n";
|
||||
errs() << I << "\n";
|
||||
|
||||
|
||||
However, this isn't really the best way to print out the contents of a
|
||||
``BasicBlock``! Since the ostream operators are overloaded for virtually
|
||||
anything you'll care about, you could have just invoked the print routine on the
|
||||
basic block itself: ``errs() << *blk << "\n";``.
|
||||
basic block itself: ``errs() << BB << "\n";``.
|
||||
|
||||
.. _iterate_insiter:
|
||||
|
||||
@ -2425,13 +2527,13 @@ method):
|
||||
OurFunctionPass(): callCounter(0) { }
|
||||
|
||||
virtual runOnFunction(Function& F) {
|
||||
for (Function::iterator b = F.begin(), be = F.end(); b != be; ++b) {
|
||||
for (BasicBlock::iterator i = b->begin(), ie = b->end(); i != ie; ++i) {
|
||||
if (CallInst* callInst = dyn_cast<CallInst>(&*i)) {
|
||||
for (BasicBlock &B : F) {
|
||||
for (Instruction &I: B) {
|
||||
if (auto *CI = dyn_cast<CallInst>(&I)) {
|
||||
// We know we've encountered a call instruction, so we
|
||||
// need to determine if it's a call to the
|
||||
// function pointed to by m_func or not.
|
||||
if (callInst->getCalledFunction() == targetFunc)
|
||||
if (CI->getCalledFunction() == targetFunc)
|
||||
++callCounter;
|
||||
}
|
||||
}
|
||||
@ -2524,12 +2626,11 @@ iterate over all predecessors of BB:
|
||||
#include "llvm/IR/CFG.h"
|
||||
BasicBlock *BB = ...;
|
||||
|
||||
for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
|
||||
BasicBlock *Pred = *PI;
|
||||
for (BasicBlock *Pred : predecessors(BB)) {
|
||||
// ...
|
||||
}
|
||||
|
||||
Similarly, to iterate over successors use ``succ_iterator/succ_begin/succ_end``.
|
||||
Similarly, to iterate over successors use ``successors``.
|
||||
|
||||
.. _simplechanges:
|
||||
|
||||
@ -2554,7 +2655,7 @@ For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``. Thus:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
AllocaInst* ai = new AllocaInst(Type::Int32Ty);
|
||||
auto *ai = new AllocaInst(Type::Int32Ty);
|
||||
|
||||
will create an ``AllocaInst`` instance that represents the allocation of one
|
||||
integer in the current stack frame, at run time. Each ``Instruction`` subclass
|
||||
@ -2579,7 +2680,7 @@ intending to use it within the same ``Function``. I might do:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
AllocaInst* pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
|
||||
auto *pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc");
|
||||
|
||||
where ``indexLoc`` is now the logical name of the instruction's execution value,
|
||||
which is a pointer to an integer on the run time stack.
|
||||
@ -2599,7 +2700,7 @@ sequence of instructions that form a ``BasicBlock``:
|
||||
|
||||
BasicBlock *pb = ...;
|
||||
Instruction *pi = ...;
|
||||
Instruction *newInst = new Instruction(...);
|
||||
auto *newInst = new Instruction(...);
|
||||
|
||||
pb->getInstList().insert(pi, newInst); // Inserts newInst before pi in pb
|
||||
|
||||
@ -2611,7 +2712,7 @@ sequence of instructions that form a ``BasicBlock``:
|
||||
.. code-block:: c++
|
||||
|
||||
BasicBlock *pb = ...;
|
||||
Instruction *newInst = new Instruction(...);
|
||||
auto *newInst = new Instruction(...);
|
||||
|
||||
pb->getInstList().push_back(newInst); // Appends newInst to pb
|
||||
|
||||
@ -2620,7 +2721,7 @@ sequence of instructions that form a ``BasicBlock``:
|
||||
.. code-block:: c++
|
||||
|
||||
BasicBlock *pb = ...;
|
||||
Instruction *newInst = new Instruction(..., pb);
|
||||
auto *newInst = new Instruction(..., pb);
|
||||
|
||||
which is much cleaner, especially if you are creating long instruction
|
||||
streams.
|
||||
@ -2635,7 +2736,7 @@ sequence of instructions that form a ``BasicBlock``:
|
||||
.. code-block:: c++
|
||||
|
||||
Instruction *pi = ...;
|
||||
Instruction *newInst = new Instruction(...);
|
||||
auto *newInst = new Instruction(...);
|
||||
|
||||
pi->getParent()->getInstList().insert(pi, newInst);
|
||||
|
||||
@ -2651,7 +2752,7 @@ sequence of instructions that form a ``BasicBlock``:
|
||||
.. code-block:: c++
|
||||
|
||||
Instruction* pi = ...;
|
||||
Instruction* newInst = new Instruction(..., pi);
|
||||
auto *newInst = new Instruction(..., pi);
|
||||
|
||||
which is much cleaner, especially if you're creating a lot of instructions and
|
||||
adding them to ``BasicBlock``\ s.
|
||||
@ -2718,7 +2819,7 @@ Replacing individual instructions
|
||||
"""""""""""""""""""""""""""""""""
|
||||
|
||||
Including "`llvm/Transforms/Utils/BasicBlockUtils.h
|
||||
<http://llvm.org/doxygen/BasicBlockUtils_8h-source.html>`_" permits use of two
|
||||
<http://llvm.org/doxygen/BasicBlockUtils_8h_source.html>`_" permits use of two
|
||||
very useful replace functions: ``ReplaceInstWithValue`` and
|
||||
``ReplaceInstWithInst``.
|
||||
|
||||
@ -2814,7 +2915,7 @@ is easier to read and write than the equivalent
|
||||
FunctionType *ft = FunctionType::get(Type::Int8Ty, params, false);
|
||||
|
||||
See the `class comment
|
||||
<http://llvm.org/doxygen/TypeBuilder_8h-source.html#l00001>`_ for more details.
|
||||
<http://llvm.org/doxygen/TypeBuilder_8h_source.html#l00001>`_ for more details.
|
||||
|
||||
.. _threading:
|
||||
|
||||
@ -2903,7 +3004,7 @@ Another way is to only call ``getPointerToFunction()`` from the
|
||||
|
||||
When the JIT is configured to compile lazily (using
|
||||
``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race
|
||||
condition <http://llvm.org/bugs/show_bug.cgi?id=5184>`_ in updating call sites
|
||||
condition <https://bugs.llvm.org/show_bug.cgi?id=5184>`_ in updating call sites
|
||||
after a function is lazily-jitted. It's still possible to use the lazy JIT in a
|
||||
threaded program if you ensure that only one thread at a time can call any
|
||||
particular lazy stub and that the JIT lock guards any IR access, but we suggest
|
||||
@ -3235,7 +3336,7 @@ The Core LLVM Class Hierarchy Reference
|
||||
|
||||
``#include "llvm/IR/Type.h"``
|
||||
|
||||
header source: `Type.h <http://llvm.org/doxygen/Type_8h-source.html>`_
|
||||
header source: `Type.h <http://llvm.org/doxygen/Type_8h_source.html>`_
|
||||
|
||||
doxygen info: `Type Classes <http://llvm.org/doxygen/classllvm_1_1Type.html>`_
|
||||
|
||||
@ -3339,7 +3440,7 @@ The ``Module`` class
|
||||
|
||||
``#include "llvm/IR/Module.h"``
|
||||
|
||||
header source: `Module.h <http://llvm.org/doxygen/Module_8h-source.html>`_
|
||||
header source: `Module.h <http://llvm.org/doxygen/Module_8h_source.html>`_
|
||||
|
||||
doxygen info: `Module Class <http://llvm.org/doxygen/classllvm_1_1Module.html>`_
|
||||
|
||||
@ -3426,7 +3527,7 @@ The ``Value`` class
|
||||
|
||||
``#include "llvm/IR/Value.h"``
|
||||
|
||||
header source: `Value.h <http://llvm.org/doxygen/Value_8h-source.html>`_
|
||||
header source: `Value.h <http://llvm.org/doxygen/Value_8h_source.html>`_
|
||||
|
||||
doxygen info: `Value Class <http://llvm.org/doxygen/classllvm_1_1Value.html>`_
|
||||
|
||||
@ -3517,7 +3618,7 @@ The ``User`` class
|
||||
|
||||
``#include "llvm/IR/User.h"``
|
||||
|
||||
header source: `User.h <http://llvm.org/doxygen/User_8h-source.html>`_
|
||||
header source: `User.h <http://llvm.org/doxygen/User_8h_source.html>`_
|
||||
|
||||
doxygen info: `User Class <http://llvm.org/doxygen/classllvm_1_1User.html>`_
|
||||
|
||||
@ -3564,7 +3665,7 @@ The ``Instruction`` class
|
||||
``#include "llvm/IR/Instruction.h"``
|
||||
|
||||
header source: `Instruction.h
|
||||
<http://llvm.org/doxygen/Instruction_8h-source.html>`_
|
||||
<http://llvm.org/doxygen/Instruction_8h_source.html>`_
|
||||
|
||||
doxygen info: `Instruction Class
|
||||
<http://llvm.org/doxygen/classllvm_1_1Instruction.html>`_
|
||||
@ -3712,7 +3813,7 @@ The ``GlobalValue`` class
|
||||
``#include "llvm/IR/GlobalValue.h"``
|
||||
|
||||
header source: `GlobalValue.h
|
||||
<http://llvm.org/doxygen/GlobalValue_8h-source.html>`_
|
||||
<http://llvm.org/doxygen/GlobalValue_8h_source.html>`_
|
||||
|
||||
doxygen info: `GlobalValue Class
|
||||
<http://llvm.org/doxygen/classllvm_1_1GlobalValue.html>`_
|
||||
@ -3770,7 +3871,7 @@ The ``Function`` class
|
||||
|
||||
``#include "llvm/IR/Function.h"``
|
||||
|
||||
header source: `Function.h <http://llvm.org/doxygen/Function_8h-source.html>`_
|
||||
header source: `Function.h <http://llvm.org/doxygen/Function_8h_source.html>`_
|
||||
|
||||
doxygen info: `Function Class
|
||||
<http://llvm.org/doxygen/classllvm_1_1Function.html>`_
|
||||
@ -3879,7 +3980,7 @@ The ``GlobalVariable`` class
|
||||
``#include "llvm/IR/GlobalVariable.h"``
|
||||
|
||||
header source: `GlobalVariable.h
|
||||
<http://llvm.org/doxygen/GlobalVariable_8h-source.html>`_
|
||||
<http://llvm.org/doxygen/GlobalVariable_8h_source.html>`_
|
||||
|
||||
doxygen info: `GlobalVariable Class
|
||||
<http://llvm.org/doxygen/classllvm_1_1GlobalVariable.html>`_
|
||||
@ -3937,7 +4038,7 @@ The ``BasicBlock`` class
|
||||
``#include "llvm/IR/BasicBlock.h"``
|
||||
|
||||
header source: `BasicBlock.h
|
||||
<http://llvm.org/doxygen/BasicBlock_8h-source.html>`_
|
||||
<http://llvm.org/doxygen/BasicBlock_8h_source.html>`_
|
||||
|
||||
doxygen info: `BasicBlock Class
|
||||
<http://llvm.org/doxygen/classllvm_1_1BasicBlock.html>`_
|
||||
|
@ -30,7 +30,7 @@ This proposal relates only to moving the hosting of our source-code repository
|
||||
from SVN hosted on our own servers to Git hosted on GitHub. We are not proposing
|
||||
using GitHub's issue tracker, pull-requests, or code-review.
|
||||
|
||||
Contributers will continue to earn commit access on demand under the Developer
|
||||
Contributors will continue to earn commit access on demand under the Developer
|
||||
Policy, except that a GitHub account will be required instead of SVN
|
||||
username/password-hash.
|
||||
|
||||
@ -433,7 +433,7 @@ Concerns
|
||||
* Using the monolithic repository may add overhead for those *integrating* a
|
||||
standalone sub-project, even if they aren't contributing to it, due to the
|
||||
same disk space concern as the point above. The availability of the
|
||||
sub-project Git mirror addesses this, even without SVN access.
|
||||
sub-project Git mirror addresses this, even without SVN access.
|
||||
* Preservation of the existing read/write SVN-based workflows relies on the
|
||||
GitHub SVN bridge, which is an extra dependency. Maintaining this locks us
|
||||
into GitHub and could restrict future workflow changes.
|
||||
|
@ -1,15 +1,21 @@
|
||||
========================
|
||||
LLVM 4.0.0 Release Notes
|
||||
LLVM 5.0.0 Release Notes
|
||||
========================
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
.. warning::
|
||||
These are in-progress notes for the upcoming LLVM 5 release.
|
||||
Release notes for previous releases can be found on
|
||||
`the Download Page <http://releases.llvm.org/download.html>`_.
|
||||
|
||||
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document contains the release notes for the LLVM Compiler Infrastructure,
|
||||
release 4.0.0. Here we describe the status of LLVM, including major improvements
|
||||
release 5.0.0. Here we describe the status of LLVM, including major improvements
|
||||
from the previous release, improvements in various subprojects of LLVM, and
|
||||
some of the current users of the code. All LLVM releases may be downloaded
|
||||
from the `LLVM releases web site <http://llvm.org/releases/>`_.
|
||||
@ -20,319 +26,77 @@ have questions or comments, the `LLVM Developer's Mailing List
|
||||
<http://lists.llvm.org/mailman/listinfo/llvm-dev>`_ is a good place to send
|
||||
them.
|
||||
|
||||
New Versioning Scheme
|
||||
=====================
|
||||
Starting with this release, LLVM is using a
|
||||
`new versioning scheme <http://blog.llvm.org/2016/12/llvms-new-versioning-scheme.html>`_,
|
||||
increasing the major version number with each major release. Stable updates to
|
||||
this release will be versioned 4.0.x, and the next major release, six months
|
||||
from now, will be version 5.0.0.
|
||||
Note that if you are reading this file from a Subversion checkout or the main
|
||||
LLVM web page, this document applies to the *next* release, not the current
|
||||
one. To see the release notes for a specific release, please see the `releases
|
||||
page <http://llvm.org/releases/>`_.
|
||||
|
||||
Non-comprehensive list of changes in this release
|
||||
=================================================
|
||||
* The minimum compiler version required for building LLVM has been raised to
|
||||
4.8 for GCC and 2015 for Visual Studio.
|
||||
.. NOTE
|
||||
For small 1-3 sentence descriptions, just add an entry at the end of
|
||||
this list. If your description won't fit comfortably in one bullet
|
||||
point (e.g. maybe you would like to give an example of the
|
||||
functionality, or simply have a lot to talk about), see the `NOTE` below
|
||||
for adding a new subsection.
|
||||
|
||||
* The C API functions ``LLVMAddFunctionAttr``, ``LLVMGetFunctionAttr``,
|
||||
``LLVMRemoveFunctionAttr``, ``LLVMAddAttribute``, ``LLVMRemoveAttribute``,
|
||||
``LLVMGetAttribute``, ``LLVMAddInstrAttribute`` and
|
||||
``LLVMRemoveInstrAttribute`` have been removed.
|
||||
* ... next change ...
|
||||
|
||||
* The C API enum ``LLVMAttribute`` has been deleted.
|
||||
.. NOTE
|
||||
If you would like to document a larger change, then you can add a
|
||||
subsection about it right here. You can copy the following boilerplate
|
||||
and un-indent it (the indentation causes it to be inside this comment).
|
||||
|
||||
* The definition and uses of ``LLVM_ATTRIBUTE_UNUSED_RESULT`` in the LLVM source
|
||||
were replaced with ``LLVM_NODISCARD``, which matches the C++17 ``[[nodiscard]]``
|
||||
semantics rather than gcc's ``__attribute__((warn_unused_result))``.
|
||||
Special New Feature
|
||||
-------------------
|
||||
|
||||
* The Timer related APIs now expect a Name and Description. When upgrading code
|
||||
the previously used names should become descriptions and a short name in the
|
||||
style of a programming language identifier should be added.
|
||||
Makes programs 10x faster by doing Special New Thing.
|
||||
|
||||
* LLVM now handles ``invariant.group`` across different basic blocks, which makes
|
||||
it possible to devirtualize virtual calls inside loops.
|
||||
Changes to the LLVM IR
|
||||
----------------------
|
||||
|
||||
* The aggressive dead code elimination phase ("adce") now removes
|
||||
branches which do not affect program behavior. Loops are retained by
|
||||
default since they may be infinite but these can also be removed
|
||||
with LLVM option ``-adce-remove-loops`` when the loop body otherwise has
|
||||
no live operations.
|
||||
|
||||
* The llvm-cov tool can now export coverage data as json. Its html output mode
|
||||
has also improved.
|
||||
|
||||
Improvements to ThinLTO (-flto=thin)
|
||||
------------------------------------
|
||||
Integration with profile data (PGO). When available, profile data
|
||||
enables more accurate function importing decisions, as well as
|
||||
cross-module indirect call promotion.
|
||||
|
||||
Significant build-time and binary-size improvements when compiling with
|
||||
debug info (-g).
|
||||
|
||||
LLVM Coroutines
|
||||
---------------
|
||||
|
||||
Experimental support for :doc:`Coroutines` was added, which can be enabled
|
||||
with ``-enable-coroutines`` in the ``opt`` command tool or using the
|
||||
``addCoroutinePassesToExtensionPoints`` API when building the optimization
|
||||
pipeline.
|
||||
|
||||
For more information on LLVM Coroutines and the LLVM implementation, see
|
||||
`2016 LLVM Developers’ Meeting talk on LLVM Coroutines
|
||||
<http://llvm.org/devmtg/2016-11/#talk4>`_.
|
||||
|
||||
Regcall and Vectorcall Calling Conventions
|
||||
--------------------------------------------------
|
||||
|
||||
Support was added for the ``__regcall`` calling convention.
|
||||
Existing ``__vectorcall`` calling convention support was extended to include
|
||||
correct handling of HVAs.
|
||||
|
||||
The ``__vectorcall`` calling convention was introduced by Microsoft to
|
||||
enhance register usage when passing parameters.
|
||||
For more information please read `__vectorcall documentation
|
||||
<https://msdn.microsoft.com/en-us/library/dn375768.aspx>`_.
|
||||
|
||||
The ``__regcall`` calling convention was introduced by Intel to
|
||||
optimize parameter transfer on function call.
|
||||
This calling convention ensures that as many values as possible are
|
||||
passed or returned in registers.
|
||||
For more information please read `__regcall documentation
|
||||
<https://software.intel.com/en-us/node/693069>`_.
|
||||
|
||||
Code Generation Testing
|
||||
-----------------------
|
||||
|
||||
Passes that work on the machine instruction representation can be tested with
|
||||
the .mir serialization format. ``llc`` supports the ``-run-pass``,
|
||||
``-stop-after``, ``-stop-before``, ``-start-after``, ``-start-before`` to
|
||||
run a single pass of the code generation pipeline, or to stop or start the code
|
||||
generation pipeline at a given point.
|
||||
|
||||
Additional information can be found in the :doc:`MIRLangRef`. The format is
|
||||
used by the tests ending in ``.mir`` in the ``test/CodeGen`` directory.
|
||||
|
||||
This feature is available since 2015. It is used more often lately and was not
|
||||
mentioned in the release notes yet.
|
||||
|
||||
Intrusive list API overhaul
|
||||
---------------------------
|
||||
|
||||
The intrusive list infrastructure was substantially rewritten over the last
|
||||
couple of releases, primarily to excise undefined behaviour. The biggest
|
||||
changes landed in this release.
|
||||
|
||||
* ``simple_ilist<T>`` is a lower-level intrusive list that never takes
|
||||
ownership of its nodes. New intrusive-list clients should consider using it
|
||||
instead of ``ilist<T>``.
|
||||
|
||||
* ``ilist_tag<class>`` allows a single data type to be inserted into two
|
||||
parallel intrusive lists. A type can inherit twice from ``ilist_node``,
|
||||
first using ``ilist_node<T,ilist_tag<A>>`` (enabling insertion into
|
||||
``simple_ilist<T,ilist_tag<A>>``) and second using
|
||||
``ilist_node<T,ilist_tag<B>>`` (enabling insertion into
|
||||
``simple_ilist<T,ilist_tag<B>>``), where ``A`` and ``B`` are arbitrary
|
||||
types.
|
||||
|
||||
* ``ilist_sentinel_tracking<bool>`` controls whether an iterator knows
|
||||
whether it's pointing at the sentinel (``end()``). By default, sentinel
|
||||
tracking is on when ABI-breaking checks are enabled, and off otherwise;
|
||||
this is used for an assertion when dereferencing ``end()`` (this assertion
|
||||
triggered often in practice, and many backend bugs were fixed). Explicitly
|
||||
turning on sentinel tracking also enables ``iterator::isEnd()``. This is
|
||||
used by ``MachineInstrBundleIterator`` to iterate over bundles.
|
||||
|
||||
* ``ilist<T>`` is built on top of ``simple_ilist<T>``, and supports the same
|
||||
configuration options. As before (and unlike ``simple_ilist<T>``),
|
||||
``ilist<T>`` takes ownership of its nodes. However, it no longer supports
|
||||
*allocating* nodes, and is now equivalent to ``iplist<T>``. ``iplist<T>``
|
||||
will likely be removed in the future.
|
||||
|
||||
* ``ilist<T>`` now always uses ``ilist_traits<T>``. Instead of passing a
|
||||
custom traits class in via a template parameter, clients that want to
|
||||
customize the traits should specialize ``ilist_traits<T>``. Clients that
|
||||
want to avoid ownership can specialize ``ilist_alloc_traits<T>`` to inherit
|
||||
from ``ilist_noalloc_traits<T>`` (or to do something funky); clients that
|
||||
need callbacks can specialize ``ilist_callback_traits<T>`` directly.
|
||||
|
||||
* The underlying data structure is now a simple recursive linked list. The
|
||||
sentinel node contains only a "next" (``begin()``) and "prev" (``rbegin()``)
|
||||
pointer and is stored in the same allocation as ``simple_ilist<T>``.
|
||||
Previously, it was malloc-allocated on-demand by default, although the
|
||||
now-defunct ``ilist_sentinel_traits<T>`` was sometimes specialized to avoid
|
||||
this.
|
||||
|
||||
* The ``reverse_iterator`` class no longer uses ``std::reverse_iterator``.
|
||||
Instead, it now has a handle to the same node that it dereferences to.
|
||||
Reverse iterators now have the same iterator invalidation semantics as
|
||||
forward iterators.
|
||||
|
||||
* ``iterator`` and ``reverse_iterator`` have explicit conversion constructors
|
||||
that match ``std::reverse_iterator``'s off-by-one semantics, so that
|
||||
reversing the end points of an iterator range results in the same range
|
||||
(albeit in reverse). I.e., ``reverse_iterator(begin())`` equals
|
||||
``rend()``.
|
||||
|
||||
* ``iterator::getReverse()`` and ``reverse_iterator::getReverse()`` return an
|
||||
iterator that dereferences to the *same* node. I.e.,
|
||||
``begin().getReverse()`` equals ``--rend()``.
|
||||
|
||||
* ``ilist_node<T>::getIterator()`` and
|
||||
``ilist_node<T>::getReverseIterator()`` return the forward and reverse
|
||||
iterators that dereference to the current node. I.e.,
|
||||
``begin()->getIterator()`` equals ``begin()`` and
|
||||
``rbegin()->getReverseIterator()`` equals ``rbegin()``.
|
||||
|
||||
* ``iterator`` now stores an ``ilist_node_base*`` instead of a ``T*``. The
|
||||
implicit conversions between ``ilist<T>::iterator`` and ``T*`` have been
|
||||
removed. Clients may use ``N->getIterator()`` (if not ``nullptr``) or
|
||||
``&*I`` (if not ``end()``); alternatively, clients may refactor to use
|
||||
references for known-good nodes.
|
||||
|
||||
Changes to the ARM Targets
|
||||
Changes to the ARM Backend
|
||||
--------------------------
|
||||
|
||||
**During this release the AArch64 target has:**
|
||||
During this release ...
|
||||
|
||||
* Gained support for ILP32 relocations.
|
||||
* Gained support for XRay.
|
||||
* Made even more progress on GlobalISel. There is still some work left before
|
||||
it is production-ready though.
|
||||
* Refined the support for Qualcomm's Falkor and Samsung's Exynos CPUs.
|
||||
* Learned a few new tricks for lowering multiplications by constants, folding
|
||||
spilled/refilled copies etc.
|
||||
|
||||
**During this release the ARM target has:**
|
||||
|
||||
* Gained support for ROPI (read-only position independence) and RWPI
|
||||
(read-write position independence), which can be used to remove the need for
|
||||
a dynamic linker.
|
||||
* Gained support for execute-only code, which is placed in pages without read
|
||||
permissions.
|
||||
* Gained a machine scheduler for Cortex-R52.
|
||||
* Gained support for XRay.
|
||||
* Gained Thumb1 implementations for several compiler-rt builtins. It also
|
||||
has some support for building the builtins for HF targets.
|
||||
* Started using the generic bitreverse intrinsic instead of rbit.
|
||||
* Gained very basic support for GlobalISel.
|
||||
|
||||
A lot of work has also been done in LLD for ARM, which now supports more
|
||||
relocations and TLS.
|
||||
|
||||
Note: From the next release (5.0), the "vulcan" target will be renamed to
|
||||
"thunderx2t99", including command line options, assembly directives, etc. This
|
||||
release (4.0) will be the last one to accept "vulcan" as its name.
|
||||
|
||||
Changes to the AVR Target
|
||||
-----------------------------
|
||||
|
||||
This marks the first release where the AVR backend has been completely merged
|
||||
from a fork into LLVM trunk. The backend is still marked experimental, but
|
||||
is generally quite usable. All downstream development has halted on
|
||||
`GitHub <https://github.com/avr-llvm/llvm>`_, and changes now go directly into
|
||||
LLVM trunk.
|
||||
|
||||
* Instruction selector and pseudo instruction expansion pass landed
|
||||
* `read_register` and `write_register` intrinsics are now supported
|
||||
* Support stack stores greater than 63-bytes from the bottom of the stack
|
||||
* A number of assertion errors have been fixed
|
||||
* Support stores to `undef` locations
|
||||
* Very basic support for the target has been added to clang
|
||||
* Small optimizations to some 16-bit boolean expressions
|
||||
|
||||
Most of the work behind the scenes has been on correctness of generated
|
||||
assembly, and also fixing some assertions we would hit on some well-formed
|
||||
inputs.
|
||||
|
||||
Changes to the MIPS Target
|
||||
--------------------------
|
||||
|
||||
During this release ...
|
||||
|
||||
|
||||
Changes to the PowerPC Target
|
||||
-----------------------------
|
||||
|
||||
**During this release the MIPS target has:**
|
||||
|
||||
* IAS is now enabled by default for Debian mips64el.
|
||||
* Added support for the two operand form for many instructions.
|
||||
* Added the following macros: unaligned load/store, seq, double word load/store for O32.
|
||||
* Improved the parsing of complex memory offset expressions.
|
||||
* Enabled the integrated assembler by default for Debian mips64el.
|
||||
* Added a generic scheduler based on the interAptiv CPU.
|
||||
* Added support for thread local relocations.
|
||||
* Added recip, rsqrt, evp, dvp, synci instructions in IAS.
|
||||
* Optimized the generation of constants for some cases.
|
||||
|
||||
**The following issues have been fixed:**
|
||||
|
||||
* Thread local debug information is correctly recorded.
|
||||
* MSA intrinsics are now range checked.
|
||||
* Fixed an issue with MSA and the no-odd-spreg abi.
|
||||
* Fixed some corner cases in handling forbidden slots for MIPSR6.
|
||||
* Fixed an issue with jumps not being converted to relative branches for assembly.
|
||||
* Fixed the handling of local symbols and jal instruction.
|
||||
* N32/N64 no longer have their relocation tables sorted as per their ABIs.
|
||||
* Fixed a crash when half-precision floating point conversion MSA intrinsics are used.
|
||||
* Fixed several crashes involving FastISel.
|
||||
* Corrected the definitions for aui/daui/dahi/dati for MIPSR6.
|
||||
During this release ...
|
||||
|
||||
Changes to the X86 Target
|
||||
-------------------------
|
||||
|
||||
**During this release the X86 target has:**
|
||||
During this release ...
|
||||
|
||||
* Added support for AMD Ryzen (znver1) CPUs.
|
||||
* Gained support for using VEX encoding on AVX-512 CPUs to reduce code size when possible.
|
||||
* Improved AVX-512 codegen.
|
||||
Changes to the AMDGPU Target
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
|
||||
Changes to the AVR Target
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
|
||||
Changes to the OCaml bindings
|
||||
-----------------------------
|
||||
|
||||
* The attribute API was completely overhauled, following the changes
|
||||
to the C API.
|
||||
During this release ...
|
||||
|
||||
|
||||
External Open Source Projects Using LLVM 4.0.0
|
||||
==============================================
|
||||
External Open Source Projects Using LLVM 5
|
||||
==========================================
|
||||
|
||||
LDC - the LLVM-based D compiler
|
||||
-------------------------------
|
||||
|
||||
`D <http://dlang.org>`_ is a language with C-like syntax and static typing. It
|
||||
pragmatically combines efficiency, control, and modeling power, with safety and
|
||||
programmer productivity. D supports powerful concepts like Compile-Time Function
|
||||
Execution (CTFE) and Template Meta-Programming, provides an innovative approach
|
||||
to concurrency and offers many classical paradigms.
|
||||
|
||||
`LDC <http://wiki.dlang.org/LDC>`_ uses the frontend from the reference compiler
|
||||
combined with LLVM as backend to produce efficient native code. LDC targets
|
||||
x86/x86_64 systems like Linux, OS X, FreeBSD and Windows and also Linux on ARM
|
||||
and PowerPC (32/64 bit). Ports to other architectures like AArch64 and MIPS64
|
||||
are underway.
|
||||
|
||||
Portable Computing Language (pocl)
|
||||
----------------------------------
|
||||
|
||||
In addition to producing an easily portable open source OpenCL
|
||||
implementation, another major goal of `pocl <http://pocl.sourceforge.net/>`_
|
||||
is improving performance portability of OpenCL programs with
|
||||
compiler optimizations, reducing the need for target-dependent manual
|
||||
optimizations. An important part of pocl is a set of LLVM passes used to
|
||||
statically parallelize multiple work-items with the kernel compiler, even in
|
||||
the presence of work-group barriers. This enables static parallelization of
|
||||
the fine-grained static concurrency in the work groups in multiple ways.
|
||||
|
||||
TTA-based Co-design Environment (TCE)
|
||||
-------------------------------------
|
||||
|
||||
`TCE <http://tce.cs.tut.fi/>`_ is a toolset for designing customized
|
||||
processors based on the Transport Triggered Architecture (TTA).
|
||||
The toolset provides a complete co-design flow from C/C++
|
||||
programs down to synthesizable VHDL/Verilog and parallel program binaries.
|
||||
Processor customization points include register files, function units,
|
||||
supported operations, and the interconnection network.
|
||||
|
||||
TCE uses Clang and LLVM for C/C++/OpenCL C language support, target independent
|
||||
optimizations and also for parts of code generation. It generates new
|
||||
LLVM-based code generators "on the fly" for the designed TTA processors and
|
||||
loads them in to the compiler backend as runtime libraries to avoid
|
||||
per-target recompilation of larger parts of the compiler chain.
|
||||
* A project...
|
||||
|
||||
|
||||
Additional Information
|
||||
|
@ -13,6 +13,13 @@ The Scudo Hardened Allocator is a user-mode allocator based on LLVM Sanitizer's
|
||||
CombinedAllocator, which aims at providing additional mitigations against heap
|
||||
based vulnerabilities, while maintaining good performance.
|
||||
|
||||
Currently, the allocator supports (was tested on) the following architectures:
|
||||
|
||||
- i386 (& i686) (32-bit);
|
||||
- x86_64 (64-bit);
|
||||
- armhf (32-bit);
|
||||
- AArch64 (64-bit).
|
||||
|
||||
The name "Scudo" has been retained from the initial implementation (Escudo
|
||||
meaning Shield in Spanish and Portuguese).
|
||||
|
||||
@ -31,29 +38,25 @@ header is accessed, and the process terminated.
|
||||
The following information is stored in the header:
|
||||
|
||||
- the 16-bit checksum;
|
||||
- the user requested size for that chunk, which is necessary for reallocation
|
||||
purposes;
|
||||
- the unused bytes amount for that chunk, which is necessary for computing the
|
||||
size of the chunk;
|
||||
- the state of the chunk (available, allocated or quarantined);
|
||||
- the allocation type (malloc, new, new[] or memalign), to detect potential
|
||||
mismatches in the allocation APIs used;
|
||||
- whether or not the chunk is offseted (ie: if the chunk beginning is different
|
||||
than the backend allocation beginning, which is most often the case with some
|
||||
aligned allocations);
|
||||
- the associated offset;
|
||||
- a 16-bit salt.
|
||||
- the offset of the chunk, which is the distance in bytes from the beginning of
|
||||
the returned chunk to the beginning of the backend allocation;
|
||||
- an 8-bit salt.
|
||||
|
||||
On x64, which is currently the only architecture supported, the header fits
|
||||
within 16-bytes, which works nicely with the minimum alignment requirements.
|
||||
This header fits within 8 bytes, on all platforms supported.
|
||||
|
||||
The checksum is computed as a CRC32 (requiring the SSE 4.2 instruction set)
|
||||
of the global secret, the chunk pointer itself, and the 16 bytes of header with
|
||||
The checksum is computed as a CRC32 (made faster with hardware support)
|
||||
of the global secret, the chunk pointer itself, and the 8 bytes of header with
|
||||
the checksum field zeroed out.
|
||||
|
||||
The header is atomically loaded and stored to prevent races (this requires
|
||||
platform support such as the cmpxchg16b instruction). This is important as two
|
||||
consecutive chunks could belong to different threads. We also want to avoid
|
||||
any type of double fetches of information located in the header, and use local
|
||||
copies of the header for this purpose.
|
||||
The header is atomically loaded and stored to prevent races. This is important
|
||||
as two consecutive chunks could belong to different threads. We also want to
|
||||
avoid any type of double fetches of information located in the header, and use
|
||||
local copies of the header for this purpose.
|
||||
|
||||
Delayed Freelist
|
||||
-----------------
|
||||
@ -94,9 +97,9 @@ You may also build Scudo like this:
|
||||
.. code::
|
||||
|
||||
cd $LLVM/projects/compiler-rt/lib
|
||||
clang++ -fPIC -std=c++11 -msse4.2 -mcx16 -O2 -I. scudo/*.cpp \
|
||||
clang++ -fPIC -std=c++11 -msse4.2 -O2 -I. scudo/*.cpp \
|
||||
$(\ls sanitizer_common/*.{cc,S} | grep -v "sanitizer_termination\|sanitizer_common_nolibc") \
|
||||
-shared -o scudo-allocator.so -lpthread
|
||||
-shared -o scudo-allocator.so -pthread
|
||||
|
||||
and then use it with existing binaries as follows:
|
||||
|
||||
@ -136,29 +139,29 @@ Or using the function:
|
||||
|
||||
The following options are available:
|
||||
|
||||
+-----------------------------+---------+------------------------------------------------+
|
||||
| Option | Default | Description |
|
||||
+-----------------------------+---------+------------------------------------------------+
|
||||
| QuarantineSizeMb | 64 | The size (in Mb) of quarantine used to delay |
|
||||
| | | the actual deallocation of chunks. Lower value |
|
||||
| | | may reduce memory usage but decrease the |
|
||||
| | | effectiveness of the mitigation; a negative |
|
||||
| | | value will fallback to a default of 64Mb. |
|
||||
+-----------------------------+---------+------------------------------------------------+
|
||||
| ThreadLocalQuarantineSizeKb | 1024 | The size (in Kb) of per-thread cache use to |
|
||||
| | | offload the global quarantine. Lower value may |
|
||||
| | | reduce memory usage but might increase |
|
||||
| | | contention on the global quarantine. |
|
||||
+-----------------------------+---------+------------------------------------------------+
|
||||
| DeallocationTypeMismatch | true | Whether or not we report errors on |
|
||||
| | | malloc/delete, new/free, new/delete[], etc. |
|
||||
+-----------------------------+---------+------------------------------------------------+
|
||||
| DeleteSizeMismatch | true | Whether or not we report errors on mismatch |
|
||||
| | | between sizes of new and delete. |
|
||||
+-----------------------------+---------+------------------------------------------------+
|
||||
| ZeroContents | false | Whether or not we zero chunk contents on |
|
||||
| | | allocation and deallocation. |
|
||||
+-----------------------------+---------+------------------------------------------------+
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| Option | 64-bit default | 32-bit default | Description |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| QuarantineSizeMb | 64 | 16 | The size (in Mb) of quarantine used to delay |
|
||||
| | | | the actual deallocation of chunks. Lower value |
|
||||
| | | | may reduce memory usage but decrease the |
|
||||
| | | | effectiveness of the mitigation; a negative |
|
||||
| | | | value will fall back to a default of 64Mb. |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| ThreadLocalQuarantineSizeKb | 1024 | 256 | The size (in Kb) of per-thread cache used to |
|
||||
| | | | offload the global quarantine. Lower value may |
|
||||
| | | | reduce memory usage but might increase |
|
||||
| | | | contention on the global quarantine. |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| DeallocationTypeMismatch | true | true | Whether or not we report errors on |
|
||||
| | | | malloc/delete, new/free, new/delete[], etc. |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| DeleteSizeMismatch | true | true | Whether or not we report errors on mismatch |
|
||||
| | | | between sizes of new and delete. |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| ZeroContents | false | false | Whether or not we zero chunk contents on |
|
||||
| | | | allocation and deallocation. |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
|
||||
Allocator related common Sanitizer options can also be passed through Scudo
|
||||
options, such as ``allocator_may_return_null``. A detailed list including those
|
||||
|
@ -831,7 +831,7 @@ Bugs and Enhancements
|
||||
|
||||
Currently known bugs and enhancements under consideration can be
|
||||
tracked by performing a `bugzilla search
|
||||
<http://llvm.org/bugs/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
|
||||
<https://bugs.llvm.org/buglist.cgi?cmdtype=runnamed&namedcmd=Statepoint%20Bugs&list_id=64342>`_
|
||||
for [Statepoint] in the summary field. When filing new bugs, please
|
||||
use this tag so that interested parties see the newly filed bug. As
|
||||
with most LLVM features, design discussions take place on `llvm-dev
|
||||
|
@ -228,6 +228,12 @@ CTags
|
||||
format. A helper script, utils/TableGen/tdtags, provides an easier-to-use
|
||||
interface; run 'tdtags -H' for documentation.
|
||||
|
||||
X86EVEX2VEX
|
||||
-----------
|
||||
|
||||
**Purpose**: This X86 specific tablegen backend emits tables that map EVEX
|
||||
encoded instructions to their VEX encoded identical instruction.
|
||||
|
||||
Clang BackEnds
|
||||
==============
|
||||
|
||||
|
@ -313,7 +313,7 @@ default outputs a ``ModuleID``:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
``ModuleID`` can unexpetedly match against ``CHECK`` lines. For example:
|
||||
``ModuleID`` can unexpectedly match against ``CHECK`` lines. For example:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
@ -387,23 +387,49 @@ depends on special features of sub-architectures, you must add the specific
|
||||
triple, test with the specific FileCheck and put it into the specific
|
||||
directory that will filter out all other architectures.
|
||||
|
||||
REQUIRES and REQUIRES-ANY directive
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Some tests can be enabled only in specific situation - like having
|
||||
debug build. Use ``REQUIRES`` directive to specify those requirements.
|
||||
Constraining test execution
|
||||
---------------------------
|
||||
|
||||
Some tests can be run only in specific configurations, such as
|
||||
with debug builds or on particular platforms. Use ``REQUIRES``
|
||||
and ``UNSUPPORTED`` to control when the test is enabled.
|
||||
|
||||
Some tests are expected to fail. For example, there may be a known bug
|
||||
that the test detects. Use ``XFAIL`` to mark a test as an expected failure.
|
||||
An ``XFAIL`` test will be successful if its execution fails, and
|
||||
will be a failure if its execution succeeds.
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
; This test will be only enabled in the build with asserts
|
||||
; This test will be only enabled in the build with asserts.
|
||||
; REQUIRES: asserts
|
||||
; This test is disabled on Linux.
|
||||
; UNSUPPORTED: -linux-
|
||||
; This test is expected to fail on PowerPC.
|
||||
; XFAIL: powerpc
|
||||
|
||||
You can separate requirements by a comma.
|
||||
``REQUIRES`` means all listed requirements must be satisfied.
|
||||
``REQUIRES-ANY`` means at least one must be satisfied.
|
||||
``REQUIRES`` and ``UNSUPPORTED`` and ``XFAIL`` all accept a comma-separated
|
||||
list of boolean expressions. The values in each expression may be:
|
||||
|
||||
- Features added to ``config.available_features`` by
|
||||
configuration files such as ``lit.cfg``.
|
||||
- Substrings of the target triple (``UNSUPPORTED`` and ``XFAIL`` only).
|
||||
|
||||
| ``REQUIRES`` enables the test if all expressions are true.
|
||||
| ``UNSUPPORTED`` disables the test if any expression is true.
|
||||
| ``XFAIL`` expects the test to fail if any expression is true.
|
||||
|
||||
As a special case, ``XFAIL: *`` is expected to fail everywhere.
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
; This test is disabled on Windows,
|
||||
; and is disabled on Linux, except for Android Linux.
|
||||
; UNSUPPORTED: windows, linux && !android
|
||||
; This test is expected to fail on both PowerPC and ARM.
|
||||
; XFAIL: powerpc || arm
|
||||
|
||||
List of features that can be used in ``REQUIRES`` and ``REQUIRES-ANY`` can be
|
||||
found in lit.cfg files.
|
||||
|
||||
Substitutions
|
||||
-------------
|
||||
@ -442,6 +468,25 @@ RUN lines:
|
||||
|
||||
Expands to the path separator, i.e. ``:`` (or ``;`` on Windows).
|
||||
|
||||
``%/s, %/S, %/t, %/T:``
|
||||
|
||||
Act like the corresponding substitution above but replace any ``\``
|
||||
character with a ``/``. This is useful to normalize path separators.
|
||||
|
||||
Example: ``%s: C:\Desktop Files/foo_test.s.tmp``
|
||||
|
||||
Example: ``%/s: C:/Desktop Files/foo_test.s.tmp``
|
||||
|
||||
``%:s, %:S, %:t, %:T:``
|
||||
|
||||
Act like the corresponding substitution above but remove colons at
|
||||
the beginning of Windows paths. This is useful to allow concatenation
|
||||
of absolute paths on Windows to produce a legal path.
|
||||
|
||||
Example: ``%s: C:\Desktop Files\foo_test.s.tmp``
|
||||
|
||||
Example: ``%:s: C\Desktop Files\foo_test.s.tmp``
|
||||
|
||||
|
||||
**LLVM-specific substitutions:**
|
||||
|
||||
@ -520,24 +565,6 @@ their name. For example:
|
||||
This program runs its arguments and then inverts the result code from it.
|
||||
Zero result codes become 1. Non-zero result codes become 0.
|
||||
|
||||
Sometimes it is necessary to mark a test case as "expected fail" or
|
||||
XFAIL. You can easily mark a test as XFAIL just by including ``XFAIL:``
|
||||
on a line near the top of the file. This signals that the test case
|
||||
should succeed if the test fails. Such test cases are counted separately
|
||||
by the testing tool. To specify an expected fail, use the XFAIL keyword
|
||||
in the comments of the test program followed by a colon and one or more
|
||||
failure patterns. Each failure pattern can be either ``*`` (to specify
|
||||
fail everywhere), or a part of a target triple (indicating the test
|
||||
should fail on that platform), or the name of a configurable feature
|
||||
(for example, ``loadable_module``). If there is a match, the test is
|
||||
expected to fail. If not, the test is expected to succeed. To XFAIL
|
||||
everywhere just specify ``XFAIL: *``. Here is an example of an ``XFAIL``
|
||||
line:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
; XFAIL: darwin,sun
|
||||
|
||||
To make the output more useful, :program:`lit` will scan
|
||||
the lines of the test case for ones that contain a pattern that matches
|
||||
``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number
|
||||
|
@ -593,12 +593,12 @@ the order in the definition of ``IntRegs`` in the target description file.
|
||||
FPRegsClass FPRegsRegClass;
|
||||
IntRegsClass IntRegsRegClass;
|
||||
...
|
||||
// IntRegs Sub-register Classess...
|
||||
// IntRegs Sub-register Classes...
|
||||
static const TargetRegisterClass* const IntRegsSubRegClasses [] = {
|
||||
NULL
|
||||
};
|
||||
...
|
||||
// IntRegs Super-register Classess...
|
||||
// IntRegs Super-register Classes..
|
||||
static const TargetRegisterClass* const IntRegsSuperRegClasses [] = {
|
||||
NULL
|
||||
};
|
||||
|
117
docs/XRay.rst
117
docs/XRay.rst
@ -28,8 +28,9 @@ XRay consists of three main parts:
|
||||
- A runtime library for enabling/disabling tracing at runtime.
|
||||
- A suite of tools for analysing the traces.
|
||||
|
||||
**NOTE:** As of the time of this writing, XRay is only available for x86_64
|
||||
and arm7 32-bit (no-thumb) Linux.
|
||||
**NOTE:** As of February 27, 2017, XRay is only available for the following
|
||||
architectures running Linux: x86_64, arm7 (no thumb), aarch64, powerpc64le,
|
||||
mips, mipsel, mips64, mips64el.
|
||||
|
||||
The compiler-inserted instrumentation points come in the form of nop-sleds in
|
||||
the final generated binary, and an ELF section named ``xray_instr_map`` which
|
||||
@ -84,7 +85,10 @@ GCC-style attributes or C++11-style attributes.
|
||||
|
||||
When linking a binary, you can either manually link in the `XRay Runtime
|
||||
Library`_ or use ``clang`` to link it in automatically with the
|
||||
``-fxray-instrument`` flag.
|
||||
``-fxray-instrument`` flag. Alternatively, you can statically link-in the XRay
|
||||
runtime library from compiler-rt -- those archive files will take the name of
|
||||
``libclang_rt.xray-{arch}`` where ``{arch}`` is the mnemonic supported by clang
|
||||
(x86_64, arm7, etc.).
|
||||
|
||||
LLVM Function Attribute
|
||||
-----------------------
|
||||
@ -135,7 +139,7 @@ variable, where we list down the options and their defaults below.
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
| Option | Type | Default | Description |
|
||||
+===================+=================+===============+========================+
|
||||
| patch_premain | ``bool`` | ``true`` | Whether to patch |
|
||||
| patch_premain | ``bool`` | ``false`` | Whether to patch |
|
||||
| | | | instrumentation points |
|
||||
| | | | before main. |
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
@ -146,6 +150,11 @@ variable, where we list down the options and their defaults below.
|
||||
| xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the |
|
||||
| | | | XRay logfile. |
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
| xray_fdr_log | ``bool`` | ``false`` | Whether to install the |
|
||||
| | | | Flight Data Recorder |
|
||||
| | | | (FDR) mode. |
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
|
||||
|
||||
If you choose to not use the default logging implementation that comes with the
|
||||
XRay runtime and/or control when/how the XRay instrumentation runs, you may use
|
||||
@ -175,6 +184,64 @@ thread-safety of operations to be performed by the XRay runtime library:
|
||||
XRay cannot guarantee that all threads that have ever gotten a copy of the
|
||||
pointer will not invoke the function.
|
||||
|
||||
Flight Data Recorder Mode
|
||||
-------------------------
|
||||
|
||||
XRay supports a logging mode which allows the application to only capture a
|
||||
fixed amount of memory's worth of events. Flight Data Recorder (FDR) mode works
|
||||
very much like a plane's "black box" which keeps recording data to memory in a
|
||||
fixed-size circular queue of buffers, and has the data available
|
||||
programmatically until the buffers are finalized and flushed. To use FDR mode
|
||||
on your application, you may set the ``xray_fdr_log`` option to ``true`` in the
|
||||
``XRAY_OPTIONS`` environment variable (while also optionally setting the
|
||||
``xray_naive_log`` to ``false``).
|
||||
|
||||
When FDR mode is on, it will keep writing and recycling memory buffers until
|
||||
the logging implementation is finalized -- at which point it can be flushed and
|
||||
re-initialised later. To do this programmatically, we follow the workflow
|
||||
provided below:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
// Patch the sleds, if we haven't yet.
|
||||
auto patch_status = __xray_patch();
|
||||
|
||||
// Maybe handle the patch_status errors.
|
||||
|
||||
// When we want to flush the log, we need to finalize it first, to give
|
||||
// threads a chance to return buffers to the queue.
|
||||
auto finalize_status = __xray_log_finalize();
|
||||
if (finalize_status != XRAY_LOG_FINALIZED) {
|
||||
// maybe retry, or bail out.
|
||||
}
|
||||
|
||||
// At this point, we are sure that the log is finalized, so we may try
|
||||
// flushing the log.
|
||||
auto flush_status = __xray_log_flushLog();
|
||||
if (flush_status != XRAY_LOG_FLUSHED) {
|
||||
// maybe retry, or bail out.
|
||||
}
|
||||
|
||||
The default settings for the FDR mode implementation will create logs named
|
||||
similarly to the naive log implementation, but will have a different log
|
||||
format. All the trace analysis tools (and the trace reading library) will
|
||||
support all versions of the FDR mode format as we add more functionality and
|
||||
record types in the future.
|
||||
|
||||
**NOTE:** We do not however promise perpetual support for when we update the
|
||||
log versions we support going forward. Deprecation of the formats will be
|
||||
announced and discussed on the developers mailing list.
|
||||
|
||||
XRay allows for replacing the default FDR mode logging implementation using the
|
||||
following API:
|
||||
|
||||
- ``__xray_set_log_impl(...)``: This function takes a struct of type
|
||||
``XRayLogImpl``, which is defined in ``xray/xray_log_interface.h``, part of
|
||||
the XRay compiler-rt installation.
|
||||
- ``__xray_log_init(...)``: This function allows for initializing and
|
||||
re-initializing an installed logging implementation. See
|
||||
``xray/xray_log_interface.h`` for details, part of the XRay compiler-rt
|
||||
installation.
|
||||
|
||||
Trace Analysis Tools
|
||||
--------------------
|
||||
@ -185,7 +252,26 @@ supports the following subcommands:
|
||||
|
||||
- ``extract``: Extract the instrumentation map from a binary, and return it as
|
||||
YAML.
|
||||
- ``account``: Performs basic function call accounting statistics with various
|
||||
options for sorting, and output formats (supports CSV, YAML, and
|
||||
console-friendly TEXT).
|
||||
- ``convert``: Converts an XRay log file from one format to another. Currently
|
||||
only converts to YAML.
|
||||
- ``graph``: Generates a DOT graph of the function call relationships between
|
||||
functions found in an XRay trace.
|
||||
|
||||
These subcommands use various library components found as part of the XRay
|
||||
libraries, distributed with the LLVM distribution. These are:
|
||||
|
||||
- ``llvm/XRay/Trace.h`` : A trace reading library for conveniently loading
|
||||
an XRay trace of supported forms, into a convenient in-memory representation.
|
||||
All the analysis tools that deal with traces use this implementation.
|
||||
- ``llvm/XRay/Graph.h`` : A semi-generic graph type used by the graph
|
||||
subcommand to conveniently represent a function call graph with statistics
|
||||
associated with edges and vertices.
|
||||
- ``llvm/XRay/InstrumentationMap.h``: A convenient tool for analyzing the
|
||||
instrumentation map in XRay-instrumented object files and binaries. The
|
||||
``extract`` subcommand uses this particular library.
|
||||
|
||||
Future Work
|
||||
===========
|
||||
@ -193,38 +279,19 @@ Future Work
|
||||
There are a number of ongoing efforts for expanding the toolset building around
|
||||
the XRay instrumentation system.
|
||||
|
||||
Flight Data Recorder Mode
|
||||
-------------------------
|
||||
|
||||
The `XRay whitepaper`_ mentions a mode for when events are kept in memory, and
|
||||
have the traces be dumped on demand through a triggering API. This work is
|
||||
currently ongoing.
|
||||
|
||||
Trace Analysis
|
||||
--------------
|
||||
|
||||
There are a few more subcommands making its way to the ``llvm-xray`` tool, that
|
||||
are currently under review:
|
||||
|
||||
- ``convert``: Turns an XRay trace from one format to another. Currently
|
||||
supporting conversion from the binary XRay log to YAML.
|
||||
- ``account``: Do function call accounting based on data in the XRay log.
|
||||
|
||||
We have more subcommands and modes that we're thinking of developing, in the
|
||||
following forms:
|
||||
|
||||
- ``stack``: Reconstruct the function call stacks in a timeline.
|
||||
- ``convert``: Converting from one version of the XRay log to another (higher)
|
||||
version, and converting to other trace formats (i.e. Chrome Trace Viewer,
|
||||
pprof, etc.).
|
||||
- ``graph``: Generate a function call graph with relative timings and distributions.
|
||||
|
||||
More Platforms
|
||||
--------------
|
||||
|
||||
Since XRay is only currently available in x86_64 and arm7 32-bit (no-thumb)
|
||||
running Linux, we're looking to supporting more platforms (architectures and
|
||||
operating systems).
|
||||
We're looking forward to contributions to port XRay to more architectures and
|
||||
operating systems.
|
||||
|
||||
.. References...
|
||||
|
||||
|
273
docs/XRayExample.rst
Normal file
273
docs/XRayExample.rst
Normal file
@ -0,0 +1,273 @@
|
||||
===================
|
||||
Debugging with XRay
|
||||
===================
|
||||
|
||||
This document shows an example of how you would go about analyzing applications
|
||||
built with XRay instrumentation. Here we will attempt to debug ``llc``
|
||||
compiling some sample LLVM IR generated by Clang.
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
Building with XRay
|
||||
------------------
|
||||
|
||||
To debug an application with XRay instrumentation, we need to build it with a
|
||||
Clang that supports the ``-fxray-instrument`` option. See `XRay <XRay.html>`_
|
||||
for more technical details of how XRay works for background information.
|
||||
|
||||
In our example, we need to add ``-fxray-instrument`` to the list of flags
|
||||
passed to Clang when building a binary. Note that we need to link with Clang as
|
||||
well to get the XRay runtime linked in appropriately. For building ``llc`` with
|
||||
XRay, we do something similar below for our LLVM build:
|
||||
|
||||
::
|
||||
|
||||
$ mkdir -p llvm-build && cd llvm-build
|
||||
# Assume that the LLVM sources are at ../llvm
|
||||
$ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_C_FLAGS_RELEASE="-fxray-instrument" -DCMAKE_CXX_FLAGS="-fxray-instrument"
|
||||
# Once this finishes, we should build llc
|
||||
$ ninja llc
|
||||
|
||||
|
||||
To verify that we have an XRay instrumented binary, we can use ``objdump`` to
|
||||
look for the ``xray_instr_map`` section.
|
||||
|
||||
::
|
||||
|
||||
$ objdump -h -j xray_instr_map ./bin/llc
|
||||
./bin/llc: file format elf64-x86-64
|
||||
|
||||
Sections:
|
||||
Idx Name Size VMA LMA File off Algn
|
||||
14 xray_instr_map 00002fc0 00000000041516c6 00000000041516c6 03d516c6 2**0
|
||||
CONTENTS, ALLOC, LOAD, READONLY, DATA
|
||||
|
||||
Getting Traces
|
||||
--------------
|
||||
|
||||
By default, XRay does not write out the trace files or patch the application
|
||||
before main starts. If we just run ``llc`` it should just work like a normally
|
||||
built binary. However, if we want to get a full trace of the application's
|
||||
operations (of the functions we do end up instrumenting with XRay) then we need
|
||||
to enable XRay at application start. To do this, XRay checks the
|
||||
``XRAY_OPTIONS`` environment variable.
|
||||
|
||||
::
|
||||
|
||||
# The following doesn't create an XRay trace by default.
|
||||
$ ./bin/llc input.ll
|
||||
|
||||
# We need to set the XRAY_OPTIONS to enable some features.
|
||||
$ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll
|
||||
==69819==XRay: Log file in 'xray-log.llc.m35qPB'
|
||||
|
||||
At this point we now have an XRay trace we can start analysing.
|
||||
|
||||
The ``llvm-xray`` Tool
|
||||
----------------------
|
||||
|
||||
Having a trace then allows us to do basic accounting of the functions that were
|
||||
instrumented, and how much time we're spending in parts of the code. To make
|
||||
sense of this data, we use the ``llvm-xray`` tool which has a few subcommands
|
||||
to help us understand our trace.
|
||||
|
||||
One of the simplest things we can do is to get an accounting of the functions
|
||||
that have been instrumented. We can see an example accounting with ``llvm-xray
|
||||
account``:
|
||||
|
||||
::
|
||||
|
||||
$ llvm-xray account xray-log.llc.m35qPB -top=10 -sort=sum -sortorder=dsc -instr_map ./bin/llc
|
||||
Functions with latencies: 29
|
||||
funcid count [ min, med, 90p, 99p, max] sum function
|
||||
187 360 [ 0.000000, 0.000001, 0.000014, 0.000032, 0.000075] 0.001596 LLLexer.cpp:446:0: llvm::LLLexer::LexIdentifier()
|
||||
85 130 [ 0.000000, 0.000000, 0.000018, 0.000023, 0.000156] 0.000799 X86ISelDAGToDAG.cpp:1984:0: (anonymous namespace)::X86DAGToDAGISel::Select(llvm::SDNode*)
|
||||
138 130 [ 0.000000, 0.000000, 0.000017, 0.000155, 0.000155] 0.000774 SelectionDAGISel.cpp:2963:0: llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int)
|
||||
188 103 [ 0.000000, 0.000000, 0.000003, 0.000123, 0.000214] 0.000737 LLParser.cpp:2692:0: llvm::LLParser::ParseValID(llvm::ValID&, llvm::LLParser::PerFunctionState*)
|
||||
88 1 [ 0.000562, 0.000562, 0.000562, 0.000562, 0.000562] 0.000562 X86ISelLowering.cpp:83:0: llvm::X86TargetLowering::X86TargetLowering(llvm::X86TargetMachine const&, llvm::X86Subtarget const&)
|
||||
125 102 [ 0.000001, 0.000003, 0.000010, 0.000017, 0.000049] 0.000471 Verifier.cpp:3714:0: (anonymous namespace)::Verifier::visitInstruction(llvm::Instruction&)
|
||||
90 8 [ 0.000023, 0.000035, 0.000106, 0.000106, 0.000106] 0.000342 X86ISelLowering.cpp:3363:0: llvm::X86TargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl<llvm::SDValue>&) const
|
||||
124 32 [ 0.000003, 0.000007, 0.000016, 0.000041, 0.000041] 0.000310 Verifier.cpp:1967:0: (anonymous namespace)::Verifier::visitFunction(llvm::Function const&)
|
||||
123 1 [ 0.000302, 0.000302, 0.000302, 0.000302, 0.000302] 0.000302 LLVMContextImpl.cpp:54:0: llvm::LLVMContextImpl::~LLVMContextImpl()
|
||||
139 46 [ 0.000000, 0.000002, 0.000006, 0.000008, 0.000019] 0.000138 TargetLowering.cpp:506:0: llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::APInt&, llvm::APInt&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const
|
||||
|
||||
This shows us that for our input file, ``llc`` spent the most cumulative time
|
||||
in the lexer (a total of 1 millisecond). If we wanted for example to work with
|
||||
this data in a spreadsheet, we can output the results as CSV using the
|
||||
``-format=csv`` option to the command for further analysis.
|
||||
|
||||
If we want to get a textual representation of the raw trace we can use the
|
||||
``llvm-xray convert`` tool to get YAML output. The first few lines of that
|
||||
output for an example trace would look like the following:
|
||||
|
||||
::
|
||||
|
||||
$ llvm-xray convert -f yaml -symbolize -instr_map=./bin/llc xray-log.llc.m35qPB
|
||||
---
|
||||
header:
|
||||
version: 1
|
||||
type: 0
|
||||
constant-tsc: true
|
||||
nonstop-tsc: true
|
||||
cycle-frequency: 2601000000
|
||||
records:
|
||||
- { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426023268520 }
|
||||
- { type: 0, func-id: 110, function: __cxx_global_var_init.8, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426023523052 }
|
||||
- { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426029925386 }
|
||||
- { type: 0, func-id: 164, function: __cxx_global_var_init, cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426030031128 }
|
||||
- { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426046951388 }
|
||||
- { type: 0, func-id: 142, function: '(anonymous namespace)::CommandLineParser::ParseCommandLineOptions(int, char const* const*, llvm::StringRef, llvm::raw_ostream*)', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426047282020 }
|
||||
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426047857332 }
|
||||
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426047984152 }
|
||||
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048036584 }
|
||||
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048042292 }
|
||||
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-enter, tsc: 5434426048055056 }
|
||||
- { type: 0, func-id: 187, function: 'llvm::LLLexer::LexIdentifier()', cpu: 37, thread: 69819, kind: function-exit, tsc: 5434426048067316 }
|
||||
|
||||
Controlling Fidelity
|
||||
--------------------
|
||||
|
||||
So far in our examples, we haven't been getting full coverage of the functions
|
||||
we have in the binary. To get that, we need to modify the compiler flags so
|
||||
that we can instrument more (if not all) the functions we have in the binary.
|
||||
We have two options for doing that, and we explore both of these below.
|
||||
|
||||
Instruction Threshold
|
||||
`````````````````````
|
||||
|
||||
The first "blunt" way of doing this is by setting the minimum threshold for
|
||||
function bodies to 1. We can do that with the
|
||||
``-fxray-instruction-threshold=N`` flag when building our binary. We rebuild
|
||||
``llc`` with this option and observe the results:
|
||||
|
||||
::
|
||||
|
||||
$ rm CMakeCache.txt
|
||||
$ cmake -GNinja ../llvm -DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_C_FLAGS_RELEASE="-fxray-instrument -fxray-instruction-threshold=1" \
|
||||
-DCMAKE_CXX_FLAGS="-fxray-instrument -fxray-instruction-threshold=1"
|
||||
$ ninja llc
|
||||
$ XRAY_OPTIONS="patch_premain=true" ./bin/llc input.ll
|
||||
==69819==XRay: Log file in 'xray-log.llc.5rqxkU'
|
||||
|
||||
$ llvm-xray account xray-log.llc.5rqxkU -top=10 -sort=sum -sortorder=dsc -instr_map ./bin/llc
|
||||
Functions with latencies: 36652
|
||||
funcid count [ min, med, 90p, 99p, max] sum function
|
||||
75 1 [ 0.672368, 0.672368, 0.672368, 0.672368, 0.672368] 0.672368 llc.cpp:271:0: main
|
||||
78 1 [ 0.626455, 0.626455, 0.626455, 0.626455, 0.626455] 0.626455 llc.cpp:381:0: compileModule(char**, llvm::LLVMContext&)
|
||||
139617 1 [ 0.472618, 0.472618, 0.472618, 0.472618, 0.472618] 0.472618 LegacyPassManager.cpp:1723:0: llvm::legacy::PassManager::run(llvm::Module&)
|
||||
139610 1 [ 0.472618, 0.472618, 0.472618, 0.472618, 0.472618] 0.472618 LegacyPassManager.cpp:1681:0: llvm::legacy::PassManagerImpl::run(llvm::Module&)
|
||||
139612 1 [ 0.470948, 0.470948, 0.470948, 0.470948, 0.470948] 0.470948 LegacyPassManager.cpp:1564:0: (anonymous namespace)::MPPassManager::runOnModule(llvm::Module&)
|
||||
139607 2 [ 0.147345, 0.315994, 0.315994, 0.315994, 0.315994] 0.463340 LegacyPassManager.cpp:1530:0: llvm::FPPassManager::runOnModule(llvm::Module&)
|
||||
139605 21 [ 0.000002, 0.000002, 0.102593, 0.213336, 0.213336] 0.463331 LegacyPassManager.cpp:1491:0: llvm::FPPassManager::runOnFunction(llvm::Function&)
|
||||
139563 26096 [ 0.000002, 0.000002, 0.000037, 0.000063, 0.000215] 0.225708 LegacyPassManager.cpp:1083:0: llvm::PMDataManager::findAnalysisPass(void const*, bool)
|
||||
108055 188 [ 0.000002, 0.000120, 0.001375, 0.004523, 0.062624] 0.159279 MachineFunctionPass.cpp:38:0: llvm::MachineFunctionPass::runOnFunction(llvm::Function&)
|
||||
62635 22 [ 0.000041, 0.000046, 0.000050, 0.126744, 0.126744] 0.127715 X86TargetMachine.cpp:242:0: llvm::X86TargetMachine::getSubtargetImpl(llvm::Function const&) const
|
||||
|
||||
|
||||
Instrumentation Attributes
|
||||
``````````````````````````
|
||||
|
||||
The other way is to use configuration files for selecting which functions
|
||||
should always be instrumented by the compiler. This gives us a way of ensuring
|
||||
that certain functions are either always or never instrumented without having to
|
||||
add the attribute to the source.
|
||||
|
||||
To use this feature, you can define one file for the functions to always
|
||||
instrument, and another for functions to never instrument. The format of these
|
||||
files is exactly the same as the SanitizerLists files that control similar
|
||||
things for the sanitizer implementations. For example, we can have two
|
||||
different files like below:
|
||||
|
||||
::
|
||||
|
||||
# always-instrument.txt
|
||||
# always instrument functions that match the following filters:
|
||||
fun:main
|
||||
|
||||
# never-instrument.txt
|
||||
# never instrument functions that match the following filters:
|
||||
fun:__cxx_*
|
||||
|
||||
Given the above two files we can re-build by providing those two files as
|
||||
arguments to clang as ``-fxray-always-instrument=always-instrument.txt`` or
|
||||
``-fxray-never-instrument=never-instrument.txt``.
|
||||
|
||||
Further Exploration
|
||||
-------------------
|
||||
|
||||
The ``llvm-xray`` tool has a few other subcommands that are in various stages
|
||||
of being developed. One interesting subcommand that can highlight a few
|
||||
interesting things is the ``graph`` subcommand. Given for example the following
|
||||
toy program that we build with XRay instrumentation, we can see how the
|
||||
generated graph may be a helpful indicator of where time is being spent for the
|
||||
application.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
// sample.cc
|
||||
#include <iostream>
|
||||
#include <thread>
|
||||
|
||||
[[clang::xray_always_instrument]] void f() {
|
||||
std::cerr << '.';
|
||||
}
|
||||
|
||||
[[clang::xray_always_instrument]] void g() {
|
||||
for (int i = 0; i < 1 << 10; ++i) {
|
||||
std::cerr << '-';
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
std::thread t1([] {
|
||||
for (int i = 0; i < 1 << 10; ++i)
|
||||
f();
|
||||
});
|
||||
std::thread t2([] {
|
||||
g();
|
||||
});
|
||||
t1.join();
|
||||
t2.join();
|
||||
std::cerr << '\n';
|
||||
}
|
||||
|
||||
We then build the above with XRay instrumentation:
|
||||
|
||||
::
|
||||
|
||||
$ clang++ -o sample -O3 sample.cc -std=c++11 -fxray-instrument -fxray-instruction-threshold=1
|
||||
$ XRAY_OPTIONS="patch_premain=true" ./sample
|
||||
|
||||
We can then explore the graph rendering of the trace generated by this sample
|
||||
application. We assume you have the graphviz tools available on your system,
|
||||
including both ``unflatten`` and ``dot``. If you prefer rendering or exploring
|
||||
the graph using another tool, then that should be feasible as well. ``llvm-xray
|
||||
graph`` will create DOT format graphs which should be usable in most graph
|
||||
rendering applications. One example invocation of the ``llvm-xray graph``
|
||||
command should yield some interesting insights into the workings of C++
|
||||
applications:
|
||||
|
||||
::
|
||||
|
||||
$ llvm-xray graph xray-log.sample.* -m sample -color-edges=sum -edge-label=sum \
|
||||
| unflatten -f -l10 | dot -Tsvg -o sample.svg
|
||||
|
||||
Next Steps
|
||||
----------
|
||||
|
||||
If you have some interesting analyses you'd like to implement as part of the
|
||||
llvm-xray tool, please feel free to propose them on the llvm-dev@ mailing list.
|
||||
The following are some ideas to inspire you in getting involved and potentially
|
||||
making things better.
|
||||
|
||||
- Implement a query/filtering library that allows for finding patterns in the
|
||||
XRay traces.
|
||||
- A conversion from the XRay trace onto something that can be visualised
|
||||
better by other tools (like the Chrome trace viewer for example).
|
||||
- Collecting function call stacks and how often they're encountered in the
|
||||
XRay trace.
|
||||
|
||||
|
@ -731,7 +731,7 @@ it is parsed. This allows dynamic types of nodes. But the YAML I/O model uses
|
||||
static typing, so there are limits to how you can use tags with the YAML I/O
|
||||
model. Recently, we added support to YAML I/O for checking/setting the optional
|
||||
tag on a map. Using this functionality it is even possible to support different
|
||||
mappings, as long as they are convertable.
|
||||
mappings, as long as they are convertible.
|
||||
|
||||
To check a tag, inside your mapping() method you can use io.mapTag() to specify
|
||||
what the tag should be. This will also add that tag when writing yaml.
|
||||
|
10
docs/conf.py
10
docs/conf.py
@ -47,10 +47,10 @@ copyright = u'2003-%d, LLVM Project' % date.today().year
|
||||
# |version| and |release|, also used in various other places throughout the
|
||||
# built documents.
|
||||
#
|
||||
# The short X.Y version.
|
||||
version = '4'
|
||||
# The short version.
|
||||
version = '5'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '4'
|
||||
release = '5'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
@ -251,3 +251,7 @@ for name in os.listdir(command_guide_path):
|
||||
|
||||
# FIXME: Define intersphinx configuration.
|
||||
intersphinx_mapping = {}
|
||||
|
||||
# Pygments lexers are sometimes out of date (when parsing LLVM for example) or
|
||||
# wrong. Suppress the warning so the build doesn't abort.
|
||||
suppress_warnings = [ 'misc.highlighting_failure' ]
|
||||
|
@ -1885,7 +1885,7 @@ ENABLE_PREPROCESSING = YES
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
MACRO_EXPANSION = NO
|
||||
MACRO_EXPANSION = YES
|
||||
|
||||
# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES then
|
||||
# the macro expansion is limited to the macros specified with the PREDEFINED and
|
||||
@ -1893,7 +1893,7 @@ MACRO_EXPANSION = NO
|
||||
# The default value is: NO.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
EXPAND_ONLY_PREDEF = NO
|
||||
EXPAND_ONLY_PREDEF = YES
|
||||
|
||||
# If the SEARCH_INCLUDES tag is set to YES the includes files in the
|
||||
# INCLUDE_PATH will be searched if a #include is found.
|
||||
@ -1925,7 +1925,7 @@ INCLUDE_FILE_PATTERNS =
|
||||
# recursively expanded use the := operator instead of the = operator.
|
||||
# This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
|
||||
|
||||
PREDEFINED =
|
||||
PREDEFINED = LLVM_ALIGNAS(x)=
|
||||
|
||||
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this
|
||||
# tag can be used to specify a list of macro names that should be expanded. The
|
||||
|
@ -1,6 +1,11 @@
|
||||
Overview
|
||||
========
|
||||
|
||||
.. warning::
|
||||
|
||||
If you are using a released version of LLVM, see `the download page
|
||||
<http://llvm.org/releases/>`_ to find your documentation.
|
||||
|
||||
The LLVM compiler infrastructure supports a wide range of projects, from
|
||||
industrial strength compilers to specialized JIT applications to small
|
||||
research projects.
|
||||
@ -269,6 +274,7 @@ For API clients and LLVM developers.
|
||||
Coroutines
|
||||
GlobalISel
|
||||
XRay
|
||||
XRayExample
|
||||
PDB/index
|
||||
|
||||
:doc:`WritingAnLLVMPass`
|
||||
@ -394,6 +400,9 @@ For API clients and LLVM developers.
|
||||
:doc:`XRay`
|
||||
High-level documentation of how to use XRay in LLVM.
|
||||
|
||||
:doc:`XRayExample`
|
||||
An example of how to debug an application with XRay.
|
||||
|
||||
:doc:`The Microsoft PDB File Format <PDB/index>`
|
||||
A detailed description of the Microsoft PDB (Program Database) file format.
|
||||
|
||||
|
@ -125,14 +125,12 @@ usual include guards and #includes [2]_, we get to the definition of our class:
|
||||
|
||||
class KaleidoscopeJIT {
|
||||
private:
|
||||
|
||||
std::unique_ptr<TargetMachine> TM;
|
||||
const DataLayout DL;
|
||||
ObjectLinkingLayer<> ObjectLayer;
|
||||
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
|
||||
|
||||
public:
|
||||
|
||||
typedef decltype(CompileLayer)::ModuleSetHandleT ModuleHandleT;
|
||||
|
||||
Our class begins with four members: A TargetMachine, TM, which will be used
|
||||
@ -152,16 +150,16 @@ compiling it, and passing the resulting in-memory object files down to the
|
||||
object linking layer below.
|
||||
|
||||
That's it for member variables, after that we have a single typedef:
|
||||
ModuleHandle. This is the handle type that will be returned from our JIT's
|
||||
ModuleHandleT. This is the handle type that will be returned from our JIT's
|
||||
addModule method, and can be passed to the removeModule method to remove a
|
||||
module. The IRCompileLayer class already provides a convenient handle type
|
||||
(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandle to this.
|
||||
(IRCompileLayer::ModuleSetHandleT), so we just alias our ModuleHandleT to this.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
KaleidoscopeJIT()
|
||||
: TM(EngineBuilder().selectTarget()), DL(TM->createDataLayout()),
|
||||
CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
|
||||
CompileLayer(ObjectLayer, SimpleCompiler(*TM)) {
|
||||
llvm::sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
|
||||
}
|
||||
|
||||
@ -200,7 +198,7 @@ available for execution.
|
||||
return JITSymbol(nullptr);
|
||||
});
|
||||
|
||||
// Build a singlton module set to hold our module.
|
||||
// Build a singleton module set to hold our module.
|
||||
std::vector<std::unique_ptr<Module>> Ms;
|
||||
Ms.push_back(std::move(M));
|
||||
|
||||
@ -259,16 +257,16 @@ were linked into a single, ever-growing logical dylib. To implement this our
|
||||
first lambda (the one defining findSymbolInLogicalDylib) will just search for
|
||||
JIT'd code by calling the CompileLayer's findSymbol method. If we don't find a
|
||||
symbol in the JIT itself we'll fall back to our second lambda, which implements
|
||||
findSymbol. This will use the RTDyldMemoyrManager::getSymbolAddressInProcess
|
||||
findSymbol. This will use the RTDyldMemoryManager::getSymbolAddressInProcess
|
||||
method to search for the symbol within the program itself. If we can't find a
|
||||
symbol definition via either of these paths the JIT will refuse to accept our
|
||||
symbol definition via either of these paths, the JIT will refuse to accept our
|
||||
module, returning a "symbol not found" error.
|
||||
|
||||
Now that we've built our symbol resolver we're ready to add our module to the
|
||||
Now that we've built our symbol resolver, we're ready to add our module to the
|
||||
JIT. We do this by calling the CompileLayer's addModuleSet method [4]_. Since
|
||||
we only have a single Module and addModuleSet expects a collection, we will
|
||||
create a vector of modules and add our module as the only member. Since we
|
||||
have already typedef'd our ModuleHandle type to be the same as the
|
||||
have already typedef'd our ModuleHandleT type to be the same as the
|
||||
CompileLayer's handle type, we can return the handle from addModuleSet
|
||||
directly from our addModule method.
|
||||
|
||||
@ -304,7 +302,7 @@ treated as a duplicate definition when the next top-level expression is
|
||||
entered. It is generally good to free any module that you know you won't need
|
||||
to call further, just to free up the resources dedicated to it. However, you
|
||||
don't strictly need to do this: All resources will be cleaned up when your
|
||||
JIT class is destructed, if the haven't been freed before then.
|
||||
JIT class is destructed, if they haven't been freed before then.
|
||||
|
||||
This brings us to the end of Chapter 1 of Building a JIT. You now have a basic
|
||||
but fully functioning JIT stack that you can use to take LLVM IR and make it
|
||||
|
@ -119,6 +119,8 @@ way to talk about functions themselves:
|
||||
public:
|
||||
PrototypeAST(const std::string &name, std::vector<std::string> Args)
|
||||
: Name(name), Args(std::move(Args)) {}
|
||||
|
||||
const std::string &getName() const { return Name; }
|
||||
};
|
||||
|
||||
/// FunctionAST - This class represents a function definition itself.
|
||||
|
@ -122,7 +122,7 @@ First we'll do numeric literals:
|
||||
.. code-block:: c++
|
||||
|
||||
Value *NumberExprAST::codegen() {
|
||||
return ConstantFP::get(LLVMContext, APFloat(Val));
|
||||
return ConstantFP::get(TheContext, APFloat(Val));
|
||||
}
|
||||
|
||||
In the LLVM IR, numeric constants are represented with the
|
||||
@ -171,7 +171,7 @@ variables <LangImpl7.html#user-defined-local-variables>`_.
|
||||
case '<':
|
||||
L = Builder.CreateFCmpULT(L, R, "cmptmp");
|
||||
// Convert bool 0/1 to double 0.0 or 1.0
|
||||
return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext),
|
||||
return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext),
|
||||
"booltmp");
|
||||
default:
|
||||
return LogErrorV("invalid binary operator");
|
||||
@ -270,9 +270,9 @@ with:
|
||||
Function *PrototypeAST::codegen() {
|
||||
// Make the function type: double(double,double) etc.
|
||||
std::vector<Type*> Doubles(Args.size(),
|
||||
Type::getDoubleTy(LLVMContext));
|
||||
Type::getDoubleTy(TheContext));
|
||||
FunctionType *FT =
|
||||
FunctionType::get(Type::getDoubleTy(LLVMContext), Doubles, false);
|
||||
FunctionType::get(Type::getDoubleTy(TheContext), Doubles, false);
|
||||
|
||||
Function *F =
|
||||
Function::Create(FT, Function::ExternalLinkage, Name, TheModule);
|
||||
@ -346,7 +346,7 @@ assert that the function is empty (i.e. has no body yet) before we start.
|
||||
.. code-block:: c++
|
||||
|
||||
// Create a new basic block to start insertion into.
|
||||
BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction);
|
||||
BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
|
||||
Builder.SetInsertPoint(BB);
|
||||
|
||||
// Record the function arguments in the NamedValues map.
|
||||
@ -533,7 +533,8 @@ This shows an extern for the libm "cos" function, and a call to it.
|
||||
ret double %calltmp
|
||||
}
|
||||
|
||||
When you quit the current demo, it dumps out the IR for the entire
|
||||
When you quit the current demo (by sending an EOF via CTRL+D on Linux
|
||||
or CTRL+Z and ENTER on Windows), it dumps out the IR for the entire
|
||||
module generated. Here you can see the big picture with all the
|
||||
functions referencing each other.
|
||||
|
||||
|
@ -131,33 +131,29 @@ for us:
|
||||
|
||||
void InitializeModuleAndPassManager(void) {
|
||||
// Open a new module.
|
||||
Context LLVMContext;
|
||||
TheModule = llvm::make_unique<Module>("my cool jit", LLVMContext);
|
||||
TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
|
||||
TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
|
||||
|
||||
// Create a new pass manager attached to it.
|
||||
TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());
|
||||
|
||||
// Provide basic AliasAnalysis support for GVN.
|
||||
TheFPM.add(createBasicAliasAnalysisPass());
|
||||
// Do simple "peephole" optimizations and bit-twiddling optzns.
|
||||
TheFPM.add(createInstructionCombiningPass());
|
||||
TheFPM->add(createInstructionCombiningPass());
|
||||
// Reassociate expressions.
|
||||
TheFPM.add(createReassociatePass());
|
||||
TheFPM->add(createReassociatePass());
|
||||
// Eliminate Common SubExpressions.
|
||||
TheFPM.add(createGVNPass());
|
||||
TheFPM->add(createGVNPass());
|
||||
// Simplify the control flow graph (deleting unreachable blocks, etc).
|
||||
TheFPM.add(createCFGSimplificationPass());
|
||||
TheFPM->add(createCFGSimplificationPass());
|
||||
|
||||
TheFPM.doInitialization();
|
||||
TheFPM->doInitialization();
|
||||
}
|
||||
|
||||
This code initializes the global module ``TheModule``, and the function pass
|
||||
manager ``TheFPM``, which is attached to ``TheModule``. Once the pass manager is
|
||||
set up, we use a series of "add" calls to add a bunch of LLVM passes.
|
||||
|
||||
In this case, we choose to add five passes: one analysis pass (alias analysis),
|
||||
and four optimization passes. The passes we choose here are a pretty standard set
|
||||
In this case, we choose to add four optimization passes.
|
||||
The passes we choose here are a pretty standard set
|
||||
of "cleanup" optimizations that are useful for a wide variety of code. I won't
|
||||
delve into what they do but, believe me, they are a good starting place :).
|
||||
|
||||
@ -227,8 +223,10 @@ expressions they type in. For example, if they type in "1 + 2;", we
|
||||
should evaluate and print out 3. If they define a function, they should
|
||||
be able to call it from the command line.
|
||||
|
||||
In order to do this, we first declare and initialize the JIT. This is
|
||||
done by adding a global variable ``TheJIT``, and initializing it in
|
||||
In order to do this, we first prepare the environment to create code for
|
||||
the current native target and declare and initialize the JIT. This is
|
||||
done by calling some ``InitializeNativeTarget\*`` functions and
|
||||
adding a global variable ``TheJIT``, and initializing it in
|
||||
``main``:
|
||||
|
||||
.. code-block:: c++
|
||||
@ -236,7 +234,21 @@ done by adding a global variable ``TheJIT``, and initializing it in
|
||||
static std::unique_ptr<KaleidoscopeJIT> TheJIT;
|
||||
...
|
||||
int main() {
|
||||
..
|
||||
InitializeNativeTarget();
|
||||
InitializeNativeTargetAsmPrinter();
|
||||
InitializeNativeTargetAsmParser();
|
||||
|
||||
// Install standard binary operators.
|
||||
// 1 is lowest precedence.
|
||||
BinopPrecedence['<'] = 10;
|
||||
BinopPrecedence['+'] = 20;
|
||||
BinopPrecedence['-'] = 20;
|
||||
BinopPrecedence['*'] = 40; // highest.
|
||||
|
||||
// Prime the first token.
|
||||
fprintf(stderr, "ready> ");
|
||||
getNextToken();
|
||||
|
||||
TheJIT = llvm::make_unique<KaleidoscopeJIT>();
|
||||
|
||||
// Run the main "interpreter loop" now.
|
||||
@ -245,9 +257,24 @@ done by adding a global variable ``TheJIT``, and initializing it in
|
||||
return 0;
|
||||
}
|
||||
|
||||
We also need to set up the data layout for the JIT:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
void InitializeModuleAndPassManager(void) {
|
||||
// Open a new module.
|
||||
TheModule = llvm::make_unique<Module>("my cool jit", TheContext);
|
||||
TheModule->setDataLayout(TheJIT->getTargetMachine().createDataLayout());
|
||||
|
||||
// Create a new pass manager attached to it.
|
||||
TheFPM = llvm::make_unique<FunctionPassManager>(TheModule.get());
|
||||
...
|
||||
|
||||
The KaleidoscopeJIT class is a simple JIT built specifically for these
|
||||
tutorials. In later chapters we will look at how it works and extend it with
|
||||
new features, but for now we will take it as given. Its API is very simple::
|
||||
tutorials, available inside the LLVM source code
|
||||
at llvm-src/examples/Kaleidoscope/include/KaleidoscopeJIT.h.
|
||||
In later chapters we will look at how it works and extend it with
|
||||
new features, but for now we will take it as given. Its API is very simple:
|
||||
``addModule`` adds an LLVM IR module to the JIT, making its functions
|
||||
available for execution; ``removeModule`` removes a module, freeing any
|
||||
memory associated with the code in that module; and ``findSymbol`` allows us
|
||||
@ -458,7 +485,8 @@ We also need to update HandleDefinition and HandleExtern:
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModuleAndPassManager();
|
||||
}
|
||||
@ -472,7 +500,8 @@ We also need to update HandleDefinition and HandleExtern:
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
@ -552,7 +581,10 @@ most recent to the oldest, to find the newest definition. If no definition is
|
||||
found inside the JIT, it falls back to calling "``dlsym("sin")``" on the
|
||||
Kaleidoscope process itself. Since "``sin``" is defined within the JIT's
|
||||
address space, it simply patches up calls in the module to call the libm
|
||||
version of ``sin`` directly.
|
||||
version of ``sin`` directly. But in some cases this even goes further:
|
||||
as sin and cos are names of standard math functions, the constant folder
|
||||
will directly evaluate the function calls to the correct result when called
|
||||
with constants like in the "``sin(1.0)``" above.
|
||||
|
||||
In the future we'll see how tweaking this symbol resolution rule can be used to
|
||||
enable all sorts of useful features, from security (restricting the set of
|
||||
@ -565,12 +597,21 @@ if we add:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#define DLLEXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT
|
||||
#endif
|
||||
|
||||
/// putchard - putchar that takes a double and returns 0.
|
||||
extern "C" double putchard(double X) {
|
||||
extern "C" DLLEXPORT double putchard(double X) {
|
||||
fputc((char)X, stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
Note that for Windows we need to actually export the functions because
|
||||
the dynamic symbol loader will use GetProcAddress to find the symbols.
|
||||
|
||||
Now we can produce simple output to the console by using things like:
|
||||
"``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
|
||||
on the console (120 is the ASCII code for 'x'). Similar code could be
|
||||
|
@ -103,7 +103,8 @@ To represent the new expression we add a new AST node for it:
|
||||
IfExprAST(std::unique_ptr<ExprAST> Cond, std::unique_ptr<ExprAST> Then,
|
||||
std::unique_ptr<ExprAST> Else)
|
||||
: Cond(std::move(Cond)), Then(std::move(Then)), Else(std::move(Else)) {}
|
||||
virtual Value *codegen();
|
||||
|
||||
Value *codegen() override;
|
||||
};
|
||||
|
||||
The AST node just has pointers to the various subexpressions.
|
||||
@ -290,9 +291,9 @@ for ``IfExprAST``:
|
||||
if (!CondV)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
CondV = Builder.CreateFCmpONE(
|
||||
CondV, ConstantFP::get(LLVMContext, APFloat(0.0)), "ifcond");
|
||||
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
|
||||
|
||||
This code is straightforward and similar to what we saw before. We emit
|
||||
the expression for the condition, then compare that value to zero to get
|
||||
@ -305,9 +306,9 @@ a truth value as a 1-bit (bool) value.
|
||||
// Create blocks for the then and else cases. Insert the 'then' block at the
|
||||
// end of the function.
|
||||
BasicBlock *ThenBB =
|
||||
BasicBlock::Create(LLVMContext, "then", TheFunction);
|
||||
BasicBlock *ElseBB = BasicBlock::Create(LLVMContext, "else");
|
||||
BasicBlock *MergeBB = BasicBlock::Create(LLVMContext, "ifcont");
|
||||
BasicBlock::Create(TheContext, "then", TheFunction);
|
||||
BasicBlock *ElseBB = BasicBlock::Create(TheContext, "else");
|
||||
BasicBlock *MergeBB = BasicBlock::Create(TheContext, "ifcont");
|
||||
|
||||
Builder.CreateCondBr(CondV, ThenBB, ElseBB);
|
||||
|
||||
@ -400,7 +401,7 @@ code:
|
||||
TheFunction->getBasicBlockList().push_back(MergeBB);
|
||||
Builder.SetInsertPoint(MergeBB);
|
||||
PHINode *PN =
|
||||
Builder.CreatePHI(Type::getDoubleTy(LLVMContext), 2, "iftmp");
|
||||
Builder.CreatePHI(Type::getDoubleTy(TheContext), 2, "iftmp");
|
||||
|
||||
PN->addIncoming(ThenV, ThenBB);
|
||||
PN->addIncoming(ElseV, ElseBB);
|
||||
@ -433,7 +434,7 @@ something more aggressive, a 'for' expression:
|
||||
|
||||
::
|
||||
|
||||
extern putchard(char)
|
||||
extern putchard(char);
|
||||
def printstar(n)
|
||||
for i = 1, i < n, 1.0 in
|
||||
putchard(42); # ascii 42 = '*'
|
||||
@ -500,7 +501,8 @@ variable name and the constituent expressions in the node.
|
||||
std::unique_ptr<ExprAST> Body)
|
||||
: VarName(VarName), Start(std::move(Start)), End(std::move(End)),
|
||||
Step(std::move(Step)), Body(std::move(Body)) {}
|
||||
virtual Value *codegen();
|
||||
|
||||
Value *codegen() override;
|
||||
};
|
||||
|
||||
Parser Extensions for the 'for' Loop
|
||||
@ -561,6 +563,27 @@ value to null in the AST node:
|
||||
std::move(Body));
|
||||
}
|
||||
|
||||
And again we hook it up as a primary expression:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
static std::unique_ptr<ExprAST> ParsePrimary() {
|
||||
switch (CurTok) {
|
||||
default:
|
||||
return LogError("unknown token when expecting an expression");
|
||||
case tok_identifier:
|
||||
return ParseIdentifierExpr();
|
||||
case tok_number:
|
||||
return ParseNumberExpr();
|
||||
case '(':
|
||||
return ParseParenExpr();
|
||||
case tok_if:
|
||||
return ParseIfExpr();
|
||||
case tok_for:
|
||||
return ParseForExpr();
|
||||
}
|
||||
}
|
||||
|
||||
LLVM IR for the 'for' Loop
|
||||
--------------------------
|
||||
|
||||
@ -610,7 +633,8 @@ expression for the loop value:
|
||||
Value *ForExprAST::codegen() {
|
||||
// Emit the start code first, without 'variable' in scope.
|
||||
Value *StartVal = Start->codegen();
|
||||
if (StartVal == 0) return 0;
|
||||
if (!StartVal)
|
||||
return nullptr;
|
||||
|
||||
With this out of the way, the next step is to set up the LLVM basic
|
||||
block for the start of the loop body. In the case above, the whole loop
|
||||
@ -625,7 +649,7 @@ expression).
|
||||
Function *TheFunction = Builder.GetInsertBlock()->getParent();
|
||||
BasicBlock *PreheaderBB = Builder.GetInsertBlock();
|
||||
BasicBlock *LoopBB =
|
||||
BasicBlock::Create(LLVMContext, "loop", TheFunction);
|
||||
BasicBlock::Create(TheContext, "loop", TheFunction);
|
||||
|
||||
// Insert an explicit fall through from the current block to the LoopBB.
|
||||
Builder.CreateBr(LoopBB);
|
||||
@ -642,7 +666,7 @@ the two blocks.
|
||||
Builder.SetInsertPoint(LoopBB);
|
||||
|
||||
// Start the PHI node with an entry for Start.
|
||||
PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(LLVMContext),
|
||||
PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(TheContext),
|
||||
2, VarName.c_str());
|
||||
Variable->addIncoming(StartVal, PreheaderBB);
|
||||
|
||||
@ -693,7 +717,7 @@ table.
|
||||
return nullptr;
|
||||
} else {
|
||||
// If not specified, use 1.0.
|
||||
StepVal = ConstantFP::get(LLVMContext, APFloat(1.0));
|
||||
StepVal = ConstantFP::get(TheContext, APFloat(1.0));
|
||||
}
|
||||
|
||||
Value *NextVar = Builder.CreateFAdd(Variable, StepVal, "nextvar");
|
||||
@ -710,9 +734,9 @@ iteration of the loop.
|
||||
if (!EndCond)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
EndCond = Builder.CreateFCmpONE(
|
||||
EndCond, ConstantFP::get(LLVMContext, APFloat(0.0)), "loopcond");
|
||||
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
|
||||
|
||||
Finally, we evaluate the exit value of the loop, to determine whether
|
||||
the loop should exit. This mirrors the condition evaluation for the
|
||||
@ -723,7 +747,7 @@ if/then/else statement.
|
||||
// Create the "after loop" block and insert it.
|
||||
BasicBlock *LoopEndBB = Builder.GetInsertBlock();
|
||||
BasicBlock *AfterBB =
|
||||
BasicBlock::Create(LLVMContext, "afterloop", TheFunction);
|
||||
BasicBlock::Create(TheContext, "afterloop", TheFunction);
|
||||
|
||||
// Insert the conditional branch into the end of LoopEndBB.
|
||||
Builder.CreateCondBr(EndCond, LoopBB, AfterBB);
|
||||
@ -751,7 +775,7 @@ insertion position to it.
|
||||
NamedValues.erase(VarName);
|
||||
|
||||
// for expr always returns 0.0.
|
||||
return Constant::getNullValue(Type::getDoubleTy(LLVMContext));
|
||||
return Constant::getNullValue(Type::getDoubleTy(TheContext));
|
||||
}
|
||||
|
||||
The final code handles various cleanups: now that we have the "NextVar"
|
||||
@ -772,7 +796,7 @@ Full Code Listing
|
||||
=================
|
||||
|
||||
Here is the complete code listing for our running example, enhanced with
|
||||
the if/then/else and for expressions.. To build this example, use:
|
||||
the if/then/else and for expressions. To build this example, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
@ -31,7 +31,7 @@ User-defined Operators: the Idea
|
||||
================================
|
||||
|
||||
The "operator overloading" that we will add to Kaleidoscope is more
|
||||
general than languages like C++. In C++, you are only allowed to
|
||||
general than in languages like C++. In C++, you are only allowed to
|
||||
redefine existing operators: you can't programmatically change the
|
||||
grammar, introduce new operators, change precedence levels, etc. In this
|
||||
chapter, we will add this capability to Kaleidoscope, which will let the
|
||||
@ -41,8 +41,8 @@ The point of going into user-defined operators in a tutorial like this
|
||||
is to show the power and flexibility of using a hand-written parser.
|
||||
Thus far, the parser we have been implementing uses recursive descent
|
||||
for most parts of the grammar and operator precedence parsing for the
|
||||
expressions. See `Chapter 2 <LangImpl2.html>`_ for details. Without
|
||||
using operator precedence parsing, it would be very difficult to allow
|
||||
expressions. See `Chapter 2 <LangImpl2.html>`_ for details. By
|
||||
using operator precedence parsing, it is very easy to allow
|
||||
the programmer to introduce new operators into the grammar: the grammar
|
||||
is dynamically extensible as the JIT runs.
|
||||
|
||||
@ -143,17 +143,18 @@ this:
|
||||
: Name(name), Args(std::move(Args)), IsOperator(IsOperator),
|
||||
Precedence(Prec) {}
|
||||
|
||||
Function *codegen();
|
||||
const std::string &getName() const { return Name; }
|
||||
|
||||
bool isUnaryOp() const { return IsOperator && Args.size() == 1; }
|
||||
bool isBinaryOp() const { return IsOperator && Args.size() == 2; }
|
||||
|
||||
char getOperatorName() const {
|
||||
assert(isUnaryOp() || isBinaryOp());
|
||||
return Name[Name.size()-1];
|
||||
return Name[Name.size() - 1];
|
||||
}
|
||||
|
||||
unsigned getBinaryPrecedence() const { return Precedence; }
|
||||
|
||||
Function *codegen();
|
||||
};
|
||||
|
||||
Basically, in addition to knowing a name for the prototype, we now keep
|
||||
@ -194,7 +195,7 @@ user-defined operator, we need to parse it:
|
||||
// Read the precedence if present.
|
||||
if (CurTok == tok_number) {
|
||||
if (NumVal < 1 || NumVal > 100)
|
||||
return LogErrorP("Invalid precedecnce: must be 1..100");
|
||||
return LogErrorP("Invalid precedence: must be 1..100");
|
||||
BinaryPrecedence = (unsigned)NumVal;
|
||||
getNextToken();
|
||||
}
|
||||
@ -225,7 +226,7 @@ This is all fairly straightforward parsing code, and we have already
|
||||
seen a lot of similar code in the past. One interesting part about the
|
||||
code above is the couple lines that set up ``FnName`` for binary
|
||||
operators. This builds names like "binary@" for a newly defined "@"
|
||||
operator. This then takes advantage of the fact that symbol names in the
|
||||
operator. It then takes advantage of the fact that symbol names in the
|
||||
LLVM symbol table are allowed to have any character in them, including
|
||||
embedded nul characters.
|
||||
|
||||
@ -251,7 +252,7 @@ default case for our existing binary operator node:
|
||||
case '<':
|
||||
L = Builder.CreateFCmpULT(L, R, "cmptmp");
|
||||
// Convert bool 0/1 to double 0.0 or 1.0
|
||||
return Builder.CreateUIToFP(L, Type::getDoubleTy(LLVMContext),
|
||||
return Builder.CreateUIToFP(L, Type::getDoubleTy(TheContext),
|
||||
"booltmp");
|
||||
default:
|
||||
break;
|
||||
@ -259,7 +260,7 @@ default case for our existing binary operator node:
|
||||
|
||||
// If it wasn't a builtin binary operator, it must be a user defined one. Emit
|
||||
// a call to it.
|
||||
Function *F = TheModule->getFunction(std::string("binary") + Op);
|
||||
Function *F = getFunction(std::string("binary") + Op);
|
||||
assert(F && "binary operator not found!");
|
||||
|
||||
Value *Ops[2] = { L, R };
|
||||
@ -277,22 +278,21 @@ The final piece of code we are missing, is a bit of top-level magic:
|
||||
.. code-block:: c++
|
||||
|
||||
Function *FunctionAST::codegen() {
|
||||
NamedValues.clear();
|
||||
|
||||
Function *TheFunction = Proto->codegen();
|
||||
// Transfer ownership of the prototype to the FunctionProtos map, but keep a
|
||||
// reference to it for use below.
|
||||
auto &P = *Proto;
|
||||
FunctionProtos[Proto->getName()] = std::move(Proto);
|
||||
Function *TheFunction = getFunction(P.getName());
|
||||
if (!TheFunction)
|
||||
return nullptr;
|
||||
|
||||
// If this is an operator, install it.
|
||||
if (Proto->isBinaryOp())
|
||||
BinopPrecedence[Proto->getOperatorName()] = Proto->getBinaryPrecedence();
|
||||
if (P.isBinaryOp())
|
||||
BinopPrecedence[P.getOperatorName()] = P.getBinaryPrecedence();
|
||||
|
||||
// Create a new basic block to start insertion into.
|
||||
BasicBlock *BB = BasicBlock::Create(LLVMContext, "entry", TheFunction);
|
||||
Builder.SetInsertPoint(BB);
|
||||
|
||||
if (Value *RetVal = Body->codegen()) {
|
||||
...
|
||||
BasicBlock *BB = BasicBlock::Create(TheContext, "entry", TheFunction);
|
||||
...
|
||||
|
||||
Basically, before codegening a function, if it is a user-defined
|
||||
operator, we register it in the precedence table. This allows the binary
|
||||
@ -323,7 +323,8 @@ that, we need an AST node:
|
||||
public:
|
||||
UnaryExprAST(char Opcode, std::unique_ptr<ExprAST> Operand)
|
||||
: Opcode(Opcode), Operand(std::move(Operand)) {}
|
||||
virtual Value *codegen();
|
||||
|
||||
Value *codegen() override;
|
||||
};
|
||||
|
||||
This AST node is very simple and obvious by now. It directly mirrors the
|
||||
@ -345,7 +346,7 @@ simple: we'll add a new function to do it:
|
||||
int Opc = CurTok;
|
||||
getNextToken();
|
||||
if (auto Operand = ParseUnary())
|
||||
return llvm::unique_ptr<UnaryExprAST>(Opc, std::move(Operand));
|
||||
return llvm::make_unique<UnaryExprAST>(Opc, std::move(Operand));
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@ -433,7 +434,7 @@ unary operators. It looks like this:
|
||||
if (!OperandV)
|
||||
return nullptr;
|
||||
|
||||
Function *F = TheModule->getFunction(std::string("unary")+Opcode);
|
||||
Function *F = getFunction(std::string("unary") + Opcode);
|
||||
if (!F)
|
||||
return LogErrorV("Unknown unary operator");
|
||||
|
||||
@ -461,7 +462,7 @@ newline):
|
||||
declare double @printd(double)
|
||||
|
||||
ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.
|
||||
..
|
||||
...
|
||||
ready> printd(123) : printd(456) : printd(789);
|
||||
123.000000
|
||||
456.000000
|
||||
@ -518,10 +519,9 @@ denser the character:
|
||||
|
||||
::
|
||||
|
||||
ready>
|
||||
|
||||
extern putchard(char)
|
||||
def printdensity(d)
|
||||
ready> extern putchard(char);
|
||||
...
|
||||
ready> def printdensity(d)
|
||||
if d > 8 then
|
||||
putchard(32) # ' '
|
||||
else if d > 4 then
|
||||
@ -538,9 +538,9 @@ denser the character:
|
||||
Evaluated to 0.000000
|
||||
|
||||
Based on these simple primitive operations, we can start to define more
|
||||
interesting things. For example, here's a little function that solves
|
||||
for the number of iterations it takes a function in the complex plane to
|
||||
converge:
|
||||
interesting things. For example, here's a little function that determines
|
||||
the number of iterations it takes for a certain function in the complex
|
||||
plane to diverge:
|
||||
|
||||
::
|
||||
|
||||
@ -742,7 +742,7 @@ Full Code Listing
|
||||
=================
|
||||
|
||||
Here is the complete code listing for our running example, enhanced with
|
||||
the if/then/else and for expressions.. To build this example, use:
|
||||
the support for user-defined operators. To build this example, use:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
|
@ -327,7 +327,7 @@ to update:
|
||||
|
||||
static std::map<std::string, AllocaInst*> NamedValues;
|
||||
|
||||
Also, since we will need to create these alloca's, we'll use a helper
|
||||
Also, since we will need to create these allocas, we'll use a helper
|
||||
function that ensures that the allocas are created in the entry block of
|
||||
the function:
|
||||
|
||||
@ -339,7 +339,7 @@ the function:
|
||||
const std::string &VarName) {
|
||||
IRBuilder<> TmpB(&TheFunction->getEntryBlock(),
|
||||
TheFunction->getEntryBlock().begin());
|
||||
return TmpB.CreateAlloca(Type::getDoubleTy(LLVMContext), 0,
|
||||
return TmpB.CreateAlloca(Type::getDoubleTy(TheContext), 0,
|
||||
VarName.c_str());
|
||||
}
|
||||
|
||||
@ -348,7 +348,7 @@ the first instruction (.begin()) of the entry block. It then creates an
|
||||
alloca with the expected name and returns it. Because all values in
|
||||
Kaleidoscope are doubles, there is no need to pass in a type to use.
|
||||
|
||||
With this in place, the first functionality change we want to make is to
|
||||
With this in place, the first functionality change we want to make belongs to
|
||||
variable references. In our new scheme, variables live on the stack, so
|
||||
code generating a reference to them actually needs to produce a load
|
||||
from the stack slot:
|
||||
@ -377,7 +377,7 @@ the unabridged code):
|
||||
// Create an alloca for the variable in the entry block.
|
||||
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
|
||||
|
||||
// Emit the start code first, without 'variable' in scope.
|
||||
// Emit the start code first, without 'variable' in scope.
|
||||
Value *StartVal = Start->codegen();
|
||||
if (!StartVal)
|
||||
return nullptr;
|
||||
@ -408,21 +408,25 @@ them. The code for this is also pretty simple:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
/// CreateArgumentAllocas - Create an alloca for each argument and register the
|
||||
/// argument in the symbol table so that references to it will succeed.
|
||||
void PrototypeAST::CreateArgumentAllocas(Function *F) {
|
||||
Function::arg_iterator AI = F->arg_begin();
|
||||
for (unsigned Idx = 0, e = Args.size(); Idx != e; ++Idx, ++AI) {
|
||||
Function *FunctionAST::codegen() {
|
||||
...
|
||||
Builder.SetInsertPoint(BB);
|
||||
|
||||
// Record the function arguments in the NamedValues map.
|
||||
NamedValues.clear();
|
||||
for (auto &Arg : TheFunction->args()) {
|
||||
// Create an alloca for this variable.
|
||||
AllocaInst *Alloca = CreateEntryBlockAlloca(F, Args[Idx]);
|
||||
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
|
||||
|
||||
// Store the initial value into the alloca.
|
||||
Builder.CreateStore(AI, Alloca);
|
||||
Builder.CreateStore(&Arg, Alloca);
|
||||
|
||||
// Add arguments to variable symbol table.
|
||||
NamedValues[Args[Idx]] = Alloca;
|
||||
NamedValues[Arg.getName()] = Alloca;
|
||||
}
|
||||
}
|
||||
|
||||
if (Value *RetVal = Body->codegen()) {
|
||||
...
|
||||
|
||||
For each argument, we make an alloca, store the input value to the
|
||||
function into the alloca, and register the alloca as the memory location
|
||||
@ -434,15 +438,13 @@ get good codegen once again:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
// Set up the optimizer pipeline. Start with registering info about how the
|
||||
// target lays out data structures.
|
||||
OurFPM.add(new DataLayout(*TheExecutionEngine->getDataLayout()));
|
||||
// Promote allocas to registers.
|
||||
OurFPM.add(createPromoteMemoryToRegisterPass());
|
||||
TheFPM->add(createPromoteMemoryToRegisterPass());
|
||||
// Do simple "peephole" optimizations and bit-twiddling optzns.
|
||||
OurFPM.add(createInstructionCombiningPass());
|
||||
TheFPM->add(createInstructionCombiningPass());
|
||||
// Reassociate expressions.
|
||||
OurFPM.add(createReassociatePass());
|
||||
TheFPM->add(createReassociatePass());
|
||||
...
|
||||
|
||||
It is interesting to see what the code looks like before and after the
|
||||
mem2reg optimization runs. For example, this is the before/after code
|
||||
@ -454,7 +456,7 @@ for our recursive fib function. Before the optimization:
|
||||
entry:
|
||||
%x1 = alloca double
|
||||
store double %x, double* %x1
|
||||
%x2 = load double* %x1
|
||||
%x2 = load double, double* %x1
|
||||
%cmptmp = fcmp ult double %x2, 3.000000e+00
|
||||
%booltmp = uitofp i1 %cmptmp to double
|
||||
%ifcond = fcmp one double %booltmp, 0.000000e+00
|
||||
@ -464,10 +466,10 @@ for our recursive fib function. Before the optimization:
|
||||
br label %ifcont
|
||||
|
||||
else: ; preds = %entry
|
||||
%x3 = load double* %x1
|
||||
%x3 = load double, double* %x1
|
||||
%subtmp = fsub double %x3, 1.000000e+00
|
||||
%calltmp = call double @fib(double %subtmp)
|
||||
%x4 = load double* %x1
|
||||
%x4 = load double, double* %x1
|
||||
%subtmp5 = fsub double %x4, 2.000000e+00
|
||||
%calltmp6 = call double @fib(double %subtmp5)
|
||||
%addtmp = fadd double %calltmp, %calltmp6
|
||||
@ -677,10 +679,10 @@ var/in, it looks like this:
|
||||
|
||||
public:
|
||||
VarExprAST(std::vector<std::pair<std::string, std::unique_ptr<ExprAST>>> VarNames,
|
||||
std::unique_ptr<ExprAST> body)
|
||||
: VarNames(std::move(VarNames)), Body(std::move(Body)) {}
|
||||
std::unique_ptr<ExprAST> Body)
|
||||
: VarNames(std::move(VarNames)), Body(std::move(Body)) {}
|
||||
|
||||
virtual Value *codegen();
|
||||
Value *codegen() override;
|
||||
};
|
||||
|
||||
var/in allows a list of names to be defined all at once, and each name
|
||||
@ -812,7 +814,7 @@ previous value that we replace in OldBindings.
|
||||
if (!InitVal)
|
||||
return nullptr;
|
||||
} else { // If not specified, use 0.0.
|
||||
InitVal = ConstantFP::get(LLVMContext, APFloat(0.0));
|
||||
InitVal = ConstantFP::get(TheContext, APFloat(0.0));
|
||||
}
|
||||
|
||||
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, VarName);
|
||||
|
@ -18,7 +18,7 @@ Source level debugging uses formatted data that helps a debugger
|
||||
translate from binary and the state of the machine back to the
|
||||
source that the programmer wrote. In LLVM we generally use a format
|
||||
called `DWARF <http://dwarfstd.org>`_. DWARF is a compact encoding
|
||||
that represents types, source locations, and variable locations.
|
||||
that represents types, source locations, and variable locations.
|
||||
|
||||
The short summary of this chapter is that we'll go through the
|
||||
various things you have to add to a programming language to
|
||||
@ -94,14 +94,14 @@ Then we're going to remove the command line code wherever it exists:
|
||||
return;
|
||||
@@ -1184,7 +1183,6 @@ int main() {
|
||||
BinopPrecedence['*'] = 40; // highest.
|
||||
|
||||
|
||||
// Prime the first token.
|
||||
- fprintf(stderr, "ready> ");
|
||||
getNextToken();
|
||||
|
||||
|
||||
Lastly we're going to disable all of the optimization passes and the JIT so
|
||||
that the only thing that happens after we're done parsing and generating
|
||||
code is that the llvm IR goes to standard error:
|
||||
code is that the LLVM IR goes to standard error:
|
||||
|
||||
.. code-block:: udiff
|
||||
|
||||
@ -140,7 +140,7 @@ code is that the llvm IR goes to standard error:
|
||||
-
|
||||
+ #endif
|
||||
OurFPM.doInitialization();
|
||||
|
||||
|
||||
// Set the global so the code gen can use this.
|
||||
|
||||
This relatively small set of changes get us to the point that we can compile
|
||||
@ -166,8 +166,8 @@ DWARF Emission Setup
|
||||
|
||||
Similar to the ``IRBuilder`` class we have a
|
||||
`DIBuilder <http://llvm.org/doxygen/classllvm_1_1DIBuilder.html>`_ class
|
||||
that helps in constructing debug metadata for an llvm IR file. It
|
||||
corresponds 1:1 similarly to ``IRBuilder`` and llvm IR, but with nicer names.
|
||||
that helps in constructing debug metadata for an LLVM IR file. It
|
||||
corresponds 1:1 similarly to ``IRBuilder`` and LLVM IR, but with nicer names.
|
||||
Using it does require that you be more familiar with DWARF terminology than
|
||||
you needed to be with ``IRBuilder`` and ``Instruction`` names, but if you
|
||||
read through the general documentation on the
|
||||
@ -194,7 +194,7 @@ expressions:
|
||||
} KSDbgInfo;
|
||||
|
||||
DIType *DebugInfo::getDoubleTy() {
|
||||
if (DblTy.isValid())
|
||||
if (DblTy)
|
||||
return DblTy;
|
||||
|
||||
DblTy = DBuilder->createBasicType("double", 64, 64, dwarf::DW_ATE_float);
|
||||
@ -214,7 +214,7 @@ There are a couple of things to note here. First, while we're producing a
|
||||
compile unit for a language called Kaleidoscope we used the language
|
||||
constant for C. This is because a debugger wouldn't necessarily understand
|
||||
the calling conventions or default ABI for a language it doesn't recognize
|
||||
and we follow the C ABI in our llvm code generation so it's the closest
|
||||
and we follow the C ABI in our LLVM code generation so it's the closest
|
||||
thing to accurate. This ensures we can actually call functions from the
|
||||
debugger and have them execute. Secondly, you'll see the "fib.ks" in the
|
||||
call to ``createCompileUnit``. This is a default hard coded value since
|
||||
@ -259,10 +259,11 @@ information) and construct our function definition:
|
||||
unsigned LineNo = 0;
|
||||
unsigned ScopeLine = 0;
|
||||
DISubprogram *SP = DBuilder->createFunction(
|
||||
FContext, Name, StringRef(), Unit, LineNo,
|
||||
CreateFunctionType(Args.size(), Unit), false /* internal linkage */,
|
||||
true /* definition */, ScopeLine, DINode::FlagPrototyped, false);
|
||||
F->setSubprogram(SP);
|
||||
FContext, P.getName(), StringRef(), Unit, LineNo,
|
||||
CreateFunctionType(TheFunction->arg_size(), Unit),
|
||||
false /* internal linkage */, true /* definition */, ScopeLine,
|
||||
DINode::FlagPrototyped, false);
|
||||
TheFunction->setSubprogram(SP);
|
||||
|
||||
and we now have a DISubprogram that contains a reference to all of our
|
||||
metadata for the function.
|
||||
@ -326,10 +327,9 @@ that we pass down through when we create a new expression:
|
||||
|
||||
giving us locations for each of our expressions and variables.
|
||||
|
||||
From this we can make sure to tell ``DIBuilder`` when we're at a new source
|
||||
location so it can use that when we generate the rest of our code and make
|
||||
sure that each instruction has source location information. We do this
|
||||
by constructing another small function:
|
||||
To make sure that every instruction gets proper source location information,
|
||||
we have to tell ``Builder`` whenever we're at a new source location.
|
||||
We use a small helper function for this:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
@ -343,40 +343,23 @@ by constructing another small function:
|
||||
DebugLoc::get(AST->getLine(), AST->getCol(), Scope));
|
||||
}
|
||||
|
||||
that both tells the main ``IRBuilder`` where we are, but also what scope
|
||||
we're in. Since we've just created a function above we can either be in
|
||||
the main file scope (like when we created our function), or now we can be
|
||||
in the function scope we just created. To represent this we create a stack
|
||||
of scopes:
|
||||
This tells the main ``IRBuilder`` not only where we are, but also what scope
|
||||
we're in. The scope can either be on compile-unit level or be the nearest
|
||||
enclosing lexical block like the current function.
|
||||
To represent this we create a stack of scopes:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
std::vector<DIScope *> LexicalBlocks;
|
||||
std::map<const PrototypeAST *, DIScope *> FnScopeMap;
|
||||
|
||||
and keep a map of each function to the scope that it represents (an
|
||||
DISubprogram is also an DIScope).
|
||||
|
||||
Then we make sure to:
|
||||
and push the scope (function) to the top of the stack when we start
|
||||
generating the code for each function:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
KSDbgInfo.emitLocation(this);
|
||||
KSDbgInfo.LexicalBlocks.push_back(SP);
|
||||
|
||||
emit the location every time we start to generate code for a new AST, and
|
||||
also:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
KSDbgInfo.FnScopeMap[this] = SP;
|
||||
|
||||
store the scope (function) when we create it and use it:
|
||||
|
||||
KSDbgInfo.LexicalBlocks.push_back(&KSDbgInfo.FnScopeMap[Proto]);
|
||||
|
||||
when we start generating the code for each function.
|
||||
|
||||
also, don't forget to pop the scope back off of your scope stack at the
|
||||
Also, we must not forget to pop the scope back off of the scope stack at the
|
||||
end of the code generation for the function:
|
||||
|
||||
.. code-block:: c++
|
||||
@ -385,6 +368,13 @@ end of the code generation for the function:
|
||||
// unconditionally.
|
||||
KSDbgInfo.LexicalBlocks.pop_back();
|
||||
|
||||
Then we make sure to emit the location every time we start to generate code
|
||||
for a new AST object:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
KSDbgInfo.emitLocation(this);
|
||||
|
||||
Variables
|
||||
=========
|
||||
|
||||
@ -392,25 +382,37 @@ Now that we have functions, we need to be able to print out the variables
|
||||
we have in scope. Let's get our function arguments set up so we can get
|
||||
decent backtraces and see how our functions are being called. It isn't
|
||||
a lot of code, and we generally handle it when we're creating the
|
||||
argument allocas in ``PrototypeAST::CreateArgumentAllocas``.
|
||||
argument allocas in ``FunctionAST::codegen``.
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
DIScope *Scope = KSDbgInfo.LexicalBlocks.back();
|
||||
DIFile *Unit = DBuilder->createFile(KSDbgInfo.TheCU.getFilename(),
|
||||
KSDbgInfo.TheCU.getDirectory());
|
||||
DILocalVariable D = DBuilder->createParameterVariable(
|
||||
Scope, Args[Idx], Idx + 1, Unit, Line, KSDbgInfo.getDoubleTy(), true);
|
||||
// Record the function arguments in the NamedValues map.
|
||||
NamedValues.clear();
|
||||
unsigned ArgIdx = 0;
|
||||
for (auto &Arg : TheFunction->args()) {
|
||||
// Create an alloca for this variable.
|
||||
AllocaInst *Alloca = CreateEntryBlockAlloca(TheFunction, Arg.getName());
|
||||
|
||||
DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
|
||||
DebugLoc::get(Line, 0, Scope),
|
||||
Builder.GetInsertBlock());
|
||||
// Create a debug descriptor for the variable.
|
||||
DILocalVariable *D = DBuilder->createParameterVariable(
|
||||
SP, Arg.getName(), ++ArgIdx, Unit, LineNo, KSDbgInfo.getDoubleTy(),
|
||||
true);
|
||||
|
||||
Here we're doing a few things. First, we're grabbing our current scope
|
||||
for the variable so we can say what range of code our variable is valid
|
||||
through. Second, we're creating the variable, giving it the scope,
|
||||
DBuilder->insertDeclare(Alloca, D, DBuilder->createExpression(),
|
||||
DebugLoc::get(LineNo, 0, SP),
|
||||
Builder.GetInsertBlock());
|
||||
|
||||
// Store the initial value into the alloca.
|
||||
Builder.CreateStore(&Arg, Alloca);
|
||||
|
||||
// Add arguments to variable symbol table.
|
||||
NamedValues[Arg.getName()] = Alloca;
|
||||
}
|
||||
|
||||
|
||||
Here we're first creating the variable, giving it the scope (``SP``),
|
||||
the name, source location, type, and since it's an argument, the argument
|
||||
index. Third, we create an ``llvm.dbg.declare`` call to indicate at the IR
|
||||
index. Next, we create an ``llvm.dbg.declare`` call to indicate at the IR
|
||||
level that we've got a variable in an alloca (and it gives a starting
|
||||
location for the variable), and setting a source location for the
|
||||
beginning of the scope on the declare.
|
||||
@ -420,7 +422,7 @@ assumptions based on how code and debug information was generated for them
|
||||
in the past. In this case we need to do a little bit of a hack to avoid
|
||||
generating line information for the function prologue so that the debugger
|
||||
knows to skip over those instructions when setting a breakpoint. So in
|
||||
``FunctionAST::CodeGen`` we add a couple of lines:
|
||||
``FunctionAST::codegen`` we add some more lines:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
@ -434,7 +436,7 @@ body of the function:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
KSDbgInfo.emitLocation(Body);
|
||||
KSDbgInfo.emitLocation(Body.get());
|
||||
|
||||
With this we have enough debug information to set breakpoints in functions,
|
||||
print out argument variables, and call functions. Not too bad for just a
|
||||
|
@ -103,19 +103,7 @@ Parser Extensions for If/Then/Else
|
||||
|
||||
Now that we have the relevant tokens coming from the lexer and we have
|
||||
the AST node to build, our parsing logic is relatively straightforward.
|
||||
First we define a new parsing function:
|
||||
|
||||
.. code-block:: ocaml
|
||||
|
||||
let rec parse_primary = parser
|
||||
...
|
||||
(* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
|
||||
| [< 'Token.If; c=parse_expr;
|
||||
'Token.Then ?? "expected 'then'"; t=parse_expr;
|
||||
'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
|
||||
Ast.If (c, t, e)
|
||||
|
||||
Next we hook it up as a primary expression:
|
||||
Next we add a new case for parsing an if-expression as a primary expression:
|
||||
|
||||
.. code-block:: ocaml
|
||||
|
||||
|
@ -74,18 +74,18 @@ void BrainF::header(LLVMContext& C) {
|
||||
|
||||
//declare i32 @getchar()
|
||||
getchar_func = cast<Function>(module->
|
||||
getOrInsertFunction("getchar", IntegerType::getInt32Ty(C), NULL));
|
||||
getOrInsertFunction("getchar", IntegerType::getInt32Ty(C)));
|
||||
|
||||
//declare i32 @putchar(i32)
|
||||
putchar_func = cast<Function>(module->
|
||||
getOrInsertFunction("putchar", IntegerType::getInt32Ty(C),
|
||||
IntegerType::getInt32Ty(C), NULL));
|
||||
IntegerType::getInt32Ty(C)));
|
||||
|
||||
//Function header
|
||||
|
||||
//define void @brainf()
|
||||
brainf_func = cast<Function>(module->
|
||||
getOrInsertFunction("brainf", Type::getVoidTy(C), NULL));
|
||||
getOrInsertFunction("brainf", Type::getVoidTy(C)));
|
||||
|
||||
builder = new IRBuilder<>(BasicBlock::Create(C, label, brainf_func));
|
||||
|
||||
@ -156,7 +156,7 @@ void BrainF::header(LLVMContext& C) {
|
||||
//declare i32 @puts(i8 *)
|
||||
Function *puts_func = cast<Function>(module->
|
||||
getOrInsertFunction("puts", IntegerType::getInt32Ty(C),
|
||||
PointerType::getUnqual(IntegerType::getInt8Ty(C)), NULL));
|
||||
PointerType::getUnqual(IntegerType::getInt8Ty(C))));
|
||||
|
||||
//brainf.aberror:
|
||||
aberrorbb = BasicBlock::Create(C, label, brainf_func);
|
||||
|
@ -77,7 +77,7 @@ void addMainFunction(Module *mod) {
|
||||
getOrInsertFunction("main", IntegerType::getInt32Ty(mod->getContext()),
|
||||
IntegerType::getInt32Ty(mod->getContext()),
|
||||
PointerType::getUnqual(PointerType::getUnqual(
|
||||
IntegerType::getInt8Ty(mod->getContext()))), NULL));
|
||||
IntegerType::getInt8Ty(mod->getContext())))));
|
||||
{
|
||||
Function::arg_iterator args = main_func->arg_begin();
|
||||
Value *arg_0 = &*args++;
|
||||
@ -166,6 +166,10 @@ int main(int argc, char **argv) {
|
||||
std::vector<GenericValue> args;
|
||||
Function *brainf_func = M.getFunction("brainf");
|
||||
GenericValue gv = ee->runFunction(brainf_func, args);
|
||||
// Generated code calls putchar, and output is not guaranteed without fflush.
|
||||
// The better place for fflush(stdout) call would be the generated code, but it
|
||||
// is unmanageable because stdout linkage name depends on stdlib implementation.
|
||||
fflush(stdout);
|
||||
} else {
|
||||
WriteBitcodeToFile(Mod.get(), *out);
|
||||
}
|
||||
|
@ -54,8 +54,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) {
|
||||
// to return an int and take an int parameter.
|
||||
Function *FibF =
|
||||
cast<Function>(M->getOrInsertFunction("fib", Type::getInt32Ty(Context),
|
||||
Type::getInt32Ty(Context),
|
||||
nullptr));
|
||||
Type::getInt32Ty(Context)));
|
||||
|
||||
// Add a basic block to the function.
|
||||
BasicBlock *BB = BasicBlock::Create(Context, "EntryBlock", FibF);
|
||||
|
@ -69,11 +69,9 @@ int main() {
|
||||
|
||||
// Create the add1 function entry and insert this entry into module M. The
|
||||
// function will have a return type of "int" and take an argument of "int".
|
||||
// The '0' terminates the list of argument types.
|
||||
Function *Add1F =
|
||||
cast<Function>(M->getOrInsertFunction("add1", Type::getInt32Ty(Context),
|
||||
Type::getInt32Ty(Context),
|
||||
nullptr));
|
||||
Type::getInt32Ty(Context)));
|
||||
|
||||
// Add a basic block to the function. As before, it automatically inserts
|
||||
// because of the last argument.
|
||||
@ -102,8 +100,7 @@ int main() {
|
||||
// Now we're going to create function `foo', which returns an int and takes no
|
||||
// arguments.
|
||||
Function *FooF =
|
||||
cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context),
|
||||
nullptr));
|
||||
cast<Function>(M->getOrInsertFunction("foo", Type::getInt32Ty(Context)));
|
||||
|
||||
// Add a basic block to the FooF function.
|
||||
BB = BasicBlock::Create(Context, "EntryBlock", FooF);
|
||||
|
@ -22,7 +22,7 @@
|
||||
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
|
||||
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/Mangler.h"
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
@ -40,7 +40,7 @@ class KaleidoscopeJIT {
|
||||
private:
|
||||
std::unique_ptr<TargetMachine> TM;
|
||||
const DataLayout DL;
|
||||
ObjectLinkingLayer<> ObjectLayer;
|
||||
RTDyldObjectLinkingLayer<> ObjectLayer;
|
||||
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
|
||||
|
||||
public:
|
||||
|
@ -1110,7 +1110,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModule();
|
||||
}
|
||||
@ -1124,7 +1125,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
|
@ -23,7 +23,7 @@
|
||||
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Mangler.h"
|
||||
@ -44,7 +44,7 @@ class KaleidoscopeJIT {
|
||||
private:
|
||||
std::unique_ptr<TargetMachine> TM;
|
||||
const DataLayout DL;
|
||||
ObjectLinkingLayer<> ObjectLayer;
|
||||
RTDyldObjectLinkingLayer<> ObjectLayer;
|
||||
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
|
||||
|
||||
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>
|
||||
|
@ -1110,7 +1110,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModule();
|
||||
}
|
||||
@ -1124,7 +1125,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Mangler.h"
|
||||
@ -46,7 +46,7 @@ class KaleidoscopeJIT {
|
||||
private:
|
||||
std::unique_ptr<TargetMachine> TM;
|
||||
const DataLayout DL;
|
||||
ObjectLinkingLayer<> ObjectLayer;
|
||||
RTDyldObjectLinkingLayer<> ObjectLayer;
|
||||
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
|
||||
|
||||
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>
|
||||
@ -70,7 +70,7 @@ public:
|
||||
CompileCallbackManager(
|
||||
orc::createLocalCompileCallbackManager(TM->getTargetTriple(), 0)),
|
||||
CODLayer(OptimizeLayer,
|
||||
[this](Function &F) { return std::set<Function*>({&F}); },
|
||||
[](Function &F) { return std::set<Function*>({&F}); },
|
||||
*CompileCallbackManager,
|
||||
orc::createLocalIndirectStubsManagerBuilder(
|
||||
TM->getTargetTriple())) {
|
||||
|
@ -1110,7 +1110,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModule();
|
||||
}
|
||||
@ -1124,7 +1125,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
#include "llvm/IR/Mangler.h"
|
||||
@ -73,7 +73,7 @@ class KaleidoscopeJIT {
|
||||
private:
|
||||
std::unique_ptr<TargetMachine> TM;
|
||||
const DataLayout DL;
|
||||
ObjectLinkingLayer<> ObjectLayer;
|
||||
RTDyldObjectLinkingLayer<> ObjectLayer;
|
||||
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
|
||||
|
||||
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>
|
||||
|
@ -1126,7 +1126,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
|
@ -26,7 +26,7 @@
|
||||
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/IRTransformLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
@ -79,7 +79,7 @@ class KaleidoscopeJIT {
|
||||
private:
|
||||
std::unique_ptr<TargetMachine> TM;
|
||||
const DataLayout DL;
|
||||
ObjectLinkingLayer<> ObjectLayer;
|
||||
RTDyldObjectLinkingLayer<> ObjectLayer;
|
||||
IRCompileLayer<decltype(ObjectLayer)> CompileLayer;
|
||||
|
||||
typedef std::function<std::unique_ptr<Module>(std::unique_ptr<Module>)>
|
||||
|
@ -1150,7 +1150,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
|
@ -140,6 +140,8 @@ class PrototypeAST {
|
||||
public:
|
||||
PrototypeAST(const std::string &Name, std::vector<std::string> Args)
|
||||
: Name(Name), Args(std::move(Args)) {}
|
||||
|
||||
const std::string &getName() const { return Name; }
|
||||
};
|
||||
|
||||
/// FunctionAST - This class represents a function definition itself.
|
||||
|
@ -522,7 +522,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else {
|
||||
// Skip token for error recovery.
|
||||
@ -534,7 +535,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else {
|
||||
// Skip token for error recovery.
|
||||
@ -547,7 +549,8 @@ static void HandleTopLevelExpression() {
|
||||
if (auto FnAST = ParseTopLevelExpr()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read top-level expression:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else {
|
||||
// Skip token for error recovery.
|
||||
@ -601,7 +604,7 @@ int main() {
|
||||
MainLoop();
|
||||
|
||||
// Print out all of the generated code.
|
||||
TheModule->dump();
|
||||
TheModule->print(errs(), nullptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -571,7 +571,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModuleAndPassManager();
|
||||
}
|
||||
@ -585,7 +586,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
@ -648,14 +650,20 @@ static void MainLoop() {
|
||||
// "Library" functions that can be "extern'd" from user code.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#define DLLEXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT
|
||||
#endif
|
||||
|
||||
/// putchard - putchar that takes a double and returns 0.
|
||||
extern "C" double putchard(double X) {
|
||||
extern "C" DLLEXPORT double putchard(double X) {
|
||||
fputc((char)X, stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// printd - printf that takes a double prints it as "%f\n", returning 0.
|
||||
extern "C" double printd(double X) {
|
||||
extern "C" DLLEXPORT double printd(double X) {
|
||||
fprintf(stderr, "%f\n", X);
|
||||
return 0;
|
||||
}
|
||||
|
@ -622,7 +622,7 @@ Value *IfExprAST::codegen() {
|
||||
if (!CondV)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
CondV = Builder.CreateFCmpONE(
|
||||
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
|
||||
|
||||
@ -736,7 +736,7 @@ Value *ForExprAST::codegen() {
|
||||
if (!EndCond)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
EndCond = Builder.CreateFCmpONE(
|
||||
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
|
||||
|
||||
@ -845,7 +845,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModuleAndPassManager();
|
||||
}
|
||||
@ -859,7 +860,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
@ -922,14 +924,20 @@ static void MainLoop() {
|
||||
// "Library" functions that can be "extern'd" from user code.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#define DLLEXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT
|
||||
#endif
|
||||
|
||||
/// putchard - putchar that takes a double and returns 0.
|
||||
extern "C" double putchard(double X) {
|
||||
extern "C" DLLEXPORT double putchard(double X) {
|
||||
fputc((char)X, stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// printd - printf that takes a double prints it as "%f\n", returning 0.
|
||||
extern "C" double printd(double X) {
|
||||
extern "C" DLLEXPORT double printd(double X) {
|
||||
fprintf(stderr, "%f\n", X);
|
||||
return 0;
|
||||
}
|
||||
|
@ -567,7 +567,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
|
||||
// Read the precedence if present.
|
||||
if (CurTok == tok_number) {
|
||||
if (NumVal < 1 || NumVal > 100)
|
||||
return LogErrorP("Invalid precedecnce: must be 1..100");
|
||||
return LogErrorP("Invalid precedence: must be 1..100");
|
||||
BinaryPrecedence = (unsigned)NumVal;
|
||||
getNextToken();
|
||||
}
|
||||
@ -734,7 +734,7 @@ Value *IfExprAST::codegen() {
|
||||
if (!CondV)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
CondV = Builder.CreateFCmpONE(
|
||||
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
|
||||
|
||||
@ -848,7 +848,7 @@ Value *ForExprAST::codegen() {
|
||||
if (!EndCond)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
EndCond = Builder.CreateFCmpONE(
|
||||
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
|
||||
|
||||
@ -964,7 +964,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModuleAndPassManager();
|
||||
}
|
||||
@ -978,7 +979,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
@ -1041,14 +1043,20 @@ static void MainLoop() {
|
||||
// "Library" functions that can be "extern'd" from user code.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#define DLLEXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT
|
||||
#endif
|
||||
|
||||
/// putchard - putchar that takes a double and returns 0.
|
||||
extern "C" double putchard(double X) {
|
||||
extern "C" DLLEXPORT double putchard(double X) {
|
||||
fputc((char)X, stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// printd - printf that takes a double prints it as "%f\n", returning 0.
|
||||
extern "C" double printd(double X) {
|
||||
extern "C" DLLEXPORT double printd(double X) {
|
||||
fprintf(stderr, "%f\n", X);
|
||||
return 0;
|
||||
}
|
||||
|
@ -7,6 +7,7 @@ set(LLVM_LINK_COMPONENTS
|
||||
RuntimeDyld
|
||||
ScalarOpts
|
||||
Support
|
||||
TransformUtils
|
||||
native
|
||||
)
|
||||
|
||||
|
@ -639,7 +639,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
|
||||
// Read the precedence if present.
|
||||
if (CurTok == tok_number) {
|
||||
if (NumVal < 1 || NumVal > 100)
|
||||
return LogErrorP("Invalid precedecnce: must be 1..100");
|
||||
return LogErrorP("Invalid precedence: must be 1..100");
|
||||
BinaryPrecedence = (unsigned)NumVal;
|
||||
getNextToken();
|
||||
}
|
||||
@ -840,7 +840,7 @@ Value *IfExprAST::codegen() {
|
||||
if (!CondV)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
CondV = Builder.CreateFCmpONE(
|
||||
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
|
||||
|
||||
@ -963,7 +963,7 @@ Value *ForExprAST::codegen() {
|
||||
Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
|
||||
Builder.CreateStore(NextVar, Alloca);
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
EndCond = Builder.CreateFCmpONE(
|
||||
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
|
||||
|
||||
@ -1115,6 +1115,8 @@ static void InitializeModuleAndPassManager() {
|
||||
// Create a new pass manager attached to it.
|
||||
TheFPM = llvm::make_unique<legacy::FunctionPassManager>(TheModule.get());
|
||||
|
||||
// Promote allocas to registers.
|
||||
TheFPM->add(createPromoteMemoryToRegisterPass());
|
||||
// Do simple "peephole" optimizations and bit-twiddling optzns.
|
||||
TheFPM->add(createInstructionCombiningPass());
|
||||
// Reassociate expressions.
|
||||
@ -1131,7 +1133,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
TheJIT->addModule(std::move(TheModule));
|
||||
InitializeModuleAndPassManager();
|
||||
}
|
||||
@ -1145,7 +1148,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
@ -1208,14 +1212,20 @@ static void MainLoop() {
|
||||
// "Library" functions that can be "extern'd" from user code.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#define DLLEXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT
|
||||
#endif
|
||||
|
||||
/// putchard - putchar that takes a double and returns 0.
|
||||
extern "C" double putchard(double X) {
|
||||
extern "C" DLLEXPORT double putchard(double X) {
|
||||
fputc((char)X, stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// printd - printf that takes a double prints it as "%f\n", returning 0.
|
||||
extern "C" double printd(double X) {
|
||||
extern "C" DLLEXPORT double printd(double X) {
|
||||
fprintf(stderr, "%f\n", X);
|
||||
return 0;
|
||||
}
|
||||
|
@ -642,7 +642,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
|
||||
// Read the precedence if present.
|
||||
if (CurTok == tok_number) {
|
||||
if (NumVal < 1 || NumVal > 100)
|
||||
return LogErrorP("Invalid precedecnce: must be 1..100");
|
||||
return LogErrorP("Invalid precedence: must be 1..100");
|
||||
BinaryPrecedence = (unsigned)NumVal;
|
||||
getNextToken();
|
||||
}
|
||||
@ -841,7 +841,7 @@ Value *IfExprAST::codegen() {
|
||||
if (!CondV)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
CondV = Builder.CreateFCmpONE(
|
||||
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
|
||||
|
||||
@ -964,7 +964,7 @@ Value *ForExprAST::codegen() {
|
||||
Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
|
||||
Builder.CreateStore(NextVar, Alloca);
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
EndCond = Builder.CreateFCmpONE(
|
||||
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
|
||||
|
||||
@ -1114,7 +1114,8 @@ static void HandleDefinition() {
|
||||
if (auto FnAST = ParseDefinition()) {
|
||||
if (auto *FnIR = FnAST->codegen()) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else {
|
||||
// Skip token for error recovery.
|
||||
@ -1126,7 +1127,8 @@ static void HandleExtern() {
|
||||
if (auto ProtoAST = ParseExtern()) {
|
||||
if (auto *FnIR = ProtoAST->codegen()) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
FnIR->dump();
|
||||
FnIR->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
FunctionProtos[ProtoAST->getName()] = std::move(ProtoAST);
|
||||
}
|
||||
} else {
|
||||
@ -1171,14 +1173,20 @@ static void MainLoop() {
|
||||
// "Library" functions that can be "extern'd" from user code.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#define DLLEXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT
|
||||
#endif
|
||||
|
||||
/// putchard - putchar that takes a double and returns 0.
|
||||
extern "C" double putchard(double X) {
|
||||
extern "C" DLLEXPORT double putchard(double X) {
|
||||
fputc((char)X, stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// printd - printf that takes a double prints it as "%f\n", returning 0.
|
||||
extern "C" double printd(double X) {
|
||||
extern "C" DLLEXPORT double printd(double X) {
|
||||
fprintf(stderr, "%f\n", X);
|
||||
return 0;
|
||||
}
|
||||
|
@ -756,7 +756,7 @@ static std::unique_ptr<PrototypeAST> ParsePrototype() {
|
||||
// Read the precedence if present.
|
||||
if (CurTok == tok_number) {
|
||||
if (NumVal < 1 || NumVal > 100)
|
||||
return LogErrorP("Invalid precedecnce: must be 1..100");
|
||||
return LogErrorP("Invalid precedence: must be 1..100");
|
||||
BinaryPrecedence = (unsigned)NumVal;
|
||||
getNextToken();
|
||||
}
|
||||
@ -1004,7 +1004,7 @@ Value *IfExprAST::codegen() {
|
||||
if (!CondV)
|
||||
return nullptr;
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
CondV = Builder.CreateFCmpONE(
|
||||
CondV, ConstantFP::get(TheContext, APFloat(0.0)), "ifcond");
|
||||
|
||||
@ -1129,7 +1129,7 @@ Value *ForExprAST::codegen() {
|
||||
Value *NextVar = Builder.CreateFAdd(CurVar, StepVal, "nextvar");
|
||||
Builder.CreateStore(NextVar, Alloca);
|
||||
|
||||
// Convert condition to a bool by comparing equal to 0.0.
|
||||
// Convert condition to a bool by comparing non-equal to 0.0.
|
||||
EndCond = Builder.CreateFCmpONE(
|
||||
EndCond, ConstantFP::get(TheContext, APFloat(0.0)), "loopcond");
|
||||
|
||||
@ -1379,14 +1379,20 @@ static void MainLoop() {
|
||||
// "Library" functions that can be "extern'd" from user code.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
#define DLLEXPORT __declspec(dllexport)
|
||||
#else
|
||||
#define DLLEXPORT
|
||||
#endif
|
||||
|
||||
/// putchard - putchar that takes a double and returns 0.
|
||||
extern "C" double putchard(double X) {
|
||||
extern "C" DLLEXPORT double putchard(double X) {
|
||||
fputc((char)X, stderr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// printd - printf that takes a double prints it as "%f\n", returning 0.
|
||||
extern "C" double printd(double X) {
|
||||
extern "C" DLLEXPORT double printd(double X) {
|
||||
fprintf(stderr, "%f\n", X);
|
||||
return 0;
|
||||
}
|
||||
@ -1439,7 +1445,7 @@ int main() {
|
||||
DBuilder->finalize();
|
||||
|
||||
// Print out all of the generated code.
|
||||
TheModule->dump();
|
||||
TheModule->print(errs(), nullptr);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -1395,7 +1395,8 @@ static void HandleDefinition() {
|
||||
if (Function *LF = F->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read function definition:");
|
||||
LF->dump();
|
||||
LF->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
@ -1409,7 +1410,8 @@ static void HandleExtern() {
|
||||
if (Function *F = P->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read extern: ");
|
||||
F->dump();
|
||||
F->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
@ -1540,7 +1542,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
// Print out all of the generated code.
|
||||
TheHelper->dump();
|
||||
TheHelper->print(errs());
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
@ -1474,7 +1474,8 @@ static void HandleDefinition() {
|
||||
Function *LF = F->Codegen();
|
||||
if (LF && VerboseOutput) {
|
||||
fprintf(stderr, "Read function definition:");
|
||||
LF->dump();
|
||||
LF->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else {
|
||||
// Skip token for error recovery.
|
||||
@ -1487,7 +1488,8 @@ static void HandleExtern() {
|
||||
Function *F = P->Codegen();
|
||||
if (F && VerboseOutput) {
|
||||
fprintf(stderr, "Read extern: ");
|
||||
F->dump();
|
||||
F->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
} else {
|
||||
// Skip token for error recovery.
|
||||
|
@ -1252,7 +1252,8 @@ static void HandleDefinition() {
|
||||
if (Function *LF = F->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read function definition:");
|
||||
LF->dump();
|
||||
LF->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
@ -1266,7 +1267,8 @@ static void HandleExtern() {
|
||||
if (Function *F = P->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read extern: ");
|
||||
F->dump();
|
||||
F->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
@ -1370,7 +1372,7 @@ int main() {
|
||||
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
// Print out all of the generated code.
|
||||
TheHelper->dump();
|
||||
TheHelper->print(errs());
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
|
@ -1010,7 +1010,8 @@ static void HandleDefinition() {
|
||||
if (Function *LF = F->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read function definition:");
|
||||
LF->dump();
|
||||
LF->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
@ -1024,7 +1025,8 @@ static void HandleExtern() {
|
||||
if (Function *F = P->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read extern: ");
|
||||
F->dump();
|
||||
F->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
@ -1157,7 +1159,7 @@ int main(int argc, char **argv) {
|
||||
// Print out all of the generated code.
|
||||
TheFPM = 0;
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
TheModule->dump();
|
||||
TheModule->print(errs(), nullptr);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
@ -1293,7 +1293,8 @@ static void HandleDefinition() {
|
||||
if (Function *LF = F->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read function definition:");
|
||||
LF->dump();
|
||||
LF->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
@ -1307,7 +1308,8 @@ static void HandleExtern() {
|
||||
if (Function *F = P->Codegen()) {
|
||||
#ifndef MINIMAL_STDERR_OUTPUT
|
||||
fprintf(stderr, "Read extern: ");
|
||||
F->dump();
|
||||
F->print(errs());
|
||||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
|
||||
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
|
||||
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
|
||||
#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include "llvm/IR/Mangler.h"
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
@ -40,7 +40,7 @@ namespace orc {
|
||||
|
||||
class KaleidoscopeJIT {
|
||||
public:
|
||||
typedef ObjectLinkingLayer<> ObjLayerT;
|
||||
typedef RTDyldObjectLinkingLayer<> ObjLayerT;
|
||||
typedef IRCompileLayer<ObjLayerT> CompileLayerT;
|
||||
typedef CompileLayerT::ModuleSetHandleT ModuleHandleT;
|
||||
|
||||
@ -97,17 +97,40 @@ private:
|
||||
}
|
||||
|
||||
JITSymbol findMangledSymbol(const std::string &Name) {
|
||||
#ifdef LLVM_ON_WIN32
|
||||
// The symbol lookup of ObjectLinkingLayer uses the SymbolRef::SF_Exported
|
||||
// flag to decide whether a symbol will be visible or not, when we call
|
||||
// IRCompileLayer::findSymbolIn with ExportedSymbolsOnly set to true.
|
||||
//
|
||||
// But for Windows COFF objects, this flag is currently never set.
|
||||
// For a potential solution see: https://reviews.llvm.org/rL258665
|
||||
// For now, we allow non-exported symbols on Windows as a workaround.
|
||||
const bool ExportedSymbolsOnly = false;
|
||||
#else
|
||||
const bool ExportedSymbolsOnly = true;
|
||||
#endif
|
||||
|
||||
// Search modules in reverse order: from last added to first added.
|
||||
// This is the opposite of the usual search order for dlsym, but makes more
|
||||
// sense in a REPL where we want to bind to the newest available definition.
|
||||
for (auto H : make_range(ModuleHandles.rbegin(), ModuleHandles.rend()))
|
||||
if (auto Sym = CompileLayer.findSymbolIn(H, Name, true))
|
||||
if (auto Sym = CompileLayer.findSymbolIn(H, Name, ExportedSymbolsOnly))
|
||||
return Sym;
|
||||
|
||||
// If we can't find the symbol in the JIT, try looking in the host process.
|
||||
if (auto SymAddr = RTDyldMemoryManager::getSymbolAddressInProcess(Name))
|
||||
return JITSymbol(SymAddr, JITSymbolFlags::Exported);
|
||||
|
||||
#ifdef LLVM_ON_WIN32
|
||||
// For Windows retry without "_" at beginning, as RTDyldMemoryManager uses
|
||||
// GetProcAddress and standard libraries like msvcrt.dll use names
|
||||
// with and without "_" (for example "_itoa" but "sin").
|
||||
if (Name.length() > 2 && Name[0] == '_')
|
||||
if (auto SymAddr =
|
||||
RTDyldMemoryManager::getSymbolAddressInProcess(Name.substr(1)))
|
||||
return JITSymbol(SymAddr, JITSymbolFlags::Exported);
|
||||
#endif
|
||||
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user