Vendor import of llvm trunk r338150:
https://llvm.org/svn/llvm-project/llvm/trunk@338150
This commit is contained in:
parent
b8a2042aa9
commit
eb11fae6d0
@ -1,3 +1,4 @@
|
||||
{
|
||||
"repository.callsign" : "L",
|
||||
"conduit_uri" : "https://reviews.llvm.org/"
|
||||
}
|
||||
|
7
.gitattributes
vendored
7
.gitattributes
vendored
@ -1,6 +1,11 @@
|
||||
# binary files
|
||||
test/Object/Inputs/*.a-* binary
|
||||
test/tools/dsymutil/Inputs/* binary
|
||||
test/tools/dsymutil/Inputs/*.o binary
|
||||
test/tools/dsymutil/Inputs/*.a binary
|
||||
test/tools/dsymutil/Inputs/*.i386 binary
|
||||
test/tools/dsymutil/Inputs/*.x86_64 binary
|
||||
test/tools/dsymutil/Inputs/*.armv7m binary
|
||||
test/tools/dsymutil/Inputs/*.dylib binary
|
||||
test/tools/llvm-ar/Inputs/*.lib binary
|
||||
test/tools/llvm-objdump/Inputs/*.a binary
|
||||
test/tools/llvm-rc/Inputs/* binary
|
||||
|
@ -17,8 +17,13 @@ cmake_policy(SET CMP0056 NEW)
|
||||
|
||||
cmake_policy(SET CMP0057 NEW)
|
||||
|
||||
if(POLICY CMP0068)
|
||||
cmake_policy(SET CMP0068 NEW)
|
||||
set(CMAKE_BUILD_WITH_INSTALL_NAME_DIR ON)
|
||||
endif()
|
||||
|
||||
if(NOT DEFINED LLVM_VERSION_MAJOR)
|
||||
set(LLVM_VERSION_MAJOR 6)
|
||||
set(LLVM_VERSION_MAJOR 7)
|
||||
endif()
|
||||
if(NOT DEFINED LLVM_VERSION_MINOR)
|
||||
set(LLVM_VERSION_MINOR 0)
|
||||
@ -87,7 +92,7 @@ if(CMAKE_HOST_APPLE AND APPLE)
|
||||
|
||||
foreach(lang ${languages})
|
||||
set(CMAKE_${lang}_CREATE_STATIC_LIBRARY
|
||||
"${CMAKE_LIBTOOL} -static ${LIBTOOL_NO_WARNING_FLAG} -o <TARGET> \
|
||||
"\"${CMAKE_LIBTOOL}\" -static ${LIBTOOL_NO_WARNING_FLAG} -o <TARGET> \
|
||||
<LINK_FLAGS> <OBJECTS> ")
|
||||
endforeach()
|
||||
endif()
|
||||
@ -137,11 +142,14 @@ set(LLVM_CCACHE_BUILD OFF CACHE BOOL "Set to ON for a ccache enabled build")
|
||||
if(LLVM_CCACHE_BUILD)
|
||||
find_program(CCACHE_PROGRAM ccache)
|
||||
if(CCACHE_PROGRAM)
|
||||
set(LLVM_CCACHE_SIZE "" CACHE STRING "Size of ccache")
|
||||
set(LLVM_CCACHE_MAXSIZE "" CACHE STRING "Size of ccache")
|
||||
set(LLVM_CCACHE_DIR "" CACHE STRING "Directory to keep ccached data")
|
||||
set(CCACHE_PROGRAM "CCACHE_CPP2=yes CCACHE_HASHDIR=yes ${CCACHE_PROGRAM}")
|
||||
if (LLVM_CCACHE_SIZE)
|
||||
set(CCACHE_PROGRAM "CCACHE_SIZE=${LLVM_CCACHE_SIZE} ${CCACHE_PROGRAM}")
|
||||
set(LLVM_CCACHE_PARAMS "CCACHE_CPP2=yes CCACHE_HASHDIR=yes"
|
||||
CACHE STRING "Parameters to pass through to ccache")
|
||||
|
||||
set(CCACHE_PROGRAM "${LLVM_CCACHE_PARAMS} ${CCACHE_PROGRAM}")
|
||||
if (LLVM_CCACHE_MAXSIZE)
|
||||
set(CCACHE_PROGRAM "CCACHE_MAXSIZE=${LLVM_CCACHE_MAXSIZE} ${CCACHE_PROGRAM}")
|
||||
endif()
|
||||
if (LLVM_CCACHE_DIR)
|
||||
set(CCACHE_PROGRAM "CCACHE_DIR=${LLVM_CCACHE_DIR} ${CCACHE_PROGRAM}")
|
||||
@ -202,7 +210,7 @@ option(LLVM_APPEND_VC_REV
|
||||
|
||||
set(PACKAGE_NAME LLVM)
|
||||
set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
|
||||
set(PACKAGE_BUGREPORT "http://llvm.org/bugs/")
|
||||
set(PACKAGE_BUGREPORT "https://bugs.llvm.org/")
|
||||
|
||||
set(BUG_REPORT_URL "${PACKAGE_BUGREPORT}" CACHE STRING
|
||||
"Default URL where bug reports are to be submitted.")
|
||||
@ -223,10 +231,6 @@ if(WIN32 AND NOT UNIX)
|
||||
set(CPACK_NSIS_MUI_UNIICON "${CMAKE_CURRENT_SOURCE_DIR}\\\\cmake\\\\nsis_icon.ico")
|
||||
set(CPACK_NSIS_MODIFY_PATH "ON")
|
||||
set(CPACK_NSIS_ENABLE_UNINSTALL_BEFORE_INSTALL "ON")
|
||||
set(CPACK_NSIS_EXTRA_INSTALL_COMMANDS
|
||||
"ExecWait '$INSTDIR/tools/msbuild/install.bat'")
|
||||
set(CPACK_NSIS_EXTRA_UNINSTALL_COMMANDS
|
||||
"ExecWait '$INSTDIR/tools/msbuild/uninstall.bat'")
|
||||
if( CMAKE_CL_64 )
|
||||
set(CPACK_NSIS_INSTALL_ROOT "$PROGRAMFILES64")
|
||||
endif()
|
||||
@ -276,9 +280,9 @@ set(LLVM_LIBDIR_SUFFIX "" CACHE STRING "Define suffix of library directory name
|
||||
set(LLVM_TOOLS_INSTALL_DIR "bin" CACHE STRING "Path for binary subdirectory (defaults to 'bin')")
|
||||
mark_as_advanced(LLVM_TOOLS_INSTALL_DIR)
|
||||
|
||||
set(LLVM_UTILS_INSTALL_DIR "bin" CACHE STRING
|
||||
set(LLVM_UTILS_INSTALL_DIR "${LLVM_TOOLS_INSTALL_DIR}" CACHE STRING
|
||||
"Path to install LLVM utilities (enabled by LLVM_INSTALL_UTILS=ON) (defaults to LLVM_TOOLS_INSTALL_DIR)")
|
||||
mark_as_advanced(LLVM_TOOLS_INSTALL_DIR)
|
||||
mark_as_advanced(LLVM_UTILS_INSTALL_DIR)
|
||||
|
||||
# They are used as destination of target generators.
|
||||
set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin)
|
||||
@ -355,6 +359,8 @@ set(LLVM_ENABLE_LIBXML2 "ON" CACHE STRING "Use libxml2 if available. Can be ON,
|
||||
|
||||
option(LLVM_ENABLE_LIBEDIT "Use libedit if available." ON)
|
||||
|
||||
option(LLVM_ENABLE_LIBPFM "Use libpfm for performance counters if available." ON)
|
||||
|
||||
option(LLVM_ENABLE_THREADS "Use threads if available." ON)
|
||||
|
||||
option(LLVM_ENABLE_ZLIB "Use zlib for compression/decompression if available." ON)
|
||||
@ -419,6 +425,9 @@ option(LLVM_USE_OPROFILE
|
||||
option(LLVM_EXTERNALIZE_DEBUGINFO
|
||||
"Generate dSYM files and strip executables and libraries (Darwin Only)" OFF)
|
||||
|
||||
option(LLVM_CODESIGNING_IDENTITY
|
||||
"Sign executables and dylibs with the given identity (Darwin Only)" OFF)
|
||||
|
||||
# If enabled, verify we are on a platform that supports oprofile.
|
||||
if( LLVM_USE_OPROFILE )
|
||||
if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||
@ -426,8 +435,19 @@ if( LLVM_USE_OPROFILE )
|
||||
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||
endif( LLVM_USE_OPROFILE )
|
||||
|
||||
option(LLVM_USE_PERF
|
||||
"Use perf JIT interface to inform perf about JIT code" OFF)
|
||||
|
||||
# If enabled, verify we are on a platform that supports perf.
|
||||
if( LLVM_USE_PERF )
|
||||
if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||
message(FATAL_ERROR "perf support is available on Linux only.")
|
||||
endif( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" )
|
||||
endif( LLVM_USE_PERF )
|
||||
|
||||
set(LLVM_USE_SANITIZER "" CACHE STRING
|
||||
"Define the sanitizer used to build binaries and tests.")
|
||||
option(LLVM_OPTIMIZE_SANITIZED_BUILDS "Pass -O1 on debug sanitizer builds" ON)
|
||||
set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
|
||||
"Path to fuzzing library for linking with fuzz targets")
|
||||
|
||||
@ -509,6 +529,7 @@ option (LLVM_INCLUDE_DOCS "Generate build targets for llvm documentation." ON)
|
||||
option (LLVM_ENABLE_DOXYGEN "Use doxygen to generate llvm API documentation." OFF)
|
||||
option (LLVM_ENABLE_SPHINX "Use Sphinx to generate llvm documentation." OFF)
|
||||
option (LLVM_ENABLE_OCAMLDOC "Build OCaml bindings documentation." ON)
|
||||
option (LLVM_ENABLE_BINDINGS "Build bindings." ON)
|
||||
|
||||
set(LLVM_INSTALL_DOXYGEN_HTML_DIR "share/doc/llvm/doxygen-html"
|
||||
CACHE STRING "Doxygen-generated HTML documentation install directory")
|
||||
@ -572,6 +593,9 @@ endif()
|
||||
set(LLVM_TARGET_TRIPLE_ENV CACHE STRING "The name of environment variable to override default target. Disabled by blank.")
|
||||
mark_as_advanced(LLVM_TARGET_TRIPLE_ENV)
|
||||
|
||||
set(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR OFF CACHE BOOL
|
||||
"Enable per-target runtimes directory")
|
||||
|
||||
# All options referred to from HandleLLVMOptions have to be specified
|
||||
# BEFORE this include, otherwise options will not be correctly set on
|
||||
# first cmake run
|
||||
@ -634,6 +658,9 @@ endif (LLVM_USE_INTEL_JITEVENTS)
|
||||
if (LLVM_USE_OPROFILE)
|
||||
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} OProfileJIT)
|
||||
endif (LLVM_USE_OPROFILE)
|
||||
if (LLVM_USE_PERF)
|
||||
set(LLVMOPTIONALCOMPONENTS ${LLVMOPTIONALCOMPONENTS} PerfJITEvents)
|
||||
endif (LLVM_USE_PERF)
|
||||
|
||||
message(STATUS "Constructing LLVMBuild project information")
|
||||
execute_process(
|
||||
@ -733,9 +760,6 @@ configure_file(
|
||||
configure_file(
|
||||
${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/abi-breaking.h.cmake
|
||||
${LLVM_INCLUDE_DIR}/llvm/Config/abi-breaking.h)
|
||||
configure_file(
|
||||
${LLVM_MAIN_INCLUDE_DIR}/llvm/Support/DataTypes.h.cmake
|
||||
${LLVM_INCLUDE_DIR}/llvm/Support/DataTypes.h)
|
||||
|
||||
# Add target for generating source rpm package.
|
||||
set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in
|
||||
@ -829,7 +853,7 @@ endif()
|
||||
include(AddLLVM)
|
||||
include(TableGen)
|
||||
|
||||
if( MINGW )
|
||||
if( MINGW AND NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" )
|
||||
# People report that -O3 is unreliable on MinGW. The traditional
|
||||
# build also uses -O2 for that reason:
|
||||
llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELEASE "-O3" "-O2")
|
||||
@ -955,7 +979,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
|
||||
PATTERN ".svn" EXCLUDE
|
||||
)
|
||||
|
||||
install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm
|
||||
install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm ${LLVM_INCLUDE_DIR}/llvm-c
|
||||
DESTINATION include
|
||||
COMPONENT llvm-headers
|
||||
FILES_MATCHING
|
||||
@ -976,6 +1000,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
|
||||
|
||||
if (NOT CMAKE_CONFIGURATION_TYPES)
|
||||
add_llvm_install_targets(install-llvm-headers
|
||||
DEPENDS llvm-headers
|
||||
COMPONENT llvm-headers)
|
||||
endif()
|
||||
endif()
|
||||
@ -990,7 +1015,7 @@ if(LLVM_DISTRIBUTION_COMPONENTS)
|
||||
add_custom_target(distribution)
|
||||
add_custom_target(install-distribution)
|
||||
add_custom_target(install-distribution-stripped)
|
||||
foreach(target ${LLVM_DISTRIBUTION_COMPONENTS})
|
||||
foreach(target ${LLVM_DISTRIBUTION_COMPONENTS} ${LLVM_RUNTIME_DISTRIBUTION_COMPONENTS})
|
||||
if(TARGET ${target})
|
||||
add_dependencies(distribution ${target})
|
||||
else()
|
||||
@ -1017,4 +1042,3 @@ endif()
|
||||
if (MSVC AND CMAKE_HOST_SYSTEM_NAME STREQUAL "Windows")
|
||||
include(InstallRequiredSystemLibraries)
|
||||
endif()
|
||||
|
||||
|
@ -9,6 +9,10 @@ beautification by scripts. The fields are: name (N), email (E), web-address
|
||||
(S) and (I) IRC handle. Each entry should contain at least the (N), (E) and
|
||||
(D) fields.
|
||||
|
||||
N: Simon Atanasyan
|
||||
E: simon@atanasyan.com
|
||||
D: MIPS Backend (lib/Target/Mips/*)
|
||||
|
||||
N: Justin Bogner
|
||||
E: mail@justinbogner.com
|
||||
D: InstrProfiling and related parts of ProfileData
|
||||
@ -61,12 +65,13 @@ E: peter@pcc.me.uk
|
||||
D: llgo, libLTO (lib/LTO/* tools/lto/*), LLVM Bitcode (lib/Bitcode/* include/llvm/Bitcode/*)
|
||||
|
||||
N: Quentin Colombet
|
||||
E: qcolombet@apple.com
|
||||
E: quentin.colombet@gmail.com
|
||||
D: Loop Strength Reduction, Register allocators
|
||||
|
||||
N: Simon Dardis
|
||||
E: simon.dardis@mips.com
|
||||
D: MIPS Backend (lib/Target/Mips/*)
|
||||
N: Andrea Di Biagio
|
||||
E: andrea.dibiagio@sony.com
|
||||
E: andrea.dibiagio@gmail.com
|
||||
D: llvm-mca
|
||||
|
||||
N: Duncan P. N. Exon Smith
|
||||
E: dexonsmith@apple.com
|
||||
@ -212,4 +217,4 @@ D: Release management (x.y.0 releases)
|
||||
|
||||
N: whitequark
|
||||
E: whitequark@whitequark.org
|
||||
D: OCaml bindings
|
||||
D: C API, OCaml bindings
|
||||
|
30
CREDITS.TXT
30
CREDITS.TXT
@ -23,7 +23,8 @@ D: MingW Win32 API portability layer
|
||||
|
||||
N: Aaron Ballman
|
||||
E: aaron@aaronballman.com
|
||||
D: __declspec attributes, Windows support, general bug fixing
|
||||
D: Clang frontend, frontend attributes, Windows support, general bug fixing
|
||||
I: AaronBallman
|
||||
|
||||
N: Nate Begeman
|
||||
E: natebegeman@mac.com
|
||||
@ -35,6 +36,13 @@ E: dberlin@dberlin.org
|
||||
D: ET-Forest implementation.
|
||||
D: Sparse bitmap
|
||||
|
||||
N: Geoff Berry
|
||||
E: gberry@codeaurora.org
|
||||
E: gcb@acm.org
|
||||
D: AArch64 backend improvements
|
||||
D: Added EarlyCSE MemorySSA support
|
||||
D: CodeGen improvements
|
||||
|
||||
N: David Blaikie
|
||||
E: dblaikie@gmail.com
|
||||
D: General bug fixing/fit & finish, mostly in Clang
|
||||
@ -107,8 +115,8 @@ E: stefanus.du.toit@intel.com
|
||||
D: Bug fixes and minor improvements
|
||||
|
||||
N: Rafael Avila de Espindola
|
||||
E: rafael.espindola@gmail.com
|
||||
D: The ARM backend
|
||||
E: rafael@espindo.la
|
||||
D: MC and LLD work
|
||||
|
||||
N: Dave Estes
|
||||
E: cestes@codeaurora.org
|
||||
@ -203,6 +211,11 @@ N: Patrick Jenkins
|
||||
E: patjenk@wam.umd.edu
|
||||
D: Nightly Tester
|
||||
|
||||
N: Tony(Yanjun) Jiang
|
||||
E: jtony@ca.ibm.com
|
||||
D: PowerPC Backend Developer
|
||||
D: Improvements to the PPC backend and miscellaneous bug fixes
|
||||
|
||||
N: Dale Johannesen
|
||||
E: dalej@apple.com
|
||||
D: ARM constant islands improvements
|
||||
@ -219,6 +232,11 @@ N: Rod Kay
|
||||
E: rkay@auroraux.org
|
||||
D: Author of LLVM Ada bindings
|
||||
|
||||
N: Erich Keane
|
||||
E: erich.keane@intel.com
|
||||
D: A variety of Clang contributions including function multiversioning, regcall/vectorcall.
|
||||
I: ErichKeane
|
||||
|
||||
N: Eric Kidd
|
||||
W: http://randomhacks.net/
|
||||
D: llvm-config script
|
||||
@ -269,7 +287,7 @@ D: Release manager (1.7+)
|
||||
N: Sylvestre Ledru
|
||||
E: sylvestre@debian.org
|
||||
W: http://sylvestre.ledru.info/
|
||||
W: http://apt.llvm.org/
|
||||
W: https://apt.llvm.org/
|
||||
D: Debian and Ubuntu packaging
|
||||
D: Continuous integration with jenkins
|
||||
|
||||
@ -480,3 +498,7 @@ D: Bunches of stuff
|
||||
N: Bob Wilson
|
||||
E: bob.wilson@acm.org
|
||||
D: Advanced SIMD (NEON) support in the ARM backend.
|
||||
|
||||
N: QingShan Zhang
|
||||
E: qshanz@cn.ibm.com
|
||||
D: PowerPC Backend Developer
|
||||
|
@ -4,7 +4,7 @@ LLVM Release License
|
||||
University of Illinois/NCSA
|
||||
Open Source License
|
||||
|
||||
Copyright (c) 2003-2017 University of Illinois at Urbana-Champaign.
|
||||
Copyright (c) 2003-2018 University of Illinois at Urbana-Champaign.
|
||||
All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
@ -1,4 +1,4 @@
|
||||
Low Level Virtual Machine (LLVM)
|
||||
The LLVM Compiler Infrastructure
|
||||
================================
|
||||
|
||||
This directory and its subdirectories contain source code for LLVM,
|
||||
@ -16,4 +16,3 @@ documentation setup.
|
||||
If you are writing a package for LLVM, see docs/Packaging.rst for our
|
||||
suggestions.
|
||||
|
||||
|
||||
|
@ -45,8 +45,3 @@ N: Diana Picus
|
||||
E: diana.picus@linaro.org
|
||||
T: ARM, AArch64
|
||||
O: Linux
|
||||
|
||||
N: Simon Dardis
|
||||
E: simon.dardis@mips.com
|
||||
T: MIPS
|
||||
O: Linux
|
||||
|
@ -51,3 +51,11 @@ CGO_CPPFLAGS, CGO_CXXFLAGS and CGO_LDFLAGS environment variables:
|
||||
$ export CGO_CXXFLAGS=-std=c++11
|
||||
$ export CGO_LDFLAGS="`/path/to/llvm-build/bin/llvm-config --ldflags --libs --system-libs all`"
|
||||
$ go build -tags byollvm
|
||||
|
||||
If you see a compilation error while compiling your code with Go 1.9.4 or later as follows,
|
||||
|
||||
go build llvm.org/llvm/bindings/go/llvm: invalid flag in #cgo LDFLAGS: -Wl,-headerpad_max_install_names
|
||||
|
||||
you need to setup $CGO_LDFLAGS_ALLOW to allow a compiler to specify some linker options:
|
||||
|
||||
$ export CGO_LDFLAGS_ALLOW='-Wl,(-search_paths_first|-headerpad_max_install_names)'
|
||||
|
@ -1,234 +0,0 @@
|
||||
//===- DIBuilderBindings.cpp - Bindings for DIBuilder ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines C bindings for the DIBuilder class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "DIBuilderBindings.h"
|
||||
#include "IRBindings.h"
|
||||
#include "llvm/IR/DIBuilder.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef mref) {
|
||||
Module *m = unwrap(mref);
|
||||
return wrap(new DIBuilder(*m));
|
||||
}
|
||||
|
||||
void LLVMDIBuilderDestroy(LLVMDIBuilderRef dref) {
|
||||
DIBuilder *d = unwrap(dref);
|
||||
delete d;
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateLexicalBlock(LLVMDIBuilderRef Dref,
|
||||
LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef File,
|
||||
unsigned Line,
|
||||
unsigned Column) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
auto *LB = D->createLexicalBlock(unwrap<DILocalScope>(Scope),
|
||||
unwrap<DIFile>(File), Line, Column);
|
||||
return wrap(LB);
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateLexicalBlockFile(LLVMDIBuilderRef Dref,
|
||||
LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef File,
|
||||
unsigned Discriminator) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createLexicalBlockFile(unwrap<DILocalScope>(Scope),
|
||||
unwrap<DIFile>(File), Discriminator));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateFunction(
|
||||
LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
|
||||
const char *LinkageName, LLVMMetadataRef File, unsigned Line,
|
||||
LLVMMetadataRef CompositeType, int IsLocalToUnit, int IsDefinition,
|
||||
unsigned ScopeLine, unsigned Flags, int IsOptimized) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createFunction(
|
||||
unwrap<DIScope>(Scope), Name, LinkageName,
|
||||
File ? unwrap<DIFile>(File) : nullptr, Line,
|
||||
unwrap<DISubroutineType>(CompositeType), IsLocalToUnit, IsDefinition,
|
||||
ScopeLine, static_cast<DINode::DIFlags>(Flags), IsOptimized));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateAutoVariable(
|
||||
LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
|
||||
LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty, int AlwaysPreserve,
|
||||
unsigned Flags, uint32_t AlignInBits) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(
|
||||
D->createAutoVariable(unwrap<DIScope>(Scope), Name, unwrap<DIFile>(File),
|
||||
Line, unwrap<DIType>(Ty), AlwaysPreserve,
|
||||
static_cast<DINode::DIFlags>(Flags), AlignInBits));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateParameterVariable(
|
||||
LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
|
||||
unsigned ArgNo, LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty,
|
||||
int AlwaysPreserve, unsigned Flags) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createParameterVariable(
|
||||
unwrap<DIScope>(Scope), Name, ArgNo, unwrap<DIFile>(File), Line,
|
||||
unwrap<DIType>(Ty), AlwaysPreserve, static_cast<DINode::DIFlags>(Flags)));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef Dref,
|
||||
const char *Name,
|
||||
uint64_t SizeInBits,
|
||||
unsigned Encoding) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createBasicType(Name, SizeInBits, Encoding));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreatePointerType(LLVMDIBuilderRef Dref,
|
||||
LLVMMetadataRef PointeeType,
|
||||
uint64_t SizeInBits,
|
||||
uint32_t AlignInBits,
|
||||
const char *Name) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createPointerType(unwrap<DIType>(PointeeType), SizeInBits,
|
||||
AlignInBits, /* DWARFAddressSpace */ None,
|
||||
Name));
|
||||
}
|
||||
|
||||
LLVMMetadataRef
|
||||
LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef Dref, LLVMMetadataRef File,
|
||||
LLVMMetadataRef ParameterTypes) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(
|
||||
D->createSubroutineType(DITypeRefArray(unwrap<MDTuple>(ParameterTypes))));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateStructType(
|
||||
LLVMDIBuilderRef Dref, LLVMMetadataRef Scope, const char *Name,
|
||||
LLVMMetadataRef File, unsigned Line, uint64_t SizeInBits,
|
||||
uint32_t AlignInBits, unsigned Flags, LLVMMetadataRef DerivedFrom,
|
||||
LLVMMetadataRef ElementTypes) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createStructType(
|
||||
unwrap<DIScope>(Scope), Name, File ? unwrap<DIFile>(File) : nullptr, Line,
|
||||
SizeInBits, AlignInBits, static_cast<DINode::DIFlags>(Flags),
|
||||
DerivedFrom ? unwrap<DIType>(DerivedFrom) : nullptr,
|
||||
ElementTypes ? DINodeArray(unwrap<MDTuple>(ElementTypes)) : nullptr));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateReplaceableCompositeType(
|
||||
LLVMDIBuilderRef Dref, unsigned Tag, const char *Name,
|
||||
LLVMMetadataRef Scope, LLVMMetadataRef File, unsigned Line,
|
||||
unsigned RuntimeLang, uint64_t SizeInBits, uint32_t AlignInBits,
|
||||
unsigned Flags) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createReplaceableCompositeType(
|
||||
Tag, Name, unwrap<DIScope>(Scope), File ? unwrap<DIFile>(File) : nullptr,
|
||||
Line, RuntimeLang, SizeInBits, AlignInBits,
|
||||
static_cast<DINode::DIFlags>(Flags)));
|
||||
}
|
||||
|
||||
LLVMMetadataRef
|
||||
LLVMDIBuilderCreateMemberType(LLVMDIBuilderRef Dref, LLVMMetadataRef Scope,
|
||||
const char *Name, LLVMMetadataRef File,
|
||||
unsigned Line, uint64_t SizeInBits,
|
||||
uint32_t AlignInBits, uint64_t OffsetInBits,
|
||||
unsigned Flags, LLVMMetadataRef Ty) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createMemberType(
|
||||
unwrap<DIScope>(Scope), Name, File ? unwrap<DIFile>(File) : nullptr, Line,
|
||||
SizeInBits, AlignInBits, OffsetInBits,
|
||||
static_cast<DINode::DIFlags>(Flags), unwrap<DIType>(Ty)));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateArrayType(LLVMDIBuilderRef Dref,
|
||||
uint64_t SizeInBits,
|
||||
uint32_t AlignInBits,
|
||||
LLVMMetadataRef ElementType,
|
||||
LLVMMetadataRef Subscripts) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createArrayType(SizeInBits, AlignInBits,
|
||||
unwrap<DIType>(ElementType),
|
||||
DINodeArray(unwrap<MDTuple>(Subscripts))));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateTypedef(LLVMDIBuilderRef Dref,
|
||||
LLVMMetadataRef Ty, const char *Name,
|
||||
LLVMMetadataRef File, unsigned Line,
|
||||
LLVMMetadataRef Context) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createTypedef(unwrap<DIType>(Ty), Name,
|
||||
File ? unwrap<DIFile>(File) : nullptr, Line,
|
||||
Context ? unwrap<DIScope>(Context) : nullptr));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderGetOrCreateSubrange(LLVMDIBuilderRef Dref,
|
||||
int64_t Lo, int64_t Count) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->getOrCreateSubrange(Lo, Count));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderGetOrCreateArray(LLVMDIBuilderRef Dref,
|
||||
LLVMMetadataRef *Data,
|
||||
size_t Length) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
Metadata **DataValue = unwrap(Data);
|
||||
ArrayRef<Metadata *> Elements(DataValue, Length);
|
||||
DINodeArray A = D->getOrCreateArray(Elements);
|
||||
return wrap(A.get());
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderGetOrCreateTypeArray(LLVMDIBuilderRef Dref,
|
||||
LLVMMetadataRef *Data,
|
||||
size_t Length) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
Metadata **DataValue = unwrap(Data);
|
||||
ArrayRef<Metadata *> Elements(DataValue, Length);
|
||||
DITypeRefArray A = D->getOrCreateTypeArray(Elements);
|
||||
return wrap(A.get());
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateExpression(LLVMDIBuilderRef Dref,
|
||||
int64_t *Addr, size_t Length) {
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
return wrap(D->createExpression(ArrayRef<int64_t>(Addr, Length)));
|
||||
}
|
||||
|
||||
LLVMValueRef LLVMDIBuilderInsertDeclareAtEnd(LLVMDIBuilderRef Dref,
|
||||
LLVMValueRef Storage,
|
||||
LLVMMetadataRef VarInfo,
|
||||
LLVMMetadataRef Expr,
|
||||
LLVMBasicBlockRef Block) {
|
||||
// Fail immediately here until the llgo folks update their bindings. The
|
||||
// called function is going to assert out anyway.
|
||||
llvm_unreachable("DIBuilder API change requires a DebugLoc");
|
||||
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
Instruction *Instr = D->insertDeclare(
|
||||
unwrap(Storage), unwrap<DILocalVariable>(VarInfo),
|
||||
unwrap<DIExpression>(Expr), /* DebugLoc */ nullptr, unwrap(Block));
|
||||
return wrap(Instr);
|
||||
}
|
||||
|
||||
LLVMValueRef LLVMDIBuilderInsertValueAtEnd(LLVMDIBuilderRef Dref,
|
||||
LLVMValueRef Val,
|
||||
LLVMMetadataRef VarInfo,
|
||||
LLVMMetadataRef Expr,
|
||||
LLVMBasicBlockRef Block) {
|
||||
// Fail immediately here until the llgo folks update their bindings. The
|
||||
// called function is going to assert out anyway.
|
||||
llvm_unreachable("DIBuilder API change requires a DebugLoc");
|
||||
|
||||
DIBuilder *D = unwrap(Dref);
|
||||
Instruction *Instr = D->insertDbgValueIntrinsic(
|
||||
unwrap(Val), unwrap<DILocalVariable>(VarInfo), unwrap<DIExpression>(Expr),
|
||||
/* DebugLoc */ nullptr, unwrap(Block));
|
||||
return wrap(Instr);
|
||||
}
|
@ -1,134 +0,0 @@
|
||||
//===- DIBuilderBindings.h - Bindings for DIBuilder -------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines C bindings for the DIBuilder class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_BINDINGS_GO_LLVM_DIBUILDERBINDINGS_H
|
||||
#define LLVM_BINDINGS_GO_LLVM_DIBUILDERBINDINGS_H
|
||||
|
||||
#include "IRBindings.h"
|
||||
#include "llvm-c/Core.h"
|
||||
#include "llvm-c/DebugInfo.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// FIXME: These bindings shouldn't be Go-specific and should eventually move to
|
||||
// a (somewhat) less stable collection of C APIs for use in creating bindings of
|
||||
// LLVM in other languages.
|
||||
|
||||
typedef struct LLVMOpaqueDIBuilder *LLVMDIBuilderRef;
|
||||
|
||||
LLVMDIBuilderRef LLVMNewDIBuilder(LLVMModuleRef m);
|
||||
|
||||
void LLVMDIBuilderDestroy(LLVMDIBuilderRef d);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateLexicalBlock(LLVMDIBuilderRef D,
|
||||
LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef File,
|
||||
unsigned Line, unsigned Column);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateLexicalBlockFile(LLVMDIBuilderRef D,
|
||||
LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef File,
|
||||
unsigned Discriminator);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateFunction(
|
||||
LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name,
|
||||
const char *LinkageName, LLVMMetadataRef File, unsigned Line,
|
||||
LLVMMetadataRef CompositeType, int IsLocalToUnit, int IsDefinition,
|
||||
unsigned ScopeLine, unsigned Flags, int IsOptimized);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateAutoVariable(
|
||||
LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name,
|
||||
LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty, int AlwaysPreserve,
|
||||
unsigned Flags, uint32_t AlignInBits);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateParameterVariable(
|
||||
LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name, unsigned ArgNo,
|
||||
LLVMMetadataRef File, unsigned Line, LLVMMetadataRef Ty, int AlwaysPreserve,
|
||||
unsigned Flags);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateBasicType(LLVMDIBuilderRef D,
|
||||
const char *Name,
|
||||
uint64_t SizeInBits,
|
||||
unsigned Encoding);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreatePointerType(LLVMDIBuilderRef D,
|
||||
LLVMMetadataRef PointeeType,
|
||||
uint64_t SizeInBits,
|
||||
uint32_t AlignInBits,
|
||||
const char *Name);
|
||||
|
||||
LLVMMetadataRef
|
||||
LLVMDIBuilderCreateSubroutineType(LLVMDIBuilderRef D, LLVMMetadataRef File,
|
||||
LLVMMetadataRef ParameterTypes);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateStructType(
|
||||
LLVMDIBuilderRef D, LLVMMetadataRef Scope, const char *Name,
|
||||
LLVMMetadataRef File, unsigned Line, uint64_t SizeInBits,
|
||||
uint32_t AlignInBits, unsigned Flags, LLVMMetadataRef DerivedFrom,
|
||||
LLVMMetadataRef ElementTypes);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateReplaceableCompositeType(
|
||||
LLVMDIBuilderRef D, unsigned Tag, const char *Name, LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef File, unsigned Line, unsigned RuntimeLang,
|
||||
uint64_t SizeInBits, uint32_t AlignInBits, unsigned Flags);
|
||||
|
||||
LLVMMetadataRef
|
||||
LLVMDIBuilderCreateMemberType(LLVMDIBuilderRef D, LLVMMetadataRef Scope,
|
||||
const char *Name, LLVMMetadataRef File,
|
||||
unsigned Line, uint64_t SizeInBits,
|
||||
uint32_t AlignInBits, uint64_t OffsetInBits,
|
||||
unsigned Flags, LLVMMetadataRef Ty);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateArrayType(LLVMDIBuilderRef D,
|
||||
uint64_t SizeInBits,
|
||||
uint32_t AlignInBits,
|
||||
LLVMMetadataRef ElementType,
|
||||
LLVMMetadataRef Subscripts);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateTypedef(LLVMDIBuilderRef D,
|
||||
LLVMMetadataRef Ty, const char *Name,
|
||||
LLVMMetadataRef File, unsigned Line,
|
||||
LLVMMetadataRef Context);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderGetOrCreateSubrange(LLVMDIBuilderRef D, int64_t Lo,
|
||||
int64_t Count);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderGetOrCreateArray(LLVMDIBuilderRef D,
|
||||
LLVMMetadataRef *Data,
|
||||
size_t Length);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderGetOrCreateTypeArray(LLVMDIBuilderRef D,
|
||||
LLVMMetadataRef *Data,
|
||||
size_t Length);
|
||||
|
||||
LLVMMetadataRef LLVMDIBuilderCreateExpression(LLVMDIBuilderRef Dref,
|
||||
int64_t *Addr, size_t Length);
|
||||
|
||||
LLVMValueRef LLVMDIBuilderInsertDeclareAtEnd(LLVMDIBuilderRef D,
|
||||
LLVMValueRef Storage,
|
||||
LLVMMetadataRef VarInfo,
|
||||
LLVMMetadataRef Expr,
|
||||
LLVMBasicBlockRef Block);
|
||||
|
||||
LLVMValueRef LLVMDIBuilderInsertValueAtEnd(LLVMDIBuilderRef D, LLVMValueRef Val,
|
||||
LLVMMetadataRef VarInfo,
|
||||
LLVMMetadataRef Expr,
|
||||
LLVMBasicBlockRef Block);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
#endif
|
@ -36,13 +36,6 @@ LLVMMetadataRef LLVMMDNode2(LLVMContextRef C, LLVMMetadataRef *MDs,
|
||||
MDNode::get(*unwrap(C), ArrayRef<Metadata *>(unwrap(MDs), Count)));
|
||||
}
|
||||
|
||||
LLVMMetadataRef LLVMTemporaryMDNode(LLVMContextRef C, LLVMMetadataRef *MDs,
|
||||
unsigned Count) {
|
||||
return wrap(MDTuple::getTemporary(*unwrap(C),
|
||||
ArrayRef<Metadata *>(unwrap(MDs), Count))
|
||||
.release());
|
||||
}
|
||||
|
||||
void LLVMAddNamedMetadataOperand2(LLVMModuleRef M, const char *name,
|
||||
LLVMMetadataRef Val) {
|
||||
NamedMDNode *N = unwrap(M)->getOrInsertNamedMetadata(name);
|
||||
@ -58,12 +51,6 @@ void LLVMSetMetadata2(LLVMValueRef Inst, unsigned KindID, LLVMMetadataRef MD) {
|
||||
unwrap<Instruction>(Inst)->setMetadata(KindID, N);
|
||||
}
|
||||
|
||||
void LLVMMetadataReplaceAllUsesWith(LLVMMetadataRef MD, LLVMMetadataRef New) {
|
||||
auto *Node = unwrap<MDNode>(MD);
|
||||
Node->replaceAllUsesWith(unwrap<Metadata>(New));
|
||||
MDNode::deleteTemporary(Node);
|
||||
}
|
||||
|
||||
void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
|
||||
unsigned Col, LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef InlinedAt) {
|
||||
@ -84,6 +71,3 @@ LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref) {
|
||||
return md;
|
||||
}
|
||||
|
||||
void LLVMSetSubprogram(LLVMValueRef Func, LLVMMetadataRef SP) {
|
||||
unwrap<Function>(Func)->setSubprogram(unwrap<DISubprogram>(SP));
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
#define LLVM_BINDINGS_GO_LLVM_IRBINDINGS_H
|
||||
|
||||
#include "llvm-c/Core.h"
|
||||
#include "llvm-c/DebugInfo.h"
|
||||
#ifdef __cplusplus
|
||||
#include "llvm/IR/Metadata.h"
|
||||
#include "llvm/Support/CBindingWrapping.h"
|
||||
@ -38,23 +39,17 @@ LLVMMetadataRef LLVMConstantAsMetadata(LLVMValueRef Val);
|
||||
LLVMMetadataRef LLVMMDString2(LLVMContextRef C, const char *Str, unsigned SLen);
|
||||
LLVMMetadataRef LLVMMDNode2(LLVMContextRef C, LLVMMetadataRef *MDs,
|
||||
unsigned Count);
|
||||
LLVMMetadataRef LLVMTemporaryMDNode(LLVMContextRef C, LLVMMetadataRef *MDs,
|
||||
unsigned Count);
|
||||
|
||||
void LLVMAddNamedMetadataOperand2(LLVMModuleRef M, const char *name,
|
||||
LLVMMetadataRef Val);
|
||||
void LLVMSetMetadata2(LLVMValueRef Inst, unsigned KindID, LLVMMetadataRef MD);
|
||||
|
||||
void LLVMMetadataReplaceAllUsesWith(LLVMMetadataRef MD, LLVMMetadataRef New);
|
||||
|
||||
void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Bref, unsigned Line,
|
||||
unsigned Col, LLVMMetadataRef Scope,
|
||||
LLVMMetadataRef InlinedAt);
|
||||
|
||||
struct LLVMDebugLocMetadata LLVMGetCurrentDebugLocation2(LLVMBuilderRef Bref);
|
||||
|
||||
void LLVMSetSubprogram(LLVMValueRef Fn, LLVMMetadataRef SP);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
package llvm
|
||||
|
||||
/*
|
||||
#include "DIBuilderBindings.h"
|
||||
#include "IRBindings.h"
|
||||
#include <stdlib.h>
|
||||
*/
|
||||
import "C"
|
||||
@ -95,13 +95,13 @@ type DIBuilder struct {
|
||||
|
||||
// NewDIBuilder creates a new DIBuilder, associated with the given module.
|
||||
func NewDIBuilder(m Module) *DIBuilder {
|
||||
d := C.LLVMNewDIBuilder(m.C)
|
||||
d := C.LLVMCreateDIBuilder(m.C)
|
||||
return &DIBuilder{ref: d, m: m}
|
||||
}
|
||||
|
||||
// Destroy destroys the DIBuilder.
|
||||
func (d *DIBuilder) Destroy() {
|
||||
C.LLVMDIBuilderDestroy(d.ref)
|
||||
C.LLVMDisposeDIBuilder(d.ref)
|
||||
}
|
||||
|
||||
// FInalize finalizes the debug information generated by the DIBuilder.
|
||||
@ -147,7 +147,7 @@ func (d *DIBuilder) CreateCompileUnit(cu DICompileUnit) Metadata {
|
||||
return Metadata{C: result}
|
||||
}
|
||||
|
||||
// CreateCompileUnit creates file debug metadata.
|
||||
// CreateFile creates file debug metadata.
|
||||
func (d *DIBuilder) CreateFile(filename, dir string) Metadata {
|
||||
cfilename := C.CString(filename)
|
||||
defer C.free(unsafe.Pointer(cfilename))
|
||||
@ -166,7 +166,7 @@ type DILexicalBlock struct {
|
||||
Column int
|
||||
}
|
||||
|
||||
// CreateCompileUnit creates lexical block debug metadata.
|
||||
// CreateLexicalBlock creates lexical block debug metadata.
|
||||
func (d *DIBuilder) CreateLexicalBlock(diScope Metadata, b DILexicalBlock) Metadata {
|
||||
result := C.LLVMDIBuilderCreateLexicalBlock(
|
||||
d.ref,
|
||||
@ -198,7 +198,7 @@ type DIFunction struct {
|
||||
Optimized bool
|
||||
}
|
||||
|
||||
// CreateCompileUnit creates function debug metadata.
|
||||
// CreateFunction creates function debug metadata.
|
||||
func (d *DIBuilder) CreateFunction(diScope Metadata, f DIFunction) Metadata {
|
||||
name := C.CString(f.Name)
|
||||
defer C.free(unsafe.Pointer(name))
|
||||
@ -207,16 +207,16 @@ func (d *DIBuilder) CreateFunction(diScope Metadata, f DIFunction) Metadata {
|
||||
result := C.LLVMDIBuilderCreateFunction(
|
||||
d.ref,
|
||||
diScope.C,
|
||||
name,
|
||||
linkageName,
|
||||
name, C.size_t(len(f.Name)),
|
||||
linkageName, C.size_t(len(f.LinkageName)),
|
||||
f.File.C,
|
||||
C.unsigned(f.Line),
|
||||
f.Type.C,
|
||||
boolToCInt(f.LocalToUnit),
|
||||
boolToCInt(f.IsDefinition),
|
||||
C.LLVMBool(boolToCInt(f.LocalToUnit)),
|
||||
C.LLVMBool(boolToCInt(f.IsDefinition)),
|
||||
C.unsigned(f.ScopeLine),
|
||||
C.unsigned(f.Flags),
|
||||
boolToCInt(f.Optimized),
|
||||
C.LLVMDIFlags(f.Flags),
|
||||
C.LLVMBool(boolToCInt(f.Optimized)),
|
||||
)
|
||||
return Metadata{C: result}
|
||||
}
|
||||
@ -239,12 +239,12 @@ func (d *DIBuilder) CreateAutoVariable(scope Metadata, v DIAutoVariable) Metadat
|
||||
result := C.LLVMDIBuilderCreateAutoVariable(
|
||||
d.ref,
|
||||
scope.C,
|
||||
name,
|
||||
name, C.size_t(len(v.Name)),
|
||||
v.File.C,
|
||||
C.unsigned(v.Line),
|
||||
v.Type.C,
|
||||
boolToCInt(v.AlwaysPreserve),
|
||||
C.unsigned(v.Flags),
|
||||
C.LLVMBool(boolToCInt(v.AlwaysPreserve)),
|
||||
C.LLVMDIFlags(v.Flags),
|
||||
C.uint32_t(v.AlignInBits),
|
||||
)
|
||||
return Metadata{C: result}
|
||||
@ -271,13 +271,13 @@ func (d *DIBuilder) CreateParameterVariable(scope Metadata, v DIParameterVariabl
|
||||
result := C.LLVMDIBuilderCreateParameterVariable(
|
||||
d.ref,
|
||||
scope.C,
|
||||
name,
|
||||
name, C.size_t(len(v.Name)),
|
||||
C.unsigned(v.ArgNo),
|
||||
v.File.C,
|
||||
C.unsigned(v.Line),
|
||||
v.Type.C,
|
||||
boolToCInt(v.AlwaysPreserve),
|
||||
C.unsigned(v.Flags),
|
||||
C.LLVMBool(boolToCInt(v.AlwaysPreserve)),
|
||||
C.LLVMDIFlags(v.Flags),
|
||||
)
|
||||
return Metadata{C: result}
|
||||
}
|
||||
@ -296,6 +296,7 @@ func (d *DIBuilder) CreateBasicType(t DIBasicType) Metadata {
|
||||
result := C.LLVMDIBuilderCreateBasicType(
|
||||
d.ref,
|
||||
name,
|
||||
C.size_t(len(t.Name)),
|
||||
C.uint64_t(t.SizeInBits),
|
||||
C.unsigned(t.Encoding),
|
||||
)
|
||||
@ -307,10 +308,11 @@ type DIPointerType struct {
|
||||
Pointee Metadata
|
||||
SizeInBits uint64
|
||||
AlignInBits uint32 // optional
|
||||
AddressSpace uint32
|
||||
Name string // optional
|
||||
}
|
||||
|
||||
// CreateBasicType creates basic type debug metadata.
|
||||
// CreatePointerType creates a type that represents a pointer to another type.
|
||||
func (d *DIBuilder) CreatePointerType(t DIPointerType) Metadata {
|
||||
name := C.CString(t.Name)
|
||||
defer C.free(unsafe.Pointer(name))
|
||||
@ -319,7 +321,9 @@ func (d *DIBuilder) CreatePointerType(t DIPointerType) Metadata {
|
||||
t.Pointee.C,
|
||||
C.uint64_t(t.SizeInBits),
|
||||
C.uint32_t(t.AlignInBits),
|
||||
C.unsigned(t.AddressSpace),
|
||||
name,
|
||||
C.size_t(len(t.Name)),
|
||||
)
|
||||
return Metadata{C: result}
|
||||
}
|
||||
@ -332,12 +336,20 @@ type DISubroutineType struct {
|
||||
// Parameters contains the subroutine parameter types,
|
||||
// including the return type at the 0th index.
|
||||
Parameters []Metadata
|
||||
|
||||
Flags int
|
||||
}
|
||||
|
||||
// CreateSubroutineType creates subroutine type debug metadata.
|
||||
func (d *DIBuilder) CreateSubroutineType(t DISubroutineType) Metadata {
|
||||
params := d.getOrCreateTypeArray(t.Parameters)
|
||||
result := C.LLVMDIBuilderCreateSubroutineType(d.ref, t.File.C, params.C)
|
||||
params, length := llvmMetadataRefs(t.Parameters)
|
||||
result := C.LLVMDIBuilderCreateSubroutineType(
|
||||
d.ref,
|
||||
t.File.C,
|
||||
params,
|
||||
length,
|
||||
C.LLVMDIFlags(t.Flags),
|
||||
)
|
||||
return Metadata{C: result}
|
||||
}
|
||||
|
||||
@ -351,24 +363,34 @@ type DIStructType struct {
|
||||
Flags int
|
||||
DerivedFrom Metadata
|
||||
Elements []Metadata
|
||||
VTableHolder Metadata // optional
|
||||
UniqueID string
|
||||
}
|
||||
|
||||
// CreateStructType creates struct type debug metadata.
|
||||
func (d *DIBuilder) CreateStructType(scope Metadata, t DIStructType) Metadata {
|
||||
elements := d.getOrCreateArray(t.Elements)
|
||||
elements, length := llvmMetadataRefs(t.Elements)
|
||||
name := C.CString(t.Name)
|
||||
uniqueID := C.CString(t.UniqueID)
|
||||
defer C.free(unsafe.Pointer(name))
|
||||
defer C.free(unsafe.Pointer(uniqueID))
|
||||
result := C.LLVMDIBuilderCreateStructType(
|
||||
d.ref,
|
||||
scope.C,
|
||||
name,
|
||||
C.size_t(len(t.Name)),
|
||||
t.File.C,
|
||||
C.unsigned(t.Line),
|
||||
C.uint64_t(t.SizeInBits),
|
||||
C.uint32_t(t.AlignInBits),
|
||||
C.unsigned(t.Flags),
|
||||
C.LLVMDIFlags(t.Flags),
|
||||
t.DerivedFrom.C,
|
||||
elements.C,
|
||||
elements,
|
||||
length,
|
||||
C.unsigned(0), // Optional Objective-C runtime version.
|
||||
t.VTableHolder.C,
|
||||
uniqueID,
|
||||
C.size_t(len(t.UniqueID)),
|
||||
)
|
||||
return Metadata{C: result}
|
||||
}
|
||||
@ -384,23 +406,29 @@ type DIReplaceableCompositeType struct {
|
||||
SizeInBits uint64
|
||||
AlignInBits uint32
|
||||
Flags int
|
||||
UniqueID string
|
||||
}
|
||||
|
||||
// CreateReplaceableCompositeType creates replaceable composite type debug metadata.
|
||||
func (d *DIBuilder) CreateReplaceableCompositeType(scope Metadata, t DIReplaceableCompositeType) Metadata {
|
||||
name := C.CString(t.Name)
|
||||
uniqueID := C.CString(t.UniqueID)
|
||||
defer C.free(unsafe.Pointer(name))
|
||||
defer C.free(unsafe.Pointer(uniqueID))
|
||||
result := C.LLVMDIBuilderCreateReplaceableCompositeType(
|
||||
d.ref,
|
||||
C.unsigned(t.Tag),
|
||||
name,
|
||||
C.size_t(len(t.Name)),
|
||||
scope.C,
|
||||
t.File.C,
|
||||
C.unsigned(t.Line),
|
||||
C.unsigned(t.RuntimeLang),
|
||||
C.uint64_t(t.SizeInBits),
|
||||
C.uint32_t(t.AlignInBits),
|
||||
C.unsigned(t.Flags),
|
||||
C.LLVMDIFlags(t.Flags),
|
||||
uniqueID,
|
||||
C.size_t(len(t.UniqueID)),
|
||||
)
|
||||
return Metadata{C: result}
|
||||
}
|
||||
@ -425,12 +453,13 @@ func (d *DIBuilder) CreateMemberType(scope Metadata, t DIMemberType) Metadata {
|
||||
d.ref,
|
||||
scope.C,
|
||||
name,
|
||||
C.size_t(len(t.Name)),
|
||||
t.File.C,
|
||||
C.unsigned(t.Line),
|
||||
C.uint64_t(t.SizeInBits),
|
||||
C.uint32_t(t.AlignInBits),
|
||||
C.uint64_t(t.OffsetInBits),
|
||||
C.unsigned(t.Flags),
|
||||
C.LLVMDIFlags(t.Flags),
|
||||
t.Type.C,
|
||||
)
|
||||
return Metadata{C: result}
|
||||
@ -456,13 +485,14 @@ func (d *DIBuilder) CreateArrayType(t DIArrayType) Metadata {
|
||||
for i, s := range t.Subscripts {
|
||||
subscriptsSlice[i] = d.getOrCreateSubrange(s.Lo, s.Count)
|
||||
}
|
||||
subscripts := d.getOrCreateArray(subscriptsSlice)
|
||||
subscripts, length := llvmMetadataRefs(subscriptsSlice)
|
||||
result := C.LLVMDIBuilderCreateArrayType(
|
||||
d.ref,
|
||||
C.uint64_t(t.SizeInBits),
|
||||
C.uint32_t(t.AlignInBits),
|
||||
t.ElementType.C,
|
||||
subscripts.C,
|
||||
subscripts,
|
||||
length,
|
||||
)
|
||||
return Metadata{C: result}
|
||||
}
|
||||
@ -484,6 +514,7 @@ func (d *DIBuilder) CreateTypedef(t DITypedef) Metadata {
|
||||
d.ref,
|
||||
t.Type.C,
|
||||
name,
|
||||
C.size_t(len(t.Name)),
|
||||
t.File.C,
|
||||
C.unsigned(t.Line),
|
||||
t.Context.C,
|
||||
@ -534,20 +565,38 @@ func (d *DIBuilder) CreateExpression(addr []int64) Metadata {
|
||||
// InsertDeclareAtEnd inserts a call to llvm.dbg.declare at the end of the
|
||||
// specified basic block for the given value and associated debug metadata.
|
||||
func (d *DIBuilder) InsertDeclareAtEnd(v Value, diVarInfo, expr Metadata, bb BasicBlock) Value {
|
||||
result := C.LLVMDIBuilderInsertDeclareAtEnd(d.ref, v.C, diVarInfo.C, expr.C, bb.C)
|
||||
result := C.LLVMDIBuilderInsertDeclareAtEnd(d.ref, v.C, diVarInfo.C, expr.C, nil, bb.C)
|
||||
return Value{C: result}
|
||||
}
|
||||
|
||||
// InsertValueAtEnd inserts a call to llvm.dbg.value at the end of the
|
||||
// specified basic block for the given value and associated debug metadata.
|
||||
func (d *DIBuilder) InsertValueAtEnd(v Value, diVarInfo, expr Metadata, bb BasicBlock) Value {
|
||||
result := C.LLVMDIBuilderInsertValueAtEnd(d.ref, v.C, diVarInfo.C, expr.C, bb.C)
|
||||
result := C.LLVMDIBuilderInsertDbgValueAtEnd(d.ref, v.C, diVarInfo.C, expr.C, nil, bb.C)
|
||||
return Value{C: result}
|
||||
}
|
||||
|
||||
func (v Value) SetSubprogram(sp Metadata) {
|
||||
C.LLVMSetSubprogram(v.C, sp.C)
|
||||
}
|
||||
|
||||
func boolToCInt(v bool) C.int {
|
||||
if v {
|
||||
return 1
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// llvm.Metadata
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
func (c Context) TemporaryMDNode(mds []Metadata) (md Metadata) {
|
||||
ptr, nvals := llvmMetadataRefs(mds)
|
||||
md.C = C.LLVMTemporaryMDNode(c.C, ptr, C.size_t(nvals))
|
||||
return
|
||||
}
|
||||
|
||||
func (md Metadata) ReplaceAllUsesWith(new Metadata) {
|
||||
C.LLVMMetadataReplaceAllUsesWith(md.C, new.C)
|
||||
}
|
||||
|
@ -15,6 +15,7 @@ package llvm
|
||||
|
||||
/*
|
||||
#include "llvm-c/Core.h"
|
||||
#include "llvm-c/Comdat.h"
|
||||
#include "IRBindings.h"
|
||||
#include <stdlib.h>
|
||||
*/
|
||||
@ -37,6 +38,9 @@ type (
|
||||
Value struct {
|
||||
C C.LLVMValueRef
|
||||
}
|
||||
Comdat struct {
|
||||
C C.LLVMComdatRef
|
||||
}
|
||||
BasicBlock struct {
|
||||
C C.LLVMBasicBlockRef
|
||||
}
|
||||
@ -61,14 +65,15 @@ type (
|
||||
Attribute struct {
|
||||
C C.LLVMAttributeRef
|
||||
}
|
||||
Opcode C.LLVMOpcode
|
||||
TypeKind C.LLVMTypeKind
|
||||
Linkage C.LLVMLinkage
|
||||
Visibility C.LLVMVisibility
|
||||
CallConv C.LLVMCallConv
|
||||
IntPredicate C.LLVMIntPredicate
|
||||
FloatPredicate C.LLVMRealPredicate
|
||||
LandingPadClause C.LLVMLandingPadClauseTy
|
||||
Opcode C.LLVMOpcode
|
||||
TypeKind C.LLVMTypeKind
|
||||
Linkage C.LLVMLinkage
|
||||
Visibility C.LLVMVisibility
|
||||
CallConv C.LLVMCallConv
|
||||
ComdatSelectionKind C.LLVMComdatSelectionKind
|
||||
IntPredicate C.LLVMIntPredicate
|
||||
FloatPredicate C.LLVMRealPredicate
|
||||
LandingPadClause C.LLVMLandingPadClauseTy
|
||||
)
|
||||
|
||||
func (c Context) IsNil() bool { return c.C == nil }
|
||||
@ -248,6 +253,18 @@ const (
|
||||
X86FastcallCallConv CallConv = C.LLVMX86FastcallCallConv
|
||||
)
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// llvm.ComdatSelectionKind
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
const (
|
||||
AnyComdatSelectionKind ComdatSelectionKind = C.LLVMAnyComdatSelectionKind
|
||||
ExactMatchComdatSelectionKind ComdatSelectionKind = C.LLVMExactMatchComdatSelectionKind
|
||||
LargestComdatSelectionKind ComdatSelectionKind = C.LLVMLargestComdatSelectionKind
|
||||
NoDuplicatesComdatSelectionKind ComdatSelectionKind = C.LLVMNoDuplicatesComdatSelectionKind
|
||||
SameSizeComdatSelectionKind ComdatSelectionKind = C.LLVMSameSizeComdatSelectionKind
|
||||
)
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// llvm.IntPredicate
|
||||
//-------------------------------------------------------------------------
|
||||
@ -767,11 +784,6 @@ func (c Context) MDNode(mds []Metadata) (md Metadata) {
|
||||
md.C = C.LLVMMDNode2(c.C, ptr, nvals)
|
||||
return
|
||||
}
|
||||
func (c Context) TemporaryMDNode(mds []Metadata) (md Metadata) {
|
||||
ptr, nvals := llvmMetadataRefs(mds)
|
||||
md.C = C.LLVMTemporaryMDNode(c.C, ptr, nvals)
|
||||
return
|
||||
}
|
||||
func (v Value) ConstantAsMetadata() (md Metadata) {
|
||||
md.C = C.LLVMConstantAsMetadata(v.C)
|
||||
return
|
||||
@ -1020,6 +1032,8 @@ func (v Value) IsThreadLocal() bool { return C.LLVMIsThreadLocal(v.C) != 0
|
||||
func (v Value) SetThreadLocal(tl bool) { C.LLVMSetThreadLocal(v.C, boolToLLVMBool(tl)) }
|
||||
func (v Value) IsGlobalConstant() bool { return C.LLVMIsGlobalConstant(v.C) != 0 }
|
||||
func (v Value) SetGlobalConstant(gc bool) { C.LLVMSetGlobalConstant(v.C, boolToLLVMBool(gc)) }
|
||||
func (v Value) IsVolatile() bool { return C.LLVMGetVolatile(v.C) != 0 }
|
||||
func (v Value) SetVolatile(volatile bool) { C.LLVMSetVolatile(v.C, boolToLLVMBool(volatile)) }
|
||||
|
||||
// Operations on aliases
|
||||
func AddAlias(m Module, t Type, aliasee Value, name string) (v Value) {
|
||||
@ -1029,6 +1043,25 @@ func AddAlias(m Module, t Type, aliasee Value, name string) (v Value) {
|
||||
return
|
||||
}
|
||||
|
||||
// Operations on comdat
|
||||
func (m Module) Comdat(name string) (c Comdat) {
|
||||
cname := C.CString(name)
|
||||
defer C.free(unsafe.Pointer(cname))
|
||||
c.C = C.LLVMGetOrInsertComdat(m.C, cname)
|
||||
return
|
||||
}
|
||||
|
||||
func (v Value) Comdat() (c Comdat) { c.C = C.LLVMGetComdat(v.C); return }
|
||||
func (v Value) SetComdat(c Comdat) { C.LLVMSetComdat(v.C, c.C) }
|
||||
|
||||
func (c Comdat) SelectionKind() ComdatSelectionKind {
|
||||
return ComdatSelectionKind(C.LLVMGetComdatSelectionKind(c.C))
|
||||
}
|
||||
|
||||
func (c Comdat) SetSelectionKind(k ComdatSelectionKind) {
|
||||
C.LLVMSetComdatSelectionKind(c.C, (C.LLVMComdatSelectionKind)(k))
|
||||
}
|
||||
|
||||
// Operations on functions
|
||||
func AddFunction(m Module, name string, ft Type) (v Value) {
|
||||
cname := C.CString(name)
|
||||
@ -1102,9 +1135,6 @@ func (v Value) AddTargetDependentFunctionAttr(attr, value string) {
|
||||
func (v Value) SetPersonality(p Value) {
|
||||
C.LLVMSetPersonalityFn(v.C, p.C)
|
||||
}
|
||||
func (v Value) SetSubprogram(sp Metadata) {
|
||||
C.LLVMSetSubprogram(v.C, sp.C)
|
||||
}
|
||||
|
||||
// Operations on parameters
|
||||
func (v Value) ParamsCount() int { return int(C.LLVMCountParams(v.C)) }
|
||||
@ -1872,7 +1902,7 @@ func (pm PassManager) InitializeFunc() bool { return C.LLVMInitializeFunctionPas
|
||||
// See llvm::FunctionPassManager::run(Function&).
|
||||
func (pm PassManager) RunFunc(f Value) bool { return C.LLVMRunFunctionPassManager(pm.C, f.C) != 0 }
|
||||
|
||||
// Finalizes all of the function passes scheduled in in the function pass
|
||||
// Finalizes all of the function passes scheduled in the function pass
|
||||
// manager. Returns 1 if any of the passes modified the module, 0 otherwise.
|
||||
// See llvm::FunctionPassManager::doFinalization.
|
||||
func (pm PassManager) FinalizeFunc() bool { return C.LLVMFinalizeFunctionPassManager(pm.C) != 0 }
|
||||
@ -1881,11 +1911,3 @@ func (pm PassManager) FinalizeFunc() bool { return C.LLVMFinalizeFunctionPassMan
|
||||
// the module provider.
|
||||
// See llvm::PassManagerBase::~PassManagerBase.
|
||||
func (pm PassManager) Dispose() { C.LLVMDisposePassManager(pm.C) }
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
// llvm.Metadata
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
func (md Metadata) ReplaceAllUsesWith(new Metadata) {
|
||||
C.LLVMMetadataReplaceAllUsesWith(md.C, new.C)
|
||||
}
|
||||
|
@ -89,6 +89,7 @@ func TestAttributes(t *testing.T) {
|
||||
"uwtable",
|
||||
"zeroext",
|
||||
"cold",
|
||||
"nocf_check",
|
||||
}
|
||||
|
||||
for _, name := range attrTests {
|
||||
@ -111,7 +112,11 @@ func TestDebugLoc(t *testing.T) {
|
||||
}()
|
||||
file := d.CreateFile("dummy_file", "dummy_dir")
|
||||
voidInfo := d.CreateBasicType(DIBasicType{Name: "void"})
|
||||
typeInfo := d.CreateSubroutineType(DISubroutineType{file, []Metadata{voidInfo}})
|
||||
typeInfo := d.CreateSubroutineType(DISubroutineType{
|
||||
File: file,
|
||||
Parameters: []Metadata{voidInfo},
|
||||
Flags: 0,
|
||||
})
|
||||
scope := d.CreateFunction(file, DIFunction{
|
||||
Name: "foo",
|
||||
LinkageName: "foo",
|
||||
|
@ -15,6 +15,7 @@ package llvm
|
||||
|
||||
/*
|
||||
#include "llvm-c/Transforms/Scalar.h"
|
||||
#include "llvm-c/Transforms/Utils.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
|
@ -2619,7 +2619,7 @@ module PassManager : sig
|
||||
See the [llvm::FunctionPassManager::run] method. *)
|
||||
val run_function : llvalue -> [ `Function ] t -> bool
|
||||
|
||||
(** [finalize fpm] finalizes all of the function passes scheduled in in the
|
||||
(** [finalize fpm] finalizes all of the function passes scheduled in the
|
||||
function pass manager [fpm]. Returns [true] if any of the passes
|
||||
modified the module, [false] otherwise.
|
||||
See the [llvm::FunctionPassManager::doFinalization] method. *)
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <string.h>
|
||||
#include "llvm-c/Core.h"
|
||||
#include "llvm-c/Support.h"
|
||||
#include "llvm/Config/llvm-config.h"
|
||||
#include "caml/alloc.h"
|
||||
#include "caml/custom.h"
|
||||
#include "caml/memory.h"
|
||||
|
@ -7,9 +7,6 @@
|
||||
*
|
||||
*===----------------------------------------------------------------------===*)
|
||||
|
||||
external add_bb_vectorize
|
||||
: [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
|
||||
= "llvm_add_bb_vectorize"
|
||||
external add_loop_vectorize
|
||||
: [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
|
||||
= "llvm_add_loop_vectorize"
|
||||
|
@ -12,11 +12,6 @@
|
||||
This interface provides an OCaml API for LLVM vectorize transforms, the
|
||||
classes in the [LLVMVectorize] library. *)
|
||||
|
||||
(** See the [llvm::createBBVectorizePass] function. *)
|
||||
external add_bb_vectorize
|
||||
: [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
|
||||
= "llvm_add_bb_vectorize"
|
||||
|
||||
(** See the [llvm::createLoopVectorizePass] function. *)
|
||||
external add_loop_vectorize
|
||||
: [<Llvm.PassManager.any] Llvm.PassManager.t -> unit
|
||||
|
@ -19,12 +19,6 @@
|
||||
#include "caml/mlvalues.h"
|
||||
#include "caml/misc.h"
|
||||
|
||||
/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
|
||||
CAMLprim value llvm_add_bb_vectorize(LLVMPassManagerRef PM) {
|
||||
LLVMAddBBVectorizePass(PM);
|
||||
return Val_unit;
|
||||
}
|
||||
|
||||
/* [<Llvm.PassManager.any] Llvm.PassManager.t -> unit */
|
||||
CAMLprim value llvm_add_loop_vectorize(LLVMPassManagerRef PM) {
|
||||
LLVMAddLoopVectorizePass(PM);
|
||||
|
@ -456,6 +456,9 @@ def register_library(library):
|
||||
library.LLVMInitializeInstCombine.argtypes = [PassRegistry]
|
||||
library.LLVMInitializeInstCombine.restype = None
|
||||
|
||||
library.LLVMInitializeAggressiveInstCombiner.argtypes = [PassRegistry]
|
||||
library.LLVMInitializeAggressiveInstCombiner.restype = None
|
||||
|
||||
library.LLVMInitializeIPO.argtypes = [PassRegistry]
|
||||
library.LLVMInitializeIPO.restype = None
|
||||
|
||||
|
@ -4,13 +4,10 @@ if( WIN32 AND NOT CYGWIN )
|
||||
endif()
|
||||
|
||||
include(CheckIncludeFile)
|
||||
include(CheckIncludeFileCXX)
|
||||
include(CheckLibraryExists)
|
||||
include(CheckSymbolExists)
|
||||
include(CheckFunctionExists)
|
||||
include(CheckCCompilerFlag)
|
||||
include(CheckCXXSourceCompiles)
|
||||
include(TestBigEndian)
|
||||
|
||||
include(CheckCompilerVersion)
|
||||
include(HandleLLVMStdlib)
|
||||
@ -25,49 +22,24 @@ if( CMAKE_SYSTEM MATCHES "FreeBSD-9.2-RELEASE" AND
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES "cxxrt")
|
||||
endif()
|
||||
|
||||
# Helper macros and functions
|
||||
macro(add_cxx_include result files)
|
||||
set(${result} "")
|
||||
foreach (file_name ${files})
|
||||
set(${result} "${${result}}#include<${file_name}>\n")
|
||||
endforeach()
|
||||
endmacro(add_cxx_include files result)
|
||||
|
||||
function(check_type_exists type files variable)
|
||||
add_cxx_include(includes "${files}")
|
||||
CHECK_CXX_SOURCE_COMPILES("
|
||||
${includes} ${type} typeVar;
|
||||
int main() {
|
||||
return 0;
|
||||
}
|
||||
" ${variable})
|
||||
endfunction()
|
||||
|
||||
# include checks
|
||||
check_include_file(dirent.h HAVE_DIRENT_H)
|
||||
check_include_file(dlfcn.h HAVE_DLFCN_H)
|
||||
check_include_file(errno.h HAVE_ERRNO_H)
|
||||
check_include_file(fcntl.h HAVE_FCNTL_H)
|
||||
check_include_file(inttypes.h HAVE_INTTYPES_H)
|
||||
check_include_file(link.h HAVE_LINK_H)
|
||||
check_include_file(malloc.h HAVE_MALLOC_H)
|
||||
check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H)
|
||||
check_include_file(ndir.h HAVE_NDIR_H)
|
||||
if( NOT PURE_WINDOWS )
|
||||
check_include_file(pthread.h HAVE_PTHREAD_H)
|
||||
endif()
|
||||
check_include_file(signal.h HAVE_SIGNAL_H)
|
||||
check_include_file(stdint.h HAVE_STDINT_H)
|
||||
check_include_file(sys/dir.h HAVE_SYS_DIR_H)
|
||||
check_include_file(sys/ioctl.h HAVE_SYS_IOCTL_H)
|
||||
check_include_file(sys/mman.h HAVE_SYS_MMAN_H)
|
||||
check_include_file(sys/ndir.h HAVE_SYS_NDIR_H)
|
||||
check_include_file(sys/param.h HAVE_SYS_PARAM_H)
|
||||
check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H)
|
||||
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
|
||||
check_include_file(sys/time.h HAVE_SYS_TIME_H)
|
||||
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
|
||||
check_include_file(sys/uio.h HAVE_SYS_UIO_H)
|
||||
check_include_file(termios.h HAVE_TERMIOS_H)
|
||||
check_include_file(unistd.h HAVE_UNISTD_H)
|
||||
check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H)
|
||||
@ -117,6 +89,9 @@ if( NOT PURE_WINDOWS )
|
||||
check_library_exists(rt clock_gettime "" HAVE_LIBRT)
|
||||
endif()
|
||||
|
||||
# Check for libpfm.
|
||||
include(FindLibpfm)
|
||||
|
||||
if(HAVE_LIBPTHREAD)
|
||||
# We want to find pthreads library and at the moment we do want to
|
||||
# have it reported as '-l<lib>' instead of '-pthread'.
|
||||
@ -173,7 +148,11 @@ if(NOT LLVM_USE_SANITIZER MATCHES "Memory.*")
|
||||
find_package(LibXml2)
|
||||
if (LIBXML2_FOUND)
|
||||
set(LLVM_LIBXML2_ENABLED 1)
|
||||
include_directories(${LIBXML2_INCLUDE_DIR})
|
||||
if ((CMAKE_OSX_SYSROOT) AND (EXISTS ${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR}))
|
||||
include_directories(${CMAKE_OSX_SYSROOT}/${LIBXML2_INCLUDE_DIR})
|
||||
else()
|
||||
include_directories(${LIBXML2_INCLUDE_DIR})
|
||||
endif()
|
||||
set(LIBXML2_LIBS "xml2")
|
||||
endif()
|
||||
endif()
|
||||
@ -218,9 +197,6 @@ check_symbol_exists(posix_fallocate fcntl.h HAVE_POSIX_FALLOCATE)
|
||||
if( HAVE_SIGNAL_H AND NOT LLVM_USE_SANITIZER MATCHES ".*Address.*" AND NOT APPLE )
|
||||
check_symbol_exists(sigaltstack signal.h HAVE_SIGALTSTACK)
|
||||
endif()
|
||||
if( HAVE_SYS_UIO_H )
|
||||
check_symbol_exists(writev sys/uio.h HAVE_WRITEV)
|
||||
endif()
|
||||
set(CMAKE_REQUIRED_DEFINITIONS "-D_LARGEFILE64_SOURCE")
|
||||
check_symbol_exists(lseek64 "sys/types.h;unistd.h" HAVE_LSEEK64)
|
||||
set(CMAKE_REQUIRED_DEFINITIONS "")
|
||||
@ -228,17 +204,11 @@ check_symbol_exists(mallctl malloc_np.h HAVE_MALLCTL)
|
||||
check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
|
||||
check_symbol_exists(malloc_zone_statistics malloc/malloc.h
|
||||
HAVE_MALLOC_ZONE_STATISTICS)
|
||||
check_symbol_exists(mkdtemp "stdlib.h;unistd.h" HAVE_MKDTEMP)
|
||||
check_symbol_exists(mkstemp "stdlib.h;unistd.h" HAVE_MKSTEMP)
|
||||
check_symbol_exists(mktemp "stdlib.h;unistd.h" HAVE_MKTEMP)
|
||||
check_symbol_exists(getcwd unistd.h HAVE_GETCWD)
|
||||
check_symbol_exists(gettimeofday sys/time.h HAVE_GETTIMEOFDAY)
|
||||
check_symbol_exists(getrlimit "sys/types.h;sys/time.h;sys/resource.h" HAVE_GETRLIMIT)
|
||||
check_symbol_exists(posix_spawn spawn.h HAVE_POSIX_SPAWN)
|
||||
check_symbol_exists(pread unistd.h HAVE_PREAD)
|
||||
check_symbol_exists(realpath stdlib.h HAVE_REALPATH)
|
||||
check_symbol_exists(sbrk unistd.h HAVE_SBRK)
|
||||
check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
|
||||
check_symbol_exists(strerror string.h HAVE_STRERROR)
|
||||
check_symbol_exists(strerror_r string.h HAVE_STRERROR_R)
|
||||
check_symbol_exists(strerror_s string.h HAVE_DECL_STRERROR_S)
|
||||
@ -286,28 +256,17 @@ endif()
|
||||
# This check requires _GNU_SOURCE
|
||||
check_symbol_exists(sched_getaffinity sched.h HAVE_SCHED_GETAFFINITY)
|
||||
check_symbol_exists(CPU_COUNT sched.h HAVE_CPU_COUNT)
|
||||
if(HAVE_LIBPTHREAD)
|
||||
check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
|
||||
check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
|
||||
elseif(PTHREAD_IN_LIBC)
|
||||
check_library_exists(c pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
|
||||
check_library_exists(c pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
|
||||
if (NOT PURE_WINDOWS)
|
||||
if (LLVM_PTHREAD_LIB)
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES ${LLVM_PTHREAD_LIB})
|
||||
endif()
|
||||
check_symbol_exists(pthread_getname_np pthread.h HAVE_PTHREAD_GETNAME_NP)
|
||||
check_symbol_exists(pthread_setname_np pthread.h HAVE_PTHREAD_SETNAME_NP)
|
||||
if (LLVM_PTHREAD_LIB)
|
||||
list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${LLVM_PTHREAD_LIB})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(headers "sys/types.h")
|
||||
|
||||
if (HAVE_INTTYPES_H)
|
||||
set(headers ${headers} "inttypes.h")
|
||||
endif()
|
||||
|
||||
if (HAVE_STDINT_H)
|
||||
set(headers ${headers} "stdint.h")
|
||||
endif()
|
||||
|
||||
check_type_exists(int64_t "${headers}" HAVE_INT64_T)
|
||||
check_type_exists(uint64_t "${headers}" HAVE_UINT64_T)
|
||||
check_type_exists(u_int64_t "${headers}" HAVE_U_INT64_T)
|
||||
|
||||
# available programs checks
|
||||
function(llvm_find_program name)
|
||||
string(TOUPPER ${name} NAME)
|
||||
@ -441,6 +400,10 @@ elseif (LLVM_NATIVE_ARCH MATCHES "wasm32")
|
||||
set(LLVM_NATIVE_ARCH WebAssembly)
|
||||
elseif (LLVM_NATIVE_ARCH MATCHES "wasm64")
|
||||
set(LLVM_NATIVE_ARCH WebAssembly)
|
||||
elseif (LLVM_NATIVE_ARCH MATCHES "riscv32")
|
||||
set(LLVM_NATIVE_ARCH RISCV)
|
||||
elseif (LLVM_NATIVE_ARCH MATCHES "riscv64")
|
||||
set(LLVM_NATIVE_ARCH RISCV)
|
||||
else ()
|
||||
message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}")
|
||||
endif ()
|
||||
@ -475,23 +438,6 @@ else ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if( MINGW )
|
||||
set(HAVE_LIBPSAPI 1)
|
||||
set(HAVE_LIBSHELL32 1)
|
||||
# TODO: Check existence of libraries.
|
||||
# include(CheckLibraryExists)
|
||||
endif( MINGW )
|
||||
|
||||
if (NOT HAVE_STRTOLL)
|
||||
# Use _strtoi64 if strtoll is not available.
|
||||
check_symbol_exists(_strtoi64 stdlib.h have_strtoi64)
|
||||
if (have_strtoi64)
|
||||
set(HAVE_STRTOLL 1)
|
||||
set(strtoll "_strtoi64")
|
||||
set(strtoull "_strtoui64")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if( MSVC )
|
||||
set(SHLIBEXT ".lib")
|
||||
set(stricmp "_stricmp")
|
||||
@ -569,10 +515,10 @@ else()
|
||||
endif()
|
||||
|
||||
set(LLVM_BINDINGS "")
|
||||
if(WIN32)
|
||||
find_program(GO_EXECUTABLE NAMES go DOC "go executable")
|
||||
if(WIN32 OR NOT LLVM_ENABLE_BINDINGS)
|
||||
message(STATUS "Go bindings disabled.")
|
||||
else()
|
||||
find_program(GO_EXECUTABLE NAMES go DOC "go executable")
|
||||
if(GO_EXECUTABLE STREQUAL "GO_EXECUTABLE-NOTFOUND")
|
||||
message(STATUS "Go bindings disabled.")
|
||||
else()
|
||||
@ -612,7 +558,7 @@ endif()
|
||||
# Keep the version requirements in sync with bindings/ocaml/README.txt.
|
||||
include(FindOCaml)
|
||||
include(AddOCaml)
|
||||
if(WIN32)
|
||||
if(WIN32 OR NOT LLVM_ENABLE_BINDINGS)
|
||||
message(STATUS "OCaml bindings disabled.")
|
||||
else()
|
||||
find_package(OCaml)
|
||||
@ -640,7 +586,8 @@ endif()
|
||||
string(REPLACE " " ";" LLVM_BINDINGS_LIST "${LLVM_BINDINGS}")
|
||||
|
||||
function(find_python_module module)
|
||||
string(TOUPPER ${module} module_upper)
|
||||
string(REPLACE "." "_" module_name ${module})
|
||||
string(TOUPPER ${module_name} module_upper)
|
||||
set(FOUND_VAR PY_${module_upper}_FOUND)
|
||||
|
||||
execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" "import ${module}"
|
||||
@ -658,13 +605,16 @@ endfunction()
|
||||
|
||||
set (PYTHON_MODULES
|
||||
pygments
|
||||
# Some systems still don't have pygments.lexers.c_cpp which was introduced in
|
||||
# version 2.0 in 2014...
|
||||
pygments.lexers.c_cpp
|
||||
yaml
|
||||
)
|
||||
foreach(module ${PYTHON_MODULES})
|
||||
find_python_module(${module})
|
||||
endforeach()
|
||||
|
||||
if(PY_PYGMENTS_FOUND AND PY_YAML_FOUND)
|
||||
if(PY_PYGMENTS_FOUND AND PY_PYGMENTS_LEXERS_C_CPP_FOUND AND PY_YAML_FOUND)
|
||||
set (LLVM_HAVE_OPT_VIEWER_MODULES 1)
|
||||
else()
|
||||
set (LLVM_HAVE_OPT_VIEWER_MODULES 0)
|
||||
|
@ -83,7 +83,7 @@ function(add_llvm_symbol_exports target_name export_file)
|
||||
# FIXME: Don't write the "local:" line on OpenBSD.
|
||||
# in the export file, also add a linker script to version LLVM symbols (form: LLVM_N.M)
|
||||
add_custom_command(OUTPUT ${native_export_file}
|
||||
COMMAND echo "LLVM_${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR} {" > ${native_export_file}
|
||||
COMMAND echo "LLVM_${LLVM_VERSION_MAJOR} {" > ${native_export_file}
|
||||
COMMAND grep -q "[[:alnum:]]" ${export_file} && echo " global:" >> ${native_export_file} || :
|
||||
COMMAND sed -e "s/$/;/" -e "s/^/ /" < ${export_file} >> ${native_export_file}
|
||||
COMMAND echo " local: *;" >> ${native_export_file}
|
||||
@ -147,34 +147,48 @@ function(add_llvm_symbol_exports target_name export_file)
|
||||
set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)
|
||||
endfunction(add_llvm_symbol_exports)
|
||||
|
||||
if(NOT WIN32 AND NOT APPLE)
|
||||
if(APPLE)
|
||||
execute_process(
|
||||
COMMAND "${CMAKE_LINKER}" -v
|
||||
ERROR_VARIABLE stderr
|
||||
)
|
||||
set(LLVM_LINKER_DETECTED YES)
|
||||
if("${stderr}" MATCHES "PROJECT:ld64")
|
||||
set(LLVM_LINKER_IS_LD64 YES)
|
||||
message(STATUS "Linker detection: ld64")
|
||||
else()
|
||||
set(LLVM_LINKER_DETECTED NO)
|
||||
message(STATUS "Linker detection: unknown")
|
||||
endif()
|
||||
elseif(NOT WIN32)
|
||||
# Detect what linker we have here
|
||||
if( LLVM_USE_LINKER )
|
||||
set(command ${CMAKE_C_COMPILER} -fuse-ld=${LLVM_USE_LINKER} -Wl,--version)
|
||||
else()
|
||||
set(command ${CMAKE_C_COMPILER} -Wl,--version)
|
||||
separate_arguments(flags UNIX_COMMAND "${CMAKE_EXE_LINKER_FLAGS}")
|
||||
set(command ${CMAKE_C_COMPILER} ${flags} -Wl,--version)
|
||||
endif()
|
||||
execute_process(
|
||||
COMMAND ${command}
|
||||
OUTPUT_VARIABLE stdout
|
||||
ERROR_VARIABLE stderr
|
||||
)
|
||||
set(LLVM_LINKER_DETECTED ON)
|
||||
set(LLVM_LINKER_DETECTED YES)
|
||||
if("${stdout}" MATCHES "GNU gold")
|
||||
set(LLVM_LINKER_IS_GOLD ON)
|
||||
set(LLVM_LINKER_IS_GOLD YES)
|
||||
message(STATUS "Linker detection: GNU Gold")
|
||||
elseif("${stdout}" MATCHES "^LLD")
|
||||
set(LLVM_LINKER_IS_LLD ON)
|
||||
set(LLVM_LINKER_IS_LLD YES)
|
||||
message(STATUS "Linker detection: LLD")
|
||||
elseif("${stdout}" MATCHES "GNU ld")
|
||||
set(LLVM_LINKER_IS_GNULD ON)
|
||||
set(LLVM_LINKER_IS_GNULD YES)
|
||||
message(STATUS "Linker detection: GNU ld")
|
||||
elseif("${stderr}" MATCHES "Solaris Link Editors" OR
|
||||
"${stdout}" MATCHES "Solaris Link Editors")
|
||||
set(LLVM_LINKER_IS_SOLARISLD ON)
|
||||
set(LLVM_LINKER_IS_SOLARISLD YES)
|
||||
message(STATUS "Linker detection: Solaris ld")
|
||||
else()
|
||||
set(LLVM_LINKER_DETECTED OFF)
|
||||
set(LLVM_LINKER_DETECTED NO)
|
||||
message(STATUS "Linker detection: unknown")
|
||||
endif()
|
||||
endif()
|
||||
@ -207,7 +221,7 @@ function(add_link_opts target_name)
|
||||
elseif(${CMAKE_SYSTEM_NAME} MATCHES "SunOS")
|
||||
set_property(TARGET ${target_name} APPEND_STRING PROPERTY
|
||||
LINK_FLAGS " -Wl,-z -Wl,discard-unused=sections")
|
||||
elseif(NOT WIN32 AND NOT LLVM_LINKER_IS_GOLD)
|
||||
elseif(NOT WIN32 AND NOT LLVM_LINKER_IS_GOLD AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "OpenBSD")
|
||||
# Object files are compiled with -ffunction-data-sections.
|
||||
# Versions of bfd ld < 2.23.1 have a bug in --gc-sections that breaks
|
||||
# tools that use plugins. Always pass --gc-sections once we require
|
||||
@ -486,7 +500,7 @@ function(llvm_add_library name)
|
||||
PROPERTIES
|
||||
# Since 4.0.0, the ABI version is indicated by the major version
|
||||
SOVERSION ${LLVM_VERSION_MAJOR}
|
||||
VERSION ${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
|
||||
VERSION ${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -508,7 +522,7 @@ function(llvm_add_library name)
|
||||
if(${output_name} STREQUAL "output_name-NOTFOUND")
|
||||
set(output_name ${name})
|
||||
endif()
|
||||
set(library_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}${LLVM_VERSION_SUFFIX})
|
||||
set(library_name ${output_name}-${LLVM_VERSION_MAJOR}${LLVM_VERSION_SUFFIX})
|
||||
set(api_name ${output_name}-${LLVM_VERSION_MAJOR}.${LLVM_VERSION_MINOR}.${LLVM_VERSION_PATCH}${LLVM_VERSION_SUFFIX})
|
||||
set_target_properties(${name} PROPERTIES OUTPUT_NAME ${library_name})
|
||||
llvm_install_library_symlink(${api_name} ${library_name} SHARED
|
||||
@ -566,6 +580,7 @@ function(llvm_add_library name)
|
||||
|
||||
if(ARG_SHARED OR ARG_MODULE)
|
||||
llvm_externalize_debuginfo(${name})
|
||||
llvm_codesign(${name})
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
@ -770,6 +785,8 @@ macro(add_llvm_executable name)
|
||||
# API for all shared libraries loaded by this executable.
|
||||
target_link_libraries(${name} PRIVATE ${LLVM_PTHREAD_LIB})
|
||||
endif()
|
||||
|
||||
llvm_codesign(${name})
|
||||
endmacro(add_llvm_executable name)
|
||||
|
||||
function(export_executable_symbols target)
|
||||
@ -1269,7 +1286,7 @@ function(get_llvm_lit_path base_dir file_name)
|
||||
endif()
|
||||
|
||||
set(lit_file_name "llvm-lit")
|
||||
if (WIN32 AND NOT CYGWIN)
|
||||
if (CMAKE_HOST_WIN32 AND NOT CYGWIN)
|
||||
# llvm-lit needs suffix.py for multiprocess to find a main module.
|
||||
set(lit_file_name "${lit_file_name}.py")
|
||||
endif ()
|
||||
@ -1473,7 +1490,7 @@ function(add_llvm_tool_symlink link_name target)
|
||||
if(NOT ARG_OUTPUT_DIR)
|
||||
# If you're not overriding the OUTPUT_DIR, we can make the link relative in
|
||||
# the same directory.
|
||||
if(UNIX)
|
||||
if(CMAKE_HOST_UNIX)
|
||||
set(dest_binary "$<TARGET_FILE_NAME:${target}>")
|
||||
endif()
|
||||
if(CMAKE_CONFIGURATION_TYPES)
|
||||
@ -1499,7 +1516,7 @@ function(add_llvm_tool_symlink link_name target)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(UNIX)
|
||||
if(CMAKE_HOST_UNIX)
|
||||
set(LLVM_LINK_OR_COPY create_symlink)
|
||||
else()
|
||||
set(LLVM_LINK_OR_COPY copy)
|
||||
@ -1543,9 +1560,12 @@ function(llvm_externalize_debuginfo name)
|
||||
|
||||
if(NOT LLVM_EXTERNALIZE_DEBUGINFO_SKIP_STRIP)
|
||||
if(APPLE)
|
||||
set(strip_command COMMAND xcrun strip -Sxl $<TARGET_FILE:${name}>)
|
||||
if(NOT CMAKE_STRIP)
|
||||
set(CMAKE_STRIP xcrun strip)
|
||||
endif()
|
||||
set(strip_command COMMAND ${CMAKE_STRIP} -Sxl $<TARGET_FILE:${name}>)
|
||||
else()
|
||||
set(strip_command COMMAND strip -gx $<TARGET_FILE:${name}>)
|
||||
set(strip_command COMMAND ${CMAKE_STRIP} -gx $<TARGET_FILE:${name}>)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@ -1557,19 +1577,48 @@ function(llvm_externalize_debuginfo name)
|
||||
set_property(TARGET ${name} APPEND_STRING PROPERTY
|
||||
LINK_FLAGS " -Wl,-object_path_lto,${lto_object}")
|
||||
endif()
|
||||
if(NOT CMAKE_DSYMUTIL)
|
||||
set(CMAKE_DSYMUTIL xcrun dsymutil)
|
||||
endif()
|
||||
add_custom_command(TARGET ${name} POST_BUILD
|
||||
COMMAND xcrun dsymutil $<TARGET_FILE:${name}>
|
||||
COMMAND ${CMAKE_DSYMUTIL} $<TARGET_FILE:${name}>
|
||||
${strip_command}
|
||||
)
|
||||
else()
|
||||
add_custom_command(TARGET ${name} POST_BUILD
|
||||
COMMAND objcopy --only-keep-debug $<TARGET_FILE:${name}> $<TARGET_FILE:${name}>.debug
|
||||
COMMAND ${CMAKE_OBJCOPY} --only-keep-debug $<TARGET_FILE:${name}> $<TARGET_FILE:${name}>.debug
|
||||
${strip_command} -R .gnu_debuglink
|
||||
COMMAND objcopy --add-gnu-debuglink=$<TARGET_FILE:${name}>.debug $<TARGET_FILE:${name}>
|
||||
COMMAND ${CMAKE_OBJCOPY} --add-gnu-debuglink=$<TARGET_FILE:${name}>.debug $<TARGET_FILE:${name}>
|
||||
)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# llvm_codesign(name)
#
# Adds a POST_BUILD step that code-signs the built file of target `name`.
#
# The function is a no-op unless LLVM_CODESIGNING_IDENTITY is set, and the
# signing command is only added when APPLE is true. CMAKE_CODESIGN and
# CMAKE_CODESIGN_ALLOCATE are honoured when already set; otherwise they are
# resolved through xcrun below.
function(llvm_codesign name)
|
||||
# Signing is opt-in: without an identity, leave the target untouched.
if(NOT LLVM_CODESIGNING_IDENTITY)
|
||||
return()
|
||||
endif()
|
||||
|
||||
if(APPLE)
|
||||
# Fall back to invoking codesign through xcrun when no tool was provided.
if(NOT CMAKE_CODESIGN)
|
||||
set(CMAKE_CODESIGN xcrun codesign)
|
||||
endif()
|
||||
# Ask xcrun for the path of codesign_allocate at configure time; the
# stripped output is stored in CMAKE_CODESIGN_ALLOCATE.
# NOTE(review): the exit status of xcrun is not checked here, so the variable
# may end up empty if the lookup fails - confirm that is acceptable.
if(NOT CMAKE_CODESIGN_ALLOCATE)
|
||||
execute_process(
|
||||
COMMAND xcrun -f codesign_allocate
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE
|
||||
OUTPUT_VARIABLE CMAKE_CODESIGN_ALLOCATE
|
||||
)
|
||||
endif()
|
||||
# Run the signing tool via `cmake -E env` so that CODESIGN_ALLOCATE is set in
# its environment, signing the target's output file with the given identity.
add_custom_command(
|
||||
TARGET ${name} POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E
|
||||
env CODESIGN_ALLOCATE=${CMAKE_CODESIGN_ALLOCATE}
|
||||
${CMAKE_CODESIGN} -s ${LLVM_CODESIGNING_IDENTITY}
|
||||
$<TARGET_FILE:${name}>
|
||||
)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(llvm_setup_rpath name)
|
||||
if(CMAKE_INSTALL_RPATH)
|
||||
return()
|
||||
@ -1589,7 +1638,8 @@ function(llvm_setup_rpath name)
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "(FreeBSD|DragonFly)")
|
||||
set_property(TARGET ${name} APPEND_STRING PROPERTY
|
||||
LINK_FLAGS " -Wl,-z,origin ")
|
||||
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" AND NOT LLVM_LINKER_IS_GOLD)
|
||||
endif()
|
||||
if(LLVM_LINKER_IS_GNULD)
|
||||
# $ORIGIN is not interpreted at link time by ld.bfd
|
||||
set_property(TARGET ${name} APPEND_STRING PROPERTY
|
||||
LINK_FLAGS " -Wl,-rpath-link,${LLVM_LIBRARY_OUTPUT_INTDIR} ")
|
||||
@ -1613,10 +1663,10 @@ function(setup_dependency_debugging name)
|
||||
return()
|
||||
endif()
|
||||
|
||||
set(deny_attributes_gen "(deny file* (literal \"${LLVM_BINARY_DIR}/include/llvm/IR/Attributes.gen\"))")
|
||||
set(deny_intrinsics_gen "(deny file* (literal \"${LLVM_BINARY_DIR}/include/llvm/IR/Intrinsics.gen\"))")
|
||||
set(deny_attributes_inc "(deny file* (literal \"${LLVM_BINARY_DIR}/include/llvm/IR/Attributes.inc\"))")
|
||||
set(deny_intrinsics_inc "(deny file* (literal \"${LLVM_BINARY_DIR}/include/llvm/IR/Intrinsics.inc\"))")
|
||||
|
||||
set(sandbox_command "sandbox-exec -p '(version 1) (allow default) ${deny_attributes_gen} ${deny_intrinsics_gen}'")
|
||||
set(sandbox_command "sandbox-exec -p '(version 1) (allow default) ${deny_attributes_inc} ${deny_intrinsics_inc}'")
|
||||
set_target_properties(${name} PROPERTIES RULE_LAUNCH_COMPILE ${sandbox_command})
|
||||
endfunction()
|
||||
|
||||
|
@ -47,6 +47,12 @@ set(LLVM_CONFIG_LIBRARY_DIRS
|
||||
set(LLVM_CONFIG_CMAKE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
|
||||
set(LLVM_CONFIG_BINARY_DIR "${LLVM_BINARY_DIR}")
|
||||
set(LLVM_CONFIG_TOOLS_BINARY_DIR "${LLVM_TOOLS_BINARY_DIR}")
|
||||
|
||||
if (LLVM_LINK_LLVM_DYLIB)
|
||||
set(LLVM_CONFIG_LINK_LLVM_DYLIB
|
||||
"set(LLVM_LINK_LLVM_DYLIB ${LLVM_LINK_LLVM_DYLIB})")
|
||||
endif()
|
||||
|
||||
# We need to use the full path to the LLVM Exports file to make sure we get the
|
||||
# one from the build tree. This is due to our cmake files being split between
|
||||
# this source dir and the binary dir in the build tree configuration and the
|
||||
|
@ -45,10 +45,13 @@ function(llvm_create_cross_target_internal target_name toolchain buildtype)
|
||||
|
||||
add_custom_command(OUTPUT ${LLVM_${target_name}_BUILD}/CMakeCache.txt
|
||||
COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}"
|
||||
-DCMAKE_MAKE_PROGRAM="${CMAKE_MAKE_PROGRAM}"
|
||||
${CROSS_TOOLCHAIN_FLAGS_${target_name}} ${CMAKE_SOURCE_DIR}
|
||||
-DLLVM_TARGET_IS_CROSSCOMPILE_HOST=TRUE
|
||||
-DLLVM_TARGETS_TO_BUILD="${targets_to_build_arg}"
|
||||
-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD="${experimental_targets_to_build_arg}"
|
||||
-DLLVM_DEFAULT_TARGET_TRIPLE="${TARGET_TRIPLE}"
|
||||
-DLLVM_TARGET_ARCH="${LLVM_TARGET_ARCH}"
|
||||
${build_type_flags} ${linker_flag} ${external_clang_dir}
|
||||
WORKING_DIRECTORY ${LLVM_${target_name}_BUILD}
|
||||
DEPENDS CREATE_LLVM_${target_name}
|
||||
|
23
cmake/modules/FindLibpfm.cmake
Normal file
23
cmake/modules/FindLibpfm.cmake
Normal file
@ -0,0 +1,23 @@
|
||||
# CMake module for finding libpfm4.
|
||||
#
|
||||
# If successful, the following variables will be defined:
|
||||
# HAVE_LIBPFM
|
||||
#
|
||||
# Libpfm can be disabled by setting LLVM_ENABLE_LIBPFM to 0.
#
# HAVE_LIBPFM is set to 1 only when the pfm library exposes pfm_initialize and
# all three perfmon headers checked below are found; in every other case this
# module leaves HAVE_LIBPFM untouched.
|
||||
|
||||
include(CheckIncludeFile)
|
||||
include(CheckLibraryExists)
|
||||
|
||||
# Skip every probe when the user has disabled libpfm support.
if (LLVM_ENABLE_LIBPFM)
|
||||
# Probe the library first; the header checks only run if the symbol is found.
check_library_exists(pfm pfm_initialize "" HAVE_LIBPFM_INITIALIZE)
|
||||
if(HAVE_LIBPFM_INITIALIZE)
|
||||
check_include_file(perfmon/perf_event.h HAVE_PERFMON_PERF_EVENT_H)
|
||||
check_include_file(perfmon/pfmlib.h HAVE_PERFMON_PFMLIB_H)
|
||||
check_include_file(perfmon/pfmlib_perf_event.h HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
|
||||
# All three headers are required before libpfm is reported as available.
if(HAVE_PERFMON_PERF_EVENT_H AND HAVE_PERFMON_PFMLIB_H AND HAVE_PERFMON_PFMLIB_PERF_EVENT_H)
|
||||
set(HAVE_LIBPFM 1)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -48,14 +48,6 @@ elseif(LLVM_PARALLEL_LINK_JOBS)
|
||||
message(WARNING "Job pooling is only available with Ninja generators.")
|
||||
endif()
|
||||
|
||||
if (LINKER_IS_LLD_LINK)
|
||||
# Pass /MANIFEST:NO so that CMake doesn't run mt.exe on our binaries. Adding
|
||||
# manifests with mt.exe breaks LLD's symbol tables and takes as much time as
|
||||
# the link. See PR24476.
|
||||
append("/MANIFEST:NO"
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS)
|
||||
endif()
|
||||
|
||||
if( LLVM_ENABLE_ASSERTIONS )
|
||||
# MSVC doesn't like _DEBUG on release builds. See PR 4379.
|
||||
if( NOT MSVC )
|
||||
@ -115,7 +107,7 @@ if(WIN32)
|
||||
set(LLVM_ON_UNIX 0)
|
||||
endif(CYGWIN)
|
||||
else(WIN32)
|
||||
if(UNIX)
|
||||
if(FUCHSIA OR UNIX)
|
||||
set(LLVM_ON_WIN32 0)
|
||||
set(LLVM_ON_UNIX 1)
|
||||
if(APPLE OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX")
|
||||
@ -123,9 +115,9 @@ else(WIN32)
|
||||
else()
|
||||
set(LLVM_HAVE_LINK_VERSION_SCRIPT 1)
|
||||
endif()
|
||||
else(UNIX)
|
||||
else(FUCHSIA OR UNIX)
|
||||
MESSAGE(SEND_ERROR "Unable to determine platform")
|
||||
endif(UNIX)
|
||||
endif(FUCHSIA OR UNIX)
|
||||
endif(WIN32)
|
||||
|
||||
set(EXEEXT ${CMAKE_EXECUTABLE_SUFFIX})
|
||||
@ -353,6 +345,19 @@ if( MSVC )
|
||||
|
||||
append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
|
||||
# Allow users to request PDBs in release mode. CMake offers the
|
||||
# RelWithDebInfo configuration, but it uses different optimization settings
|
||||
# (/Ob1 vs /Ob2 or -O2 vs -O3). LLVM provides this flag so that users can get
|
||||
# PDBs without changing codegen.
|
||||
# option() takes the help text as its second argument and the default value as
# its third; pass both explicitly so the cache entry carries a real description.
option(LLVM_ENABLE_PDB "Generate PDB debug info in Release builds without changing codegen" OFF)
|
||||
# Only applies to Release builds; other build types keep their own debug flags.
if (LLVM_ENABLE_PDB AND uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE")
|
||||
append("/Zi" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
# /DEBUG disables linker GC and ICF, but we want those in Release mode.
|
||||
append("/DEBUG /OPT:REF /OPT:ICF"
|
||||
CMAKE_EXE_LINKER_FLAGS CMAKE_MODULE_LINKER_FLAGS
|
||||
CMAKE_SHARED_LINKER_FLAGS)
|
||||
endif()
|
||||
|
||||
# /Zc:strictStrings is incompatible with VS12's (Visual Studio 2013's)
|
||||
# debug mode headers. Instead of only enabling them in VS2013's debug mode,
|
||||
# we'll just enable them for Visual Studio 2015 (VS 14, MSVC_VERSION 1900)
|
||||
@ -550,7 +555,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
|
||||
append("-Wall" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
append("-W -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append("-Wextra -Wno-unused-parameter -Wwrite-strings" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
append("-Wcast-qual" CMAKE_CXX_FLAGS)
|
||||
|
||||
# Turn off missing field initializer warnings for gcc to avoid noise from
|
||||
@ -574,6 +579,11 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
|
||||
append_if(USE_NO_UNINITIALIZED "-Wno-uninitialized" CMAKE_CXX_FLAGS)
|
||||
append_if(USE_NO_MAYBE_UNINITIALIZED "-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
|
||||
|
||||
# Disable -Wclass-memaccess, a C++-only warning from GCC 8 that fires on
|
||||
# LLVM's ADT classes.
|
||||
check_cxx_compiler_flag("-Wclass-memaccess" CXX_SUPPORTS_CLASS_MEMACCESS_FLAG)
|
||||
append_if(CXX_SUPPORTS_CLASS_MEMACCESS_FLAG "-Wno-class-memaccess" CMAKE_CXX_FLAGS)
|
||||
|
||||
# Check if -Wnon-virtual-dtor warns even though the class is marked final.
|
||||
# If it does, don't add it. So it won't be added on clang 3.4 and older.
|
||||
# This also catches cases when -Wnon-virtual-dtor isn't supported by
|
||||
@ -626,7 +636,7 @@ macro(append_common_sanitizer_flags)
|
||||
add_flag_if_supported("-gline-tables-only" GLINE_TABLES_ONLY)
|
||||
endif()
|
||||
# Use -O1 even in debug mode, otherwise sanitizers slowdown is too large.
|
||||
if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG")
|
||||
if (uppercase_CMAKE_BUILD_TYPE STREQUAL "DEBUG" AND LLVM_OPTIMIZE_SANITIZED_BUILDS)
|
||||
add_flag_if_supported("-O1" O1)
|
||||
endif()
|
||||
elseif (CLANG_CL)
|
||||
@ -660,11 +670,6 @@ if(LLVM_USE_SANITIZER)
|
||||
append_common_sanitizer_flags()
|
||||
append("-fsanitize=undefined -fno-sanitize=vptr,function -fno-sanitize-recover=all"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
set(BLACKLIST_FILE "${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt")
|
||||
if (EXISTS "${BLACKLIST_FILE}")
|
||||
append("-fsanitize-blacklist=${BLACKLIST_FILE}"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
elseif (LLVM_USE_SANITIZER STREQUAL "Thread")
|
||||
append_common_sanitizer_flags()
|
||||
append("-fsanitize=thread" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
@ -696,6 +701,13 @@ if(LLVM_USE_SANITIZER)
|
||||
if (LLVM_USE_SANITIZE_COVERAGE)
|
||||
append("-fsanitize=fuzzer-no-link" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
if (LLVM_USE_SANITIZER MATCHES ".*Undefined.*")
|
||||
set(BLACKLIST_FILE "${CMAKE_SOURCE_DIR}/utils/sanitizers/ubsan_blacklist.txt")
|
||||
if (EXISTS "${BLACKLIST_FILE}")
|
||||
append("-fsanitize-blacklist=${BLACKLIST_FILE}"
|
||||
CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Turn on -gsplit-dwarf if requested
|
||||
@ -707,11 +719,13 @@ add_definitions( -D__STDC_CONSTANT_MACROS )
|
||||
add_definitions( -D__STDC_FORMAT_MACROS )
|
||||
add_definitions( -D__STDC_LIMIT_MACROS )
|
||||
|
||||
# clang doesn't print colored diagnostics when invoked from Ninja
|
||||
# clang and gcc don't default-print colored diagnostics when invoked from Ninja.
|
||||
if (UNIX AND
|
||||
CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND
|
||||
CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
append("-fcolor-diagnostics" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
CMAKE_GENERATOR STREQUAL "Ninja" AND
|
||||
(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR
|
||||
(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
|
||||
NOT (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9))))
|
||||
append("-fdiagnostics-color" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
|
||||
endif()
|
||||
|
||||
# lld doesn't print colored diagnostics when invoked from Ninja
|
||||
@ -849,6 +863,13 @@ else()
|
||||
set(LLVM_ENABLE_PLUGINS ON)
|
||||
endif()
|
||||
|
||||
set(LLVM_ENABLE_IDE_default OFF)
|
||||
if (XCODE OR MSVC_IDE OR CMAKE_EXTRA_GENERATOR)
|
||||
set(LLVM_ENABLE_IDE_default ON)
|
||||
endif()
|
||||
option(LLVM_ENABLE_IDE "Generate targets and process sources for use with an IDE"
|
||||
${LLVM_ENABLE_IDE_default})
|
||||
|
||||
function(get_compile_definitions)
|
||||
get_directory_property(top_dir_definitions DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS)
|
||||
foreach(definition ${top_dir_definitions})
|
||||
@ -861,3 +882,5 @@ function(get_compile_definitions)
|
||||
set(LLVM_DEFINITIONS "${result}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
get_compile_definitions()
|
||||
|
||||
option(LLVM_FORCE_ENABLE_STATS "Enable statistics collection for builds that wouldn't normally enable it" OFF)
|
||||
|
@ -13,10 +13,12 @@ if(NOT DEFINED LLVM_STDLIB_HANDLED)
|
||||
endfunction()
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckLinkerFlag)
|
||||
if(LLVM_ENABLE_LIBCXX)
|
||||
if(LLVM_COMPILER_IS_GCC_COMPATIBLE)
|
||||
check_cxx_compiler_flag("-stdlib=libc++" CXX_SUPPORTS_STDLIB)
|
||||
if(CXX_SUPPORTS_STDLIB)
|
||||
check_cxx_compiler_flag("-stdlib=libc++" CXX_COMPILER_SUPPORTS_STDLIB)
|
||||
check_linker_flag("-stdlib=libc++" CXX_LINKER_SUPPORTS_STDLIB)
|
||||
if(CXX_COMPILER_SUPPORTS_STDLIB AND CXX_LINKER_SUPPORTS_STDLIB)
|
||||
append("-stdlib=libc++"
|
||||
CMAKE_CXX_FLAGS CMAKE_EXE_LINKER_FLAGS CMAKE_SHARED_LINKER_FLAGS
|
||||
CMAKE_MODULE_LINKER_FLAGS)
|
||||
|
@ -68,7 +68,7 @@ macro(llvm_config executable)
|
||||
cmake_parse_arguments(ARG "USE_SHARED" "" "" ${ARGN})
|
||||
set(link_components ${ARG_UNPARSED_ARGUMENTS})
|
||||
|
||||
if(USE_SHARED)
|
||||
if(ARG_USE_SHARED)
|
||||
# If USE_SHARED is specified, then we link against libLLVM,
|
||||
# but also against the component libraries below. This is
|
||||
# done in case libLLVM does not contain all of the components
|
||||
|
@ -13,6 +13,10 @@ set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@)
|
||||
|
||||
set(LLVM_AVAILABLE_LIBS @LLVM_AVAILABLE_LIBS@)
|
||||
|
||||
@LLVM_CONFIG_LINK_LLVM_DYLIB@
|
||||
|
||||
set(LLVM_DYLIB_COMPONENTS @LLVM_DYLIB_COMPONENTS@)
|
||||
|
||||
set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@)
|
||||
|
||||
set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@)
|
||||
@ -37,6 +41,8 @@ set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@)
|
||||
|
||||
set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@)
|
||||
|
||||
set(LLVM_LIBXML2_ENABLED @LLVM_LIBXML2_ENABLED@)
|
||||
|
||||
set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@)
|
||||
|
||||
set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@)
|
||||
@ -54,7 +60,6 @@ set(LLVM_EXPORT_SYMBOLS_FOR_PLUGINS @LLVM_EXPORT_SYMBOLS_FOR_PLUGINS@)
|
||||
set(LLVM_PLUGIN_EXT @LLVM_PLUGIN_EXT@)
|
||||
|
||||
set(LLVM_ON_UNIX @LLVM_ON_UNIX@)
|
||||
set(LLVM_ON_WIN32 @LLVM_ON_WIN32@)
|
||||
|
||||
set(LLVM_LIBDIR_SUFFIX @LLVM_LIBDIR_SUFFIX@)
|
||||
|
||||
|
@ -46,7 +46,7 @@ function(llvm_ExternalProject_Add name source_dir)
|
||||
if(NOT ARG_TOOLCHAIN_TOOLS)
|
||||
set(ARG_TOOLCHAIN_TOOLS clang lld)
|
||||
if(NOT APPLE AND NOT WIN32)
|
||||
list(APPEND ARG_TOOLCHAIN_TOOLS llvm-ar llvm-ranlib)
|
||||
list(APPEND ARG_TOOLCHAIN_TOOLS llvm-ar llvm-ranlib llvm-nm llvm-objcopy llvm-objdump llvm-strip)
|
||||
endif()
|
||||
endif()
|
||||
foreach(tool ${ARG_TOOLCHAIN_TOOLS})
|
||||
@ -102,17 +102,32 @@ function(llvm_ExternalProject_Add name source_dir)
|
||||
endforeach()
|
||||
endforeach()
|
||||
|
||||
if(ARG_USE_TOOLCHAIN)
|
||||
if(ARG_USE_TOOLCHAIN AND NOT CMAKE_CROSSCOMPILING)
|
||||
if(CLANG_IN_TOOLCHAIN)
|
||||
set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang
|
||||
-DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++)
|
||||
endif()
|
||||
if(lld IN_LIST TOOLCHAIN_TOOLS)
|
||||
list(APPEND compiler_args -DCMAKE_LINKER=${LLVM_RUNTIME_OUTPUT_INTDIR}/ld.lld)
|
||||
endif()
|
||||
if(llvm-ar IN_LIST TOOLCHAIN_TOOLS)
|
||||
list(APPEND compiler_args -DCMAKE_AR=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ar)
|
||||
endif()
|
||||
if(llvm-ranlib IN_LIST TOOLCHAIN_TOOLS)
|
||||
list(APPEND compiler_args -DCMAKE_RANLIB=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-ranlib)
|
||||
endif()
|
||||
if(llvm-nm IN_LIST TOOLCHAIN_TOOLS)
|
||||
list(APPEND compiler_args -DCMAKE_NM=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-nm)
|
||||
endif()
|
||||
if(llvm-objdump IN_LIST TOOLCHAIN_TOOLS)
|
||||
list(APPEND compiler_args -DCMAKE_OBJDUMP=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-objdump)
|
||||
endif()
|
||||
if(llvm-objcopy IN_LIST TOOLCHAIN_TOOLS)
|
||||
list(APPEND compiler_args -DCMAKE_OBJCOPY=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-objcopy)
|
||||
endif()
|
||||
if(llvm-strip IN_LIST TOOLCHAIN_TOOLS)
|
||||
list(APPEND compiler_args -DCMAKE_STRIP=${LLVM_RUNTIME_OUTPUT_INTDIR}/llvm-strip)
|
||||
endif()
|
||||
list(APPEND ARG_DEPENDS ${TOOLCHAIN_TOOLS})
|
||||
endif()
|
||||
|
||||
@ -136,6 +151,21 @@ function(llvm_ExternalProject_Add name source_dir)
|
||||
set(sysroot_arg -DCMAKE_SYSROOT=${CMAKE_SYSROOT})
|
||||
endif()
|
||||
|
||||
if(CMAKE_CROSSCOMPILING)
|
||||
set(compiler_args -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_LINKER=${CMAKE_LINKER}
|
||||
-DCMAKE_AR=${CMAKE_AR}
|
||||
-DCMAKE_RANLIB=${CMAKE_RANLIB}
|
||||
-DCMAKE_NM=${CMAKE_NM}
|
||||
-DCMAKE_OBJCOPY=${CMAKE_OBJCOPY}
|
||||
-DCMAKE_OBJDUMP=${CMAKE_OBJDUMP}
|
||||
-DCMAKE_STRIP=${CMAKE_STRIP})
|
||||
set(llvm_config_path ${LLVM_CONFIG_PATH})
|
||||
else()
|
||||
set(llvm_config_path "$<TARGET_FILE:llvm-config>")
|
||||
endif()
|
||||
|
||||
ExternalProject_Add(${name}
|
||||
DEPENDS ${ARG_DEPENDS} llvm-config
|
||||
${name}-clobber
|
||||
@ -149,7 +179,7 @@ function(llvm_ExternalProject_Add name source_dir)
|
||||
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
|
||||
${sysroot_arg}
|
||||
-DLLVM_BINARY_DIR=${PROJECT_BINARY_DIR}
|
||||
-DLLVM_CONFIG_PATH=$<TARGET_FILE:llvm-config>
|
||||
-DLLVM_CONFIG_PATH=${llvm_config_path}
|
||||
-DLLVM_ENABLE_WERROR=${LLVM_ENABLE_WERROR}
|
||||
-DLLVM_HOST_TRIPLE=${LLVM_HOST_TRIPLE}
|
||||
-DLLVM_HAVE_LINK_VERSION_SCRIPT=${LLVM_HAVE_LINK_VERSION_SCRIPT}
|
||||
@ -191,7 +221,7 @@ function(llvm_ExternalProject_Add name source_dir)
|
||||
endif()
|
||||
|
||||
if(NOT ARG_NO_INSTALL)
|
||||
install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -P ${BINARY_DIR}/cmake_install.cmake \)"
|
||||
install(CODE "execute_process\(COMMAND \${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=\${CMAKE_INSTALL_PREFIX} -DCMAKE_INSTALL_DO_STRIP=\${CMAKE_INSTALL_DO_STRIP} -P ${BINARY_DIR}/cmake_install.cmake\)"
|
||||
COMPONENT ${name})
|
||||
|
||||
add_llvm_install_targets(install-${name}
|
||||
@ -201,16 +231,13 @@ function(llvm_ExternalProject_Add name source_dir)
|
||||
|
||||
# Add top-level targets
|
||||
foreach(target ${ARG_EXTRA_TARGETS})
|
||||
string(REPLACE ":" ";" target_list ${target})
|
||||
list(GET target_list 0 target)
|
||||
list(LENGTH target_list target_list_len)
|
||||
if(${target_list_len} GREATER 1)
|
||||
list(GET target_list 1 target_name)
|
||||
if(DEFINED ${target})
|
||||
set(external_target "${${target}}")
|
||||
else()
|
||||
set(target_name "${target}")
|
||||
set(external_target "${target}")
|
||||
endif()
|
||||
llvm_ExternalProject_BuildCmd(build_runtime_cmd ${target} ${BINARY_DIR})
|
||||
add_custom_target(${target_name}
|
||||
llvm_ExternalProject_BuildCmd(build_runtime_cmd ${external_target} ${BINARY_DIR})
|
||||
add_custom_target(${target}
|
||||
COMMAND ${build_runtime_cmd}
|
||||
DEPENDS ${name}-configure
|
||||
WORKING_DIRECTORY ${BINARY_DIR}
|
||||
|
@ -3,7 +3,7 @@
|
||||
# See PR8397.
|
||||
|
||||
function(install_symlink name target outdir)
|
||||
if(UNIX)
|
||||
if(CMAKE_HOST_UNIX)
|
||||
set(LINK_OR_COPY create_symlink)
|
||||
set(DESTDIR $ENV{DESTDIR})
|
||||
else()
|
||||
|
@ -52,7 +52,7 @@ function(llvm_process_sources OUT_VAR)
|
||||
cmake_parse_arguments(ARG "" "" "ADDITIONAL_HEADERS;ADDITIONAL_HEADER_DIRS" ${ARGN})
|
||||
set(sources ${ARG_UNPARSED_ARGUMENTS})
|
||||
llvm_check_source_file_list( ${sources} )
|
||||
if( MSVC_IDE OR XCODE )
|
||||
if( LLVM_ENABLE_IDE )
|
||||
# This adds .td and .h files to the Visual Studio solution:
|
||||
add_td_sources(sources)
|
||||
find_all_header_files(hdrs "${ARG_ADDITIONAL_HEADER_DIRS}")
|
||||
@ -69,14 +69,18 @@ endfunction(llvm_process_sources)
|
||||
|
||||
function(llvm_check_source_file_list)
|
||||
cmake_parse_arguments(ARG "" "SOURCE_DIR" "" ${ARGN})
|
||||
set(listed ${ARG_UNPARSED_ARGUMENTS})
|
||||
foreach(l ${ARG_UNPARSED_ARGUMENTS})
|
||||
get_filename_component(fp ${l} REALPATH)
|
||||
list(APPEND listed ${fp})
|
||||
endforeach()
|
||||
|
||||
if(ARG_SOURCE_DIR)
|
||||
file(GLOB globbed
|
||||
RELATIVE "${CMAKE_CURRENT_LIST_DIR}"
|
||||
"${ARG_SOURCE_DIR}/*.c" "${ARG_SOURCE_DIR}/*.cpp")
|
||||
else()
|
||||
file(GLOB globbed *.c *.cpp)
|
||||
endif()
|
||||
|
||||
foreach(g ${globbed})
|
||||
get_filename_component(fn ${g} NAME)
|
||||
if(ARG_SOURCE_DIR)
|
||||
@ -84,15 +88,21 @@ function(llvm_check_source_file_list)
|
||||
else()
|
||||
set(entry "${fn}")
|
||||
endif()
|
||||
get_filename_component(gp ${g} REALPATH)
|
||||
|
||||
# Don't reject hidden files. Some editors create backups in the
|
||||
# same directory as the file.
|
||||
if (NOT "${fn}" MATCHES "^\\.")
|
||||
list(FIND LLVM_OPTIONAL_SOURCES ${entry} idx)
|
||||
if( idx LESS 0 )
|
||||
list(FIND listed ${entry} idx)
|
||||
list(FIND listed ${gp} idx)
|
||||
if( idx LESS 0 )
|
||||
message(SEND_ERROR "Found unknown source file ${g}
|
||||
if(ARG_SOURCE_DIR)
|
||||
set(fn_relative "${ARG_SOURCE_DIR}/${fn}")
|
||||
else()
|
||||
set(fn_relative "${fn}")
|
||||
endif()
|
||||
message(SEND_ERROR "Found unknown source file ${fn_relative}
|
||||
Please update ${CMAKE_CURRENT_LIST_FILE}\n")
|
||||
endif()
|
||||
endif()
|
||||
|
@ -53,3 +53,50 @@ IF(NOT CMAKE_RANLIB)
|
||||
SET(CMAKE_RANLIB ${CMAKE_RANLIB_val} CACHE FILEPATH "Ranlib")
|
||||
message(STATUS "Using ranlib ${CMAKE_RANLIB}")
|
||||
ENDIF()
|
||||
|
||||
# The blocks below share one pattern: when the corresponding CMAKE_<TOOL>
# variable is not already set, ask xcrun to locate the tool in the SDK named by
# CMAKE_OSX_SYSROOT, cache the resulting path as a FILEPATH entry, and report it.
# NOTE(review): xcrun's exit status is not checked and its stderr is silenced
# (ERROR_QUIET), so a failed lookup appears to cache an empty value - confirm.

# strip
IF(NOT CMAKE_STRIP)
|
||||
execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find strip
|
||||
OUTPUT_VARIABLE CMAKE_STRIP_val
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
SET(CMAKE_STRIP ${CMAKE_STRIP_val} CACHE FILEPATH "Strip")
|
||||
message(STATUS "Using strip ${CMAKE_STRIP}")
|
||||
ENDIF()
|
||||
|
||||
# dsymutil
IF(NOT CMAKE_DSYMUTIL)
|
||||
execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find dsymutil
|
||||
OUTPUT_VARIABLE CMAKE_DSYMUTIL_val
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
SET(CMAKE_DSYMUTIL ${CMAKE_DSYMUTIL_val} CACHE FILEPATH "Dsymutil")
|
||||
message(STATUS "Using dsymutil ${CMAKE_DSYMUTIL}")
|
||||
ENDIF()
|
||||
|
||||
# libtool
IF(NOT CMAKE_LIBTOOL)
|
||||
execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find libtool
|
||||
OUTPUT_VARIABLE CMAKE_LIBTOOL_val
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
SET(CMAKE_LIBTOOL ${CMAKE_LIBTOOL_val} CACHE FILEPATH "Libtool")
|
||||
message(STATUS "Using libtool ${CMAKE_LIBTOOL}")
|
||||
ENDIF()
|
||||
|
||||
# codesign
IF(NOT CMAKE_CODESIGN)
|
||||
execute_process(COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find codesign
|
||||
OUTPUT_VARIABLE CMAKE_CODESIGN_val
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
SET(CMAKE_CODESIGN ${CMAKE_CODESIGN_val} CACHE FILEPATH "Codesign")
|
||||
message(STATUS "Using codesign ${CMAKE_CODESIGN}")
|
||||
ENDIF()
|
||||
|
||||
# codesign_allocate
IF(NOT CMAKE_CODESIGN_ALLOCATE)
|
||||
execute_process(
|
||||
COMMAND xcrun -sdk ${CMAKE_OSX_SYSROOT} -find codesign_allocate
|
||||
OUTPUT_VARIABLE CMAKE_CODESIGN_ALLOCATE_val
|
||||
ERROR_QUIET
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
SET(CMAKE_CODESIGN_ALLOCATE ${CMAKE_CODESIGN_ALLOCATE_val} CACHE
|
||||
FILEPATH "Codesign_Allocate")
|
||||
message(STATUS "Using codesign_allocate ${CMAKE_CODESIGN_ALLOCATE}")
|
||||
ENDIF()
|
||||
|
1255
docs/AMDGPUAsmGFX7.rst
Normal file
1255
docs/AMDGPUAsmGFX7.rst
Normal file
File diff suppressed because it is too large
Load Diff
1672
docs/AMDGPUAsmGFX8.rst
Normal file
1672
docs/AMDGPUAsmGFX8.rst
Normal file
File diff suppressed because it is too large
Load Diff
1906
docs/AMDGPUAsmGFX9.rst
Normal file
1906
docs/AMDGPUAsmGFX9.rst
Normal file
File diff suppressed because it is too large
Load Diff
1055
docs/AMDGPUOperandSyntax.rst
Normal file
1055
docs/AMDGPUOperandSyntax.rst
Normal file
File diff suppressed because it is too large
Load Diff
1044
docs/AMDGPUUsage.rst
1044
docs/AMDGPUUsage.rst
File diff suppressed because it is too large
Load Diff
@ -151,7 +151,7 @@ The PGO came cache generates the following additional targets:
|
||||
=======================
|
||||
|
||||
In the ancient lore of compilers non-determinism is like the multi-headed hydra.
|
||||
Whenever it's head pops up, terror and chaos ensue.
|
||||
Whenever its head pops up, terror and chaos ensue.
|
||||
|
||||
Historically one of the tests to verify that a compiler was deterministic would
|
||||
be a three stage build. The idea of a three stage build is you take your sources
|
||||
|
@ -389,11 +389,6 @@ in its ``getAnalysisUsage`` that it does so. Some passes attempt to use
|
||||
``AU.addPreserved<AliasAnalysis>``, however this doesn't actually have any
|
||||
effect.
|
||||
|
||||
``AliasAnalysisCounter`` (``-count-aa``) are implemented as ``ModulePass``
|
||||
classes, so if your alias analysis uses ``FunctionPass``, it won't be able to
|
||||
use these utilities. If you try to use them, the pass manager will silently
|
||||
route alias analysis queries directly to ``BasicAliasAnalysis`` instead.
|
||||
|
||||
Similarly, the ``opt -p`` option introduces ``ModulePass`` passes between each
|
||||
pass, which prevents the use of ``FunctionPass`` alias analysis passes.
|
||||
|
||||
@ -408,17 +403,10 @@ before it appears in an alias query. However, popular clients such as ``GVN``
|
||||
don't support this, and are known to trigger errors when run with the
|
||||
``AliasAnalysisDebugger``.
|
||||
|
||||
Due to several of the above limitations, the most obvious use for the
|
||||
``AliasAnalysisCounter`` utility, collecting stats on all alias queries in a
|
||||
compilation, doesn't work, even if the ``AliasAnalysis`` implementations don't
|
||||
use ``FunctionPass``. There's no way to set a default, much less a default
|
||||
sequence, and there's no way to preserve it.
|
||||
|
||||
The ``AliasSetTracker`` class (which is used by ``LICM``) makes a
|
||||
non-deterministic number of alias queries. This can cause stats collected by
|
||||
``AliasAnalysisCounter`` to have fluctuations among identical runs, for
|
||||
example. Another consequence is that debugging techniques involving pausing
|
||||
execution after a predetermined number of queries can be unreliable.
|
||||
non-deterministic number of alias queries. This can cause debugging techniques
|
||||
involving pausing execution after a predetermined number of queries to be
|
||||
unreliable.
|
||||
|
||||
Many alias queries can be reformulated in terms of other alias queries. When
|
||||
multiple ``AliasAnalysis`` queries are chained together, it would make sense to
|
||||
@ -676,21 +664,6 @@ you're using the ``AliasSetTracker`` class. To use it, use something like:
|
||||
|
||||
% opt -ds-aa -print-alias-sets -disable-output
|
||||
|
||||
The ``-count-aa`` pass
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``-count-aa`` pass is useful to see how many queries a particular pass is
|
||||
making and what responses are returned by the alias analysis. As an example:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
% opt -basicaa -count-aa -ds-aa -count-aa -licm
|
||||
|
||||
will print out how many queries (and what responses are returned) by the
|
||||
``-licm`` pass (of the ``-ds-aa`` pass) and how many queries are made of the
|
||||
``-basicaa`` pass by the ``-ds-aa`` pass. This can be useful when debugging a
|
||||
transformation or an alias analysis implementation.
|
||||
|
||||
The ``-aa-eval`` pass
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -62,10 +62,12 @@ understanding the encoding.
|
||||
Magic Numbers
|
||||
-------------
|
||||
|
||||
The first two bytes of a bitcode file are 'BC' (``0x42``, ``0x43``). The second
|
||||
two bytes are an application-specific magic number. Generic bitcode tools can
|
||||
look at only the first two bytes to verify the file is bitcode, while
|
||||
application-specific programs will want to look at all four.
|
||||
The first four bytes of a bitstream are used as an application-specific magic
|
||||
number. Generic bitcode tools may look at the first four bytes to determine
|
||||
whether the stream is a known stream type. However, these tools should *not*
|
||||
determine whether a bitstream is valid based on its magic number alone. New
|
||||
application-specific bitstream formats are being developed all the time; tools
|
||||
should not reject them just because they have a hitherto unseen magic number.
|
||||
|
||||
.. _primitives:
|
||||
|
||||
@ -496,12 +498,9 @@ LLVM IR Magic Number
|
||||
The magic number for LLVM IR files is:
|
||||
|
||||
:raw-html:`<tt><blockquote>`
|
||||
[0x0\ :sub:`4`, 0xC\ :sub:`4`, 0xE\ :sub:`4`, 0xD\ :sub:`4`]
|
||||
['B'\ :sub:`8`, 'C'\ :sub:`8`, 0x0\ :sub:`4`, 0xC\ :sub:`4`, 0xE\ :sub:`4`, 0xD\ :sub:`4`]
|
||||
:raw-html:`</blockquote></tt>`
|
||||
|
||||
When combined with the bitcode magic number and viewed as bytes, this is
|
||||
``"BC 0xC0DE"``.
|
||||
|
||||
.. _Signed VBRs:
|
||||
|
||||
Signed VBRs
|
||||
@ -904,7 +903,7 @@ PARAMATTR_CODE_ENTRY Record
|
||||
|
||||
The ``ENTRY`` record (code 2) contains a variable number of values describing a
|
||||
unique set of function parameter attributes. Each *attrgrp* value is used as a
|
||||
key with which to look up an entry in the the attribute group table described
|
||||
key with which to look up an entry in the attribute group table described
|
||||
in the ``PARAMATTR_GROUP_BLOCK`` block.
|
||||
|
||||
.. _PARAMATTR_CODE_ENTRY_OLD:
|
||||
@ -1055,6 +1054,9 @@ The integer codes are mapped to well-known attributes as follows.
|
||||
* code 53: ``speculatable``
|
||||
* code 54: ``strictfp``
|
||||
* code 55: ``sanitize_hwaddress``
|
||||
* code 56: ``nocf_check``
|
||||
* code 57: ``optforfuzzing``
|
||||
* code 58: ``shadowcallstack``
|
||||
|
||||
.. note::
|
||||
The ``allocsize`` attribute has a special encoding for its arguments. Its two
|
||||
|
@ -198,14 +198,14 @@ desired ranges. For example:
|
||||
|
||||
static int calledCount = 0;
|
||||
calledCount++;
|
||||
DEBUG(if (calledCount < 212) return false);
|
||||
DEBUG(if (calledCount > 217) return false);
|
||||
DEBUG(if (calledCount == 213) return false);
|
||||
DEBUG(if (calledCount == 214) return false);
|
||||
DEBUG(if (calledCount == 215) return false);
|
||||
DEBUG(if (calledCount == 216) return false);
|
||||
DEBUG(dbgs() << "visitXOR calledCount: " << calledCount << "\n");
|
||||
DEBUG(dbgs() << "I: "; I->dump());
|
||||
LLVM_DEBUG(if (calledCount < 212) return false);
|
||||
LLVM_DEBUG(if (calledCount > 217) return false);
|
||||
LLVM_DEBUG(if (calledCount == 213) return false);
|
||||
LLVM_DEBUG(if (calledCount == 214) return false);
|
||||
LLVM_DEBUG(if (calledCount == 215) return false);
|
||||
LLVM_DEBUG(if (calledCount == 216) return false);
|
||||
LLVM_DEBUG(dbgs() << "visitXOR calledCount: " << calledCount << "\n");
|
||||
LLVM_DEBUG(dbgs() << "I: "; I->dump());
|
||||
|
||||
could be added to ``visitXOR`` to limit ``visitXOR`` to being applied only to
|
||||
calls 212 and 217. This is from an actual test case and raises an important
|
||||
|
@ -25,8 +25,8 @@ This tool will be present as a part of the LLVM toolchain, and will reside in
|
||||
the "/llvm/tools/llvm-cfi-verify" directory, relative to the LLVM trunk. It will
|
||||
be tested in two methods:
|
||||
|
||||
- Unit tests to validate code sections, present in "/llvm/unittests/llvm-cfi-
|
||||
verify".
|
||||
- Unit tests to validate code sections, present in
|
||||
"/llvm/unittests/tools/llvm-cfi-verify".
|
||||
- Integration tests, present in "/llvm/tools/clang/test/LLVMCFIVerify". These
|
||||
integration tests are part of clang as part of a continuous integration
|
||||
framework, ensuring updates to the compiler that reduce CFI coverage on
|
||||
@ -86,6 +86,8 @@ Only machine code sections that are marked as executable will be subject to this
|
||||
analysis. Non-executable sections do not require analysis as any execution
|
||||
present in these sections has already violated the control flow integrity.
|
||||
|
||||
Suitable extensions may be made at a later date to include anaylsis for indirect
|
||||
Suitable extensions may be made at a later date to include analysis for indirect
|
||||
control flow operations across DSO boundaries. Currently, these CFI features are
|
||||
only experimental with an unstable ABI, making them unsuitable for analysis.
|
||||
|
||||
The tool currently only supports the x86, x86_64, and AArch64 architectures.
|
||||
|
@ -12,8 +12,8 @@ Introduction
|
||||
does not build the project, it generates the files needed by your build tool
|
||||
(GNU make, Visual Studio, etc.) for building LLVM.
|
||||
|
||||
If **you are a new contributor**, please start with the :doc:`GettingStarted`
|
||||
page. This page is geared for existing contributors moving from the
|
||||
If **you are a new contributor**, please start with the :doc:`GettingStarted`
|
||||
page. This page is geared for existing contributors moving from the
|
||||
legacy configure/make system.
|
||||
|
||||
If you are really anxious about getting a functional LLVM build, go to the
|
||||
@ -370,6 +370,14 @@ LLVM-specific variables
|
||||
**LLVM_USE_INTEL_JITEVENTS**:BOOL
|
||||
Enable building support for Intel JIT Events API. Defaults to OFF.
|
||||
|
||||
**LLVM_ENABLE_LIBPFM**:BOOL
|
||||
Enable building with libpfm to support hardware counter measurements in LLVM
|
||||
tools.
|
||||
Defaults to ON.
|
||||
|
||||
**LLVM_USE_PERF**:BOOL
|
||||
Enable building JIT support for Perf (the Linux profiling tool). Defaults to OFF.
|
||||
|
||||
**LLVM_ENABLE_ZLIB**:BOOL
|
||||
Enable building with zlib to support compression/decompression in LLVM tools.
|
||||
Defaults to ON.
|
||||
@ -409,10 +417,10 @@ LLVM-specific variables
|
||||
**LLVM_BUILD_DOCS**:BOOL
|
||||
Adds all *enabled* documentation targets (i.e. Doxygen and Sphinx targets) as
|
||||
dependencies of the default build targets. This results in all of the (enabled)
|
||||
documentation targets being as part of a normal build. If the ``install``
|
||||
target is run then this also enables all built documentation targets to be
|
||||
installed. Defaults to OFF. To enable a particular documentation target, see
|
||||
see LLVM_ENABLE_SPHINX and LLVM_ENABLE_DOXYGEN.
|
||||
documentation targets being built as part of a normal build. If the ``install``
|
||||
target is run then this also enables all built documentation targets to be
|
||||
installed. Defaults to OFF. To enable a particular documentation target, see
|
||||
LLVM_ENABLE_SPHINX and LLVM_ENABLE_DOXYGEN.
|
||||
|
||||
**LLVM_ENABLE_DOXYGEN**:BOOL
|
||||
Enables the generation of browsable HTML documentation using doxygen.
|
||||
@ -509,7 +517,7 @@ LLVM-specific variables
|
||||
OS X Only: If enabled CMake will generate a target named
|
||||
'install-xcode-toolchain'. This target will create a directory at
|
||||
$CMAKE_INSTALL_PREFIX/Toolchains containing an xctoolchain directory which can
|
||||
be used to override the default system tools.
|
||||
be used to override the default system tools.
|
||||
|
||||
**LLVM_BUILD_LLVM_DYLIB**:BOOL
|
||||
If enabled, the target for building the libLLVM shared library is added.
|
||||
@ -530,7 +538,7 @@ LLVM-specific variables
|
||||
library (ON) or as a static library (OFF). Its default value is OFF. On
|
||||
Windows, shared libraries may be used when building with MinGW, including
|
||||
mingw-w64, but not when building with the Microsoft toolchain.
|
||||
|
||||
|
||||
.. note:: BUILD_SHARED_LIBS is only recommended for use by LLVM developers.
|
||||
If you want to build LLVM as a shared library, you should use the
|
||||
``LLVM_BUILD_LLVM_DYLIB`` option.
|
||||
@ -551,6 +559,14 @@ LLVM-specific variables
|
||||
<http://clang.llvm.org/docs/SourceBasedCodeCoverage.html>`_ instrumentation
|
||||
is enabled while building llvm.
|
||||
|
||||
**LLVM_CCACHE_BUILD**:BOOL
|
||||
If enabled and the ``ccache`` program is available, then LLVM will be
|
||||
built using ``ccache`` to speed up rebuilds of LLVM and its components.
|
||||
Defaults to OFF. The size and location of the cache maintained
|
||||
by ``ccache`` can be adjusted via the LLVM_CCACHE_MAXSIZE and LLVM_CCACHE_DIR
|
||||
options, which are passed to the CCACHE_MAXSIZE and CCACHE_DIR environment
|
||||
variables, respectively.
|
||||
|
||||
CMake Caches
|
||||
============
|
||||
|
||||
|
@ -566,7 +566,7 @@ MI bundle support does not change the physical representations of
|
||||
MachineBasicBlock and MachineInstr. All the MIs (including top level and nested
|
||||
ones) are stored as sequential list of MIs. The "bundled" MIs are marked with
|
||||
the 'InsideBundle' flag. A top level MI with the special BUNDLE opcode is used
|
||||
to represent the start of a bundle. It's legal to mix BUNDLE MIs with indiviual
|
||||
to represent the start of a bundle. It's legal to mix BUNDLE MIs with individual
|
||||
MIs that are not inside bundles nor represent bundles.
|
||||
|
||||
MachineInstr passes should operate on a MI bundle as a single unit. Member
|
||||
@ -1584,7 +1584,7 @@ Emitting function stack size information
|
||||
A section containing metadata on function stack sizes will be emitted when
|
||||
``TargetLoweringObjectFile::StackSizesSection`` is not null, and
|
||||
``TargetOptions::EmitStackSizeSection`` is set (-stack-size-section). The
|
||||
section will contain an array of pairs of function symbol references (8 byte)
|
||||
section will contain an array of pairs of function symbol values (pointer size)
|
||||
and stack sizes (unsigned LEB128). The stack size values only include the space
|
||||
allocated in the function prologue. Functions with dynamic stack allocations are
|
||||
not included.
|
||||
|
@ -91,9 +91,9 @@ guidance below to help you know what to expect.
|
||||
|
||||
Each toolchain provides a good reference for what it accepts:
|
||||
|
||||
* Clang: http://clang.llvm.org/cxx_status.html
|
||||
* GCC: http://gcc.gnu.org/projects/cxx0x.html
|
||||
* MSVC: http://msdn.microsoft.com/en-us/library/hh567368.aspx
|
||||
* Clang: https://clang.llvm.org/cxx_status.html
|
||||
* GCC: https://gcc.gnu.org/projects/cxx-status.html#cxx11
|
||||
* MSVC: https://msdn.microsoft.com/en-us/library/hh567368.aspx
|
||||
|
||||
In most cases, the MSVC list will be the dominating factor. Here is a summary
|
||||
of the features that are expected to work. Features not on this list are
|
||||
@ -184,7 +184,7 @@ you hit a type trait which doesn't work we can then add support to LLVM's
|
||||
traits header to emulate it.
|
||||
|
||||
.. _the libstdc++ manual:
|
||||
http://gcc.gnu.org/onlinedocs/gcc-4.8.0/libstdc++/manual/manual/status.html#status.iso.2011
|
||||
https://gcc.gnu.org/onlinedocs/gcc-4.8.0/libstdc++/manual/manual/status.html#status.iso.2011
|
||||
|
||||
Other Languages
|
||||
---------------
|
||||
@ -591,7 +591,7 @@ understood for formatting nested function calls. Examples:
|
||||
This formatting scheme also makes it particularly easy to get predictable,
|
||||
consistent, and automatic formatting with tools like `Clang Format`_.
|
||||
|
||||
.. _Clang Format: http://clang.llvm.org/docs/ClangFormat.html
|
||||
.. _Clang Format: https://clang.llvm.org/docs/ClangFormat.html
|
||||
|
||||
Language and Compiler Issues
|
||||
----------------------------
|
||||
@ -667,14 +667,14 @@ Do not use Static Constructors
|
||||
Static constructors and destructors (e.g. global variables whose types have a
|
||||
constructor or destructor) should not be added to the code base, and should be
|
||||
removed wherever possible. Besides `well known problems
|
||||
<http://yosefk.com/c++fqa/ctors.html#fqa-10.12>`_ where the order of
|
||||
<https://yosefk.com/c++fqa/ctors.html#fqa-10.12>`_ where the order of
|
||||
initialization is undefined between globals in different source files, the
|
||||
entire concept of static constructors is at odds with the common use case of
|
||||
LLVM as a library linked into a larger application.
|
||||
|
||||
Consider the use of LLVM as a JIT linked into another application (perhaps for
|
||||
`OpenGL, custom languages <http://llvm.org/Users.html>`_, `shaders in movies
|
||||
<http://llvm.org/devmtg/2010-11/Gritz-OpenShadingLang.pdf>`_, etc). Due to the
|
||||
`OpenGL, custom languages <https://llvm.org/Users.html>`_, `shaders in movies
|
||||
<https://llvm.org/devmtg/2010-11/Gritz-OpenShadingLang.pdf>`_, etc). Due to the
|
||||
design of static constructors, they must be executed at startup time of the
|
||||
entire application, regardless of whether or how LLVM is used in that larger
|
||||
application. There are two problems with this:
|
||||
@ -692,7 +692,7 @@ target or other library into an application, but static constructors violate
|
||||
this goal.
|
||||
|
||||
That said, LLVM unfortunately does contain static constructors. It would be a
|
||||
`great project <http://llvm.org/PR11944>`_ for someone to purge all static
|
||||
`great project <https://llvm.org/PR11944>`_ for someone to purge all static
|
||||
constructors from LLVM, and then enable the ``-Wglobal-constructors`` warning
|
||||
flag (when building with Clang) to ensure we do not regress in the future.
|
||||
|
||||
@ -826,33 +826,71 @@ As a rule of thumb, in case an ordered result is expected, remember to
|
||||
sort an unordered container before iteration. Or use ordered containers
|
||||
like vector/MapVector/SetVector if you want to iterate pointer keys.
|
||||
|
||||
Beware of non-deterministic sorting order of equal elements
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
std::sort uses a non-stable sorting algorithm in which the order of equal
|
||||
elements is not guaranteed to be preserved. Thus using std::sort for a
|
||||
container having equal elements may result in non-deterministic behavior.
|
||||
To uncover such instances of non-determinism, LLVM has introduced a new
|
||||
llvm::sort wrapper function. For an EXPENSIVE_CHECKS build this will randomly
|
||||
shuffle the container before sorting. As a rule of thumb, always make sure to
|
||||
use llvm::sort instead of std::sort.
|
||||
|
||||
Style Issues
|
||||
============
|
||||
|
||||
The High-Level Issues
|
||||
---------------------
|
||||
|
||||
A Public Header File **is** a Module
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Self-contained Headers
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
C++ doesn't do too well in the modularity department. There is no real
|
||||
encapsulation or data hiding (unless you use expensive protocol classes), but it
|
||||
is what we have to work with. When you write a public header file (in the LLVM
|
||||
source tree, they live in the top level "``include``" directory), you are
|
||||
defining a module of functionality.
|
||||
Header files should be self-contained (compile on their own) and end in .h.
|
||||
Non-header files that are meant for inclusion should end in .inc and be used
|
||||
sparingly.
|
||||
|
||||
Ideally, modules should be completely independent of each other, and their
|
||||
header files should only ``#include`` the absolute minimum number of headers
|
||||
possible. A module is not just a class, a function, or a namespace: it's a
|
||||
collection of these that defines an interface. This interface may be several
|
||||
functions, classes, or data structures, but the important issue is how they work
|
||||
together.
|
||||
All header files should be self-contained. Users and refactoring tools should
|
||||
not have to adhere to special conditions to include the header. Specifically, a
|
||||
header should have header guards and include all other headers it needs.
|
||||
|
||||
In general, a module should be implemented by one or more ``.cpp`` files. Each
|
||||
There are rare cases where a file designed to be included is not
|
||||
self-contained. These are typically intended to be included at unusual
|
||||
locations, such as the middle of another file. They might not use header
|
||||
guards, and might not include their prerequisites. Name such files with the
|
||||
.inc extension. Use sparingly, and prefer self-contained headers when possible.
|
||||
|
||||
In general, a header should be implemented by one or more ``.cpp`` files. Each
|
||||
of these ``.cpp`` files should include the header that defines their interface
|
||||
first. This ensures that all of the dependences of the module header have been
|
||||
properly added to the module header itself, and are not implicit. System
|
||||
headers should be included after user headers for a translation unit.
|
||||
first. This ensures that all of the dependences of the header have been
|
||||
properly added to the header itself, and are not implicit. System headers
|
||||
should be included after user headers for a translation unit.
|
||||
|
||||
Library Layering
|
||||
^^^^^^^^^^^^^^^^
|
||||
|
||||
A directory of header files (for example ``include/llvm/Foo``) defines a
|
||||
library (``Foo``). Dependencies between libraries are defined by the
|
||||
``LLVMBuild.txt`` file in their implementation (``lib/Foo``). One library (both
|
||||
its headers and implementation) should only use things from the libraries
|
||||
listed in its dependencies.
|
||||
|
||||
Some of this constraint can be enforced by classic Unix linkers (Mac & Windows
|
||||
linkers, as well as lld, do not enforce this constraint). A Unix linker
|
||||
searches left to right through the libraries specified on its command line and
|
||||
never revisits a library. In this way, no circular dependencies between
|
||||
libraries can exist.
|
||||
|
||||
This doesn't fully enforce all inter-library dependencies, and importantly
|
||||
doesn't enforce header file circular dependencies created by inline functions.
|
||||
A good way to answer the "is this layered correctly" question would be to consider
|
||||
whether a Unix linker would succeed at linking the program if all inline
|
||||
functions were defined out-of-line. (& for all valid orderings of dependencies
|
||||
- since linking resolution is linear, it's possible that some implicit
|
||||
dependencies can sneak through: A depends on B and C, so valid orderings are
|
||||
"C B A" or "B C A", in both cases the explicit dependencies come before their
|
||||
use. But in the first case, B could still link successfully if it implicitly
|
||||
depended on C, or the opposite in the second case)
|
||||
|
||||
.. _minimal list of #includes:
|
||||
|
||||
@ -1659,12 +1697,12 @@ A lot of these comments and recommendations have been culled from other sources.
|
||||
Two particularly important books for our work are:
|
||||
|
||||
#. `Effective C++
|
||||
<http://www.amazon.com/Effective-Specific-Addison-Wesley-Professional-Computing/dp/0321334876>`_
|
||||
<https://www.amazon.com/Effective-Specific-Addison-Wesley-Professional-Computing/dp/0321334876>`_
|
||||
by Scott Meyers. Also interesting and useful are "More Effective C++" and
|
||||
"Effective STL" by the same author.
|
||||
|
||||
#. `Large-Scale C++ Software Design
|
||||
<http://www.amazon.com/Large-Scale-Software-Design-John-Lakos/dp/0201633620/ref=sr_1_1>`_
|
||||
<https://www.amazon.com/Large-Scale-Software-Design-John-Lakos/dp/0201633620>`_
|
||||
by John Lakos
|
||||
|
||||
If you get some free time, and you haven't read them: do so, you might learn
|
||||
|
@ -77,6 +77,10 @@ OPTIONS
|
||||
-verify``. With this option FileCheck will verify that input does not contain
|
||||
warnings not covered by any ``CHECK:`` patterns.
|
||||
|
||||
.. option:: --dump-input-on-failure
|
||||
|
||||
When the check fails, dump all of the original input.
|
||||
|
||||
.. option:: --enable-var-scope
|
||||
|
||||
Enables scope for regex variables.
|
||||
@ -95,6 +99,23 @@ OPTIONS
|
||||
|
||||
Show the version number of this program.
|
||||
|
||||
.. option:: -v
|
||||
|
||||
Print directive pattern matches.
|
||||
|
||||
.. option:: -vv
|
||||
|
||||
Print information helpful in diagnosing internal FileCheck issues, such as
|
||||
discarded overlapping ``CHECK-DAG:`` matches, implicit EOF pattern matches,
|
||||
and ``CHECK-NOT:`` patterns that do not have matches. Implies ``-v``.
|
||||
|
||||
.. option:: --allow-deprecated-dag-overlap
|
||||
|
||||
Enable overlapping among matches in a group of consecutive ``CHECK-DAG:``
|
||||
directives. This option is deprecated and is only provided for convenience
|
||||
as old tests are migrated to the new non-overlapping ``CHECK-DAG:``
|
||||
implementation.
|
||||
|
||||
EXIT STATUS
|
||||
-----------
|
||||
|
||||
@ -241,6 +262,25 @@ For example, the following works like you'd expect:
|
||||
it and the previous directive. A "``CHECK-SAME:``" cannot be the first
|
||||
directive in a file.
|
||||
|
||||
The "CHECK-EMPTY:" directive
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
If you need to check that the next line has nothing on it, not even whitespace,
|
||||
you can use the "``CHECK-EMPTY:``" directive.
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
foo
|
||||
|
||||
bar
|
||||
; CHECK: foo
|
||||
; CHECK-EMPTY:
|
||||
; CHECK-NEXT: bar
|
||||
|
||||
Just like "``CHECK-NEXT:``" the directive will fail if there is more than one
|
||||
newline before it finds the next blank line, and it cannot be the first
|
||||
directive in a file.
|
||||
|
||||
The "CHECK-NOT:" directive
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
@ -341,6 +381,25 @@ real bugs away.
|
||||
|
||||
In those cases, to enforce the order, use a non-DAG directive between DAG-blocks.
|
||||
|
||||
A ``CHECK-DAG:`` directive skips matches that overlap the matches of any
|
||||
preceding ``CHECK-DAG:`` directives in the same ``CHECK-DAG:`` block. Not only
|
||||
is this non-overlapping behavior consistent with other directives, but it's
|
||||
also necessary to handle sets of non-unique strings or patterns. For example,
|
||||
the following directives look for unordered log entries for two tasks in a
|
||||
parallel program, such as the OpenMP runtime:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
// CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin
|
||||
// CHECK-DAG: [[THREAD_ID]]: task_end
|
||||
//
|
||||
// CHECK-DAG: [[THREAD_ID:[0-9]+]]: task_begin
|
||||
// CHECK-DAG: [[THREAD_ID]]: task_end
|
||||
|
||||
The second pair of directives is guaranteed not to match the same log entries
|
||||
as the first pair even though the patterns are identical and even if the text
|
||||
of the log entries is identical because the thread ID manages to be reused.
|
||||
|
||||
The "CHECK-LABEL:" directive
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
|
@ -35,6 +35,15 @@ OPTIONS
|
||||
Produce a flat dSYM file. A ``.dwarf`` extension will be appended to the
|
||||
executable name unless the output file is specified using the -o option.
|
||||
|
||||
|
||||
.. option:: -z, --minimize
|
||||
|
||||
When used while creating a dSYM file, this option will suppress the emission of
|
||||
the .debug_inlines, .debug_pubnames, and .debug_pubtypes sections since
|
||||
dsymutil currently has better equivalents: .apple_names and .apple_types. When
|
||||
used in conjunction with --update option, this option will cause redundant
|
||||
accelerator tables to be removed.
|
||||
|
||||
.. option:: --no-odr
|
||||
|
||||
Do not use ODR (One Definition Rule) for uniquing C++ types.
|
||||
@ -61,10 +70,27 @@ OPTIONS
|
||||
|
||||
Specifies a ``path`` to prepend to all debug symbol object file paths.
|
||||
|
||||
.. option:: --papertrail
|
||||
|
||||
When running dsymutil as part of your build system, it can be desirable for
|
||||
warnings to be part of the end product, rather than just being emitted to the
|
||||
output stream. When enabled, warnings are embedded in the linked DWARF debug
|
||||
information.
|
||||
|
||||
.. option:: -s, --symtab
|
||||
|
||||
Dumps the symbol table found in *executable* or object file(s) and exits.
|
||||
|
||||
.. option:: --toolchain
|
||||
|
||||
Embed the toolchain in the dSYM bundle's property list.
|
||||
|
||||
.. option:: -u, --update
|
||||
|
||||
Update an existing dSYM file to contain the latest accelerator tables and
|
||||
other DWARF optimizations. This option will rebuild the '.apple_names' and
|
||||
'.apple_types' hashed accelerator tables.
|
||||
|
||||
.. option:: -v, --verbose
|
||||
|
||||
Display verbose information when linking.
|
||||
|
@ -31,6 +31,7 @@ Basic Commands
|
||||
llvm-symbolizer
|
||||
llvm-dwarfdump
|
||||
dsymutil
|
||||
llvm-mca
|
||||
|
||||
Debugging Tools
|
||||
~~~~~~~~~~~~~~~
|
||||
@ -52,5 +53,6 @@ Developer Tools
|
||||
tblgen
|
||||
lit
|
||||
llvm-build
|
||||
llvm-exegesis
|
||||
llvm-pdbutil
|
||||
llvm-readobj
|
||||
|
@ -85,6 +85,10 @@ OUTPUT OPTIONS
|
||||
Echo all commands to stdout, as they are being executed.
|
||||
This can be valuable for debugging test failures, as the last echoed command
|
||||
will be the one which has failed.
|
||||
:program:`lit` normally inserts a no-op command (``:`` in the case of bash)
|
||||
with argument ``'RUN: at line N'`` before each command pipeline, and this
|
||||
option also causes those no-op commands to be echoed to stdout to help you
|
||||
locate the source line of the failed command.
|
||||
This option implies ``--verbose``.
|
||||
|
||||
.. option:: -a, --show-all
|
||||
|
@ -135,7 +135,7 @@ End-user Options
|
||||
.. option:: -stack-size-section
|
||||
|
||||
Emit the .stack_sizes section which contains stack size metadata. The section
|
||||
contains an array of pairs of function symbol references (8 byte) and stack
|
||||
contains an array of pairs of function symbol values (pointer size) and stack
|
||||
sizes (unsigned LEB128). The stack size values only include the space allocated
|
||||
in the function prologue. Functions with dynamic stack allocations are not
|
||||
included.
|
||||
|
@ -246,6 +246,10 @@ OPTIONS
|
||||
|
||||
Show code coverage only for functions that match the given regular expression.
|
||||
|
||||
.. option:: -ignore-filename-regex=<PATTERN>
|
||||
|
||||
Skip source code files with file paths that match the given regular expression.
|
||||
|
||||
.. option:: -format=<FORMAT>
|
||||
|
||||
Use the specified output format. The supported formats are: "text", "html".
|
||||
@ -323,8 +327,8 @@ the binaries *BIN*,... using the profile data *PROFILE*. It can optionally be
|
||||
filtered to only show the coverage for the files listed in *SOURCES*.
|
||||
|
||||
If no source files are provided, a summary line is printed for each file in the
|
||||
coverage data. If any files are provided, summaries are shown for each function
|
||||
in the listed files instead.
|
||||
coverage data. If any files are provided, summaries can be shown for each
|
||||
function in the listed files if the ``-show-functions`` option is enabled.
|
||||
|
||||
For information on compiling programs for coverage and generating profile data,
|
||||
see :ref:`llvm-cov-show`.
|
||||
@ -351,6 +355,10 @@ OPTIONS
|
||||
|
||||
Show statistics for all function instantiations. Defaults to false.
|
||||
|
||||
.. option:: -ignore-filename-regex=<PATTERN>
|
||||
|
||||
Skip source code files with file paths that match the given regular expression.
|
||||
|
||||
.. program:: llvm-cov export
|
||||
|
||||
.. _llvm-cov-export:
|
||||
@ -361,14 +369,15 @@ EXPORT COMMAND
|
||||
SYNOPSIS
|
||||
^^^^^^^^
|
||||
|
||||
:program:`llvm-cov export` [*options*] -instr-profile *PROFILE* *BIN* [*-object BIN,...*] [[*-object BIN*]]
|
||||
:program:`llvm-cov export` [*options*] -instr-profile *PROFILE* *BIN* [*-object BIN,...*] [[*-object BIN*]] [*SOURCES*]
|
||||
|
||||
DESCRIPTION
|
||||
^^^^^^^^^^^
|
||||
|
||||
The :program:`llvm-cov export` command exports regions, functions, expansions,
|
||||
and summaries of the coverage of the binaries *BIN*,... using the profile data
|
||||
*PROFILE* as JSON.
|
||||
*PROFILE* as JSON. It can optionally be filtered to only export the coverage
|
||||
for the files listed in *SOURCES*.
|
||||
|
||||
For information on compiling programs for coverage and generating profile data,
|
||||
see :ref:`llvm-cov-show`.
|
||||
@ -389,3 +398,7 @@ OPTIONS
|
||||
will not export coverage information for smaller units such as individual
|
||||
functions or regions. The result will be the same as produced by the
|
||||
:program:`llvm-cov report` command, but presented in JSON format rather than text.
|
||||
|
||||
.. option:: -ignore-filename-regex=<PATTERN>
|
||||
|
||||
Skip source code files with file paths that match the given regular expression.
|
||||
|
BIN
docs/CommandGuide/llvm-exegesis-analysis.png
Normal file
BIN
docs/CommandGuide/llvm-exegesis-analysis.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 34 KiB |
186
docs/CommandGuide/llvm-exegesis.rst
Normal file
186
docs/CommandGuide/llvm-exegesis.rst
Normal file
@ -0,0 +1,186 @@
|
||||
llvm-exegesis - LLVM Machine Instruction Benchmark
|
||||
==================================================
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
|
||||
:program:`llvm-exegesis` [*options*]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
||||
:program:`llvm-exegesis` is a benchmarking tool that uses information available
|
||||
in LLVM to measure host machine instruction characteristics like latency or port
|
||||
decomposition.
|
||||
|
||||
Given an LLVM opcode name and a benchmarking mode, :program:`llvm-exegesis`
|
||||
generates a code snippet that makes execution as serial (resp. as parallel) as
|
||||
possible so that we can measure the latency (resp. uop decomposition) of the
|
||||
instruction.
|
||||
The code snippet is jitted and executed on the host subtarget. The time taken
|
||||
(resp. resource usage) is measured using hardware performance counters. The
|
||||
result is printed out as YAML to the standard output.
|
||||
|
||||
The main goal of this tool is to automatically (in)validate LLVM's TableGen
|
||||
scheduling models. To that end, we also provide analysis of the results.
|
||||
|
||||
EXAMPLES: benchmarking
|
||||
----------------------
|
||||
|
||||
Assume you have an X86-64 machine. To measure the latency of a single
|
||||
instruction, run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ llvm-exegesis -mode=latency -opcode-name=ADD64rr
|
||||
|
||||
Measuring the uop decomposition of an instruction works similarly:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ llvm-exegesis -mode=uops -opcode-name=ADD64rr
|
||||
|
||||
The output is a YAML document (the default is to write to stdout, but you can
|
||||
redirect the output to a file using `-benchmarks-file`):
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
---
|
||||
key:
|
||||
opcode_name: ADD64rr
|
||||
mode: latency
|
||||
config: ''
|
||||
cpu_name: haswell
|
||||
llvm_triple: x86_64-unknown-linux-gnu
|
||||
num_repetitions: 10000
|
||||
measurements:
|
||||
- { key: latency, value: 1.0058, debug_string: '' }
|
||||
error: ''
|
||||
info: 'explicit self cycles, selecting one aliasing configuration.
|
||||
Snippet:
|
||||
ADD64rr R8, R8, R10
|
||||
'
|
||||
...
|
||||
|
||||
To measure the latency of all instructions for the host architecture, run:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
#!/bin/bash
|
||||
readonly INSTRUCTIONS=$(($(grep INSTRUCTION_LIST_END build/lib/Target/X86/X86GenInstrInfo.inc | cut -f2 -d=) - 1))
|
||||
for INSTRUCTION in $(seq 1 ${INSTRUCTIONS});
|
||||
do
|
||||
./build/bin/llvm-exegesis -mode=latency -opcode-index=${INSTRUCTION} | sed -n '/---/,$p'
|
||||
done
|
||||
|
||||
FIXME: Provide an :program:`llvm-exegesis` option to test all instructions.
|
||||
|
||||
EXAMPLES: analysis
|
||||
----------------------
|
||||
|
||||
Assuming you have a set of benchmarked instructions (either latency or uops) as
|
||||
YAML in file `/tmp/benchmarks.yaml`, you can analyze the results using the
|
||||
following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ llvm-exegesis -mode=analysis \
|
||||
-benchmarks-file=/tmp/benchmarks.yaml \
|
||||
-analysis-clusters-output-file=/tmp/clusters.csv \
|
||||
-analysis-inconsistencies-output-file=/tmp/inconsistencies.html
|
||||
|
||||
This will group the instructions into clusters with the same performance
|
||||
characteristics. The clusters will be written out to `/tmp/clusters.csv` in the
|
||||
following format:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
cluster_id,opcode_name,config,sched_class
|
||||
...
|
||||
2,ADD32ri8_DB,,WriteALU,1.00
|
||||
2,ADD32ri_DB,,WriteALU,1.01
|
||||
2,ADD32rr,,WriteALU,1.01
|
||||
2,ADD32rr_DB,,WriteALU,1.00
|
||||
2,ADD32rr_REV,,WriteALU,1.00
|
||||
2,ADD64i32,,WriteALU,1.01
|
||||
2,ADD64ri32,,WriteALU,1.01
|
||||
2,MOVSX64rr32,,BSWAP32r_BSWAP64r_MOVSX64rr32,1.00
|
||||
2,VPADDQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.02
|
||||
2,VPSUBQYrr,,VPADDBYrr_VPADDDYrr_VPADDQYrr_VPADDWYrr_VPSUBBYrr_VPSUBDYrr_VPSUBQYrr_VPSUBWYrr,1.01
|
||||
2,ADD64ri8,,WriteALU,1.00
|
||||
2,SETBr,,WriteSETCC,1.01
|
||||
...
|
||||
|
||||
:program:`llvm-exegesis` will also analyze the clusters to point out
|
||||
inconsistencies in the scheduling information. The output is an HTML file. For
|
||||
example, `/tmp/inconsistencies.html` will contain messages like the following:
|
||||
|
||||
.. image:: llvm-exegesis-analysis.png
|
||||
:align: center
|
||||
|
||||
Note that the scheduling class names will be resolved only when
|
||||
:program:`llvm-exegesis` is compiled in debug mode, else only the class id will
|
||||
be shown. This does not invalidate any of the analysis results though.
|
||||
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
|
||||
.. option:: -help
|
||||
|
||||
Print a summary of command line options.
|
||||
|
||||
.. option:: -opcode-index=<LLVM opcode index>
|
||||
|
||||
Specify the opcode to measure, by index.
|
||||
Either `opcode-index` or `opcode-name` must be set.
|
||||
|
||||
.. option:: -opcode-name=<LLVM opcode name>
|
||||
|
||||
Specify the opcode to measure, by name.
|
||||
Either `opcode-index` or `opcode-name` must be set.
|
||||
|
||||
.. option:: -mode=[latency|uops|analysis]
|
||||
|
||||
Specify the run mode.
|
||||
|
||||
.. option:: -num-repetitions=<Number of repetitions>
|
||||
|
||||
Specify the number of repetitions of the asm snippet.
|
||||
Higher values lead to more accurate measurements but lengthen the benchmark.
|
||||
|
||||
.. option:: -benchmarks-file=</path/to/file>
|
||||
|
||||
File to read (`analysis` mode) or write (`latency`/`uops` modes) benchmark
|
||||
results. "-" uses stdin/stdout.
|
||||
|
||||
.. option:: -analysis-clusters-output-file=</path/to/file>
|
||||
|
||||
If provided, write the analysis clusters as CSV to this file. "-" prints to
|
||||
stdout.
|
||||
|
||||
.. option:: -analysis-inconsistencies-output-file=</path/to/file>
|
||||
|
||||
If non-empty, write inconsistencies found during analysis to this file. `-`
|
||||
prints to stdout.
|
||||
|
||||
.. option:: -analysis-numpoints=<dbscan numPoints parameter>
|
||||
|
||||
Specify the numPoints parameter to be used for DBSCAN clustering
|
||||
(`analysis` mode).
|
||||
|
||||
.. option:: -analysis-epsilon=<dbscan epsilon parameter>
|
||||
|
||||
Specify the epsilon parameter to be used for DBSCAN clustering
|
||||
(`analysis` mode).
|
||||
|
||||
.. option:: -ignore-invalid-sched-class=false
|
||||
|
||||
If set, ignore instructions that do not have a sched class (class idx = 0).
|
||||
|
||||
|
||||
EXIT STATUS
|
||||
-----------
|
||||
|
||||
:program:`llvm-exegesis` returns 0 on success. Otherwise, an error message is
|
||||
printed to standard error, and the tool returns a non-zero value.
|
551
docs/CommandGuide/llvm-mca.rst
Normal file
551
docs/CommandGuide/llvm-mca.rst
Normal file
@ -0,0 +1,551 @@
|
||||
llvm-mca - LLVM Machine Code Analyzer
|
||||
=====================================
|
||||
|
||||
SYNOPSIS
|
||||
--------
|
||||
|
||||
:program:`llvm-mca` [*options*] [input]
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
|
||||
:program:`llvm-mca` is a performance analysis tool that uses information
|
||||
available in LLVM (e.g. scheduling models) to statically measure the performance
|
||||
of machine code in a specific CPU.
|
||||
|
||||
Performance is measured in terms of throughput as well as processor resource
|
||||
consumption. The tool currently works for processors with an out-of-order
|
||||
backend, for which there is a scheduling model available in LLVM.
|
||||
|
||||
The main goal of this tool is not just to predict the performance of the code
|
||||
when run on the target, but also help with diagnosing potential performance
|
||||
issues.
|
||||
|
||||
Given an assembly code sequence, llvm-mca estimates the Instructions Per Cycle
|
||||
(IPC), as well as hardware resource pressure. The analysis and reporting style
|
||||
were inspired by the IACA tool from Intel.
|
||||
|
||||
:program:`llvm-mca` allows the usage of special code comments to mark regions of
|
||||
the assembly code to be analyzed. A comment starting with substring
|
||||
``LLVM-MCA-BEGIN`` marks the beginning of a code region. A comment starting with
|
||||
substring ``LLVM-MCA-END`` marks the end of a code region. For example:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
# LLVM-MCA-BEGIN My Code Region
|
||||
...
|
||||
# LLVM-MCA-END
|
||||
|
||||
Multiple regions can be specified provided that they do not overlap. A code
|
||||
region can have an optional description. If no user-defined region is specified,
|
||||
then :program:`llvm-mca` assumes a default region which contains every
|
||||
instruction in the input file. Every region is analyzed in isolation, and the
|
||||
final performance report is the union of all the reports generated for every
|
||||
code region.
|
||||
|
||||
Inline assembly directives may be used from source code to annotate the
|
||||
assembly text:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
int foo(int a, int b) {
|
||||
__asm volatile("# LLVM-MCA-BEGIN foo");
|
||||
a += 42;
|
||||
__asm volatile("# LLVM-MCA-END");
|
||||
a *= b;
|
||||
return a;
|
||||
}
|
||||
|
||||
So for example, you can compile code with clang, output assembly, and pipe it
|
||||
directly into llvm-mca for analysis:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ clang foo.c -O2 -target x86_64-unknown-unknown -S -o - | llvm-mca -mcpu=btver2
|
||||
|
||||
Or for Intel syntax:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ clang foo.c -O2 -target x86_64-unknown-unknown -mllvm -x86-asm-syntax=intel -S -o - | llvm-mca -mcpu=btver2
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
|
||||
If ``input`` is "``-``" or omitted, :program:`llvm-mca` reads from standard
|
||||
input. Otherwise, it will read from the specified filename.
|
||||
|
||||
If the :option:`-o` option is omitted, then :program:`llvm-mca` will send its output
|
||||
to standard output if the input is from standard input. If the :option:`-o`
|
||||
option specifies "``-``", then the output will also be sent to standard output.
|
||||
|
||||
|
||||
.. option:: -help
|
||||
|
||||
Print a summary of command line options.
|
||||
|
||||
.. option:: -mtriple=<target triple>
|
||||
|
||||
Specify a target triple string.
|
||||
|
||||
.. option:: -march=<arch>
|
||||
|
||||
Specify the architecture for which to analyze the code. It defaults to the
|
||||
host default target.
|
||||
|
||||
.. option:: -mcpu=<cpuname>
|
||||
|
||||
Specify the processor for which to analyze the code. By default, the cpu name
|
||||
is autodetected from the host.
|
||||
|
||||
.. option:: -output-asm-variant=<variant id>
|
||||
|
||||
Specify the output assembly variant for the report generated by the tool.
|
||||
On x86, possible values are [0, 1]. A value of 0 (resp. 1) for this flag enables
|
||||
the AT&T (resp. Intel) assembly format for the code printed out by the tool in
|
||||
the analysis report.
|
||||
|
||||
.. option:: -dispatch=<width>
|
||||
|
||||
Specify a different dispatch width for the processor. The dispatch width
|
||||
defaults to field 'IssueWidth' in the processor scheduling model. If width is
|
||||
zero, then the default dispatch width is used.
|
||||
|
||||
.. option:: -register-file-size=<size>
|
||||
|
||||
Specify the size of the register file. When specified, this flag limits how
|
||||
many temporary registers are available for register renaming purposes. A value
|
||||
of zero for this flag means "unlimited number of temporary registers".
|
||||
|
||||
.. option:: -iterations=<number of iterations>
|
||||
|
||||
Specify the number of iterations to run. If this flag is set to 0, then the
|
||||
tool sets the number of iterations to a default value (i.e. 100).
|
||||
|
||||
.. option:: -noalias=<bool>
|
||||
|
||||
If set, the tool assumes that loads and stores don't alias. This is the
|
||||
default behavior.
|
||||
|
||||
.. option:: -lqueue=<load queue size>
|
||||
|
||||
Specify the size of the load queue in the load/store unit emulated by the tool.
|
||||
By default, the tool assumes an unbounded number of entries in the load queue.
|
||||
A value of zero for this flag is ignored, and the default load queue size is
|
||||
used instead.
|
||||
|
||||
.. option:: -squeue=<store queue size>
|
||||
|
||||
Specify the size of the store queue in the load/store unit emulated by the
|
||||
tool. By default, the tool assumes an unbounded number of entries in the store
|
||||
queue. A value of zero for this flag is ignored, and the default store queue
|
||||
size is used instead.
|
||||
|
||||
.. option:: -timeline
|
||||
|
||||
Enable the timeline view.
|
||||
|
||||
.. option:: -timeline-max-iterations=<iterations>
|
||||
|
||||
Limit the number of iterations to print in the timeline view. By default, the
|
||||
timeline view prints information for up to 10 iterations.
|
||||
|
||||
.. option:: -timeline-max-cycles=<cycles>
|
||||
|
||||
Limit the number of cycles in the timeline view. By default, the number of
|
||||
cycles is set to 80.
|
||||
|
||||
.. option:: -resource-pressure
|
||||
|
||||
Enable the resource pressure view. This is enabled by default.
|
||||
|
||||
.. option:: -register-file-stats
|
||||
|
||||
Enable register file usage statistics.
|
||||
|
||||
.. option:: -dispatch-stats
|
||||
|
||||
Enable extra dispatch statistics. This view collects and analyzes instruction
|
||||
dispatch events, as well as static/dynamic dispatch stall events. This view
|
||||
is disabled by default.
|
||||
|
||||
.. option:: -scheduler-stats
|
||||
|
||||
Enable extra scheduler statistics. This view collects and analyzes instruction
|
||||
issue events. This view is disabled by default.
|
||||
|
||||
.. option:: -retire-stats
|
||||
|
||||
Enable extra retire control unit statistics. This view is disabled by default.
|
||||
|
||||
.. option:: -instruction-info
|
||||
|
||||
Enable the instruction info view. This is enabled by default.
|
||||
|
||||
.. option:: -all-stats
|
||||
|
||||
Print all hardware statistics. This enables extra statistics related to the
|
||||
dispatch logic, the hardware schedulers, the register file(s), and the retire
|
||||
control unit. This option is disabled by default.
|
||||
|
||||
.. option:: -all-views
|
||||
|
||||
Enable all the views.
|
||||
|
||||
.. option:: -instruction-tables
|
||||
|
||||
Prints resource pressure information based on the static information
|
||||
available from the processor model. This differs from the resource pressure
|
||||
view because it doesn't require that the code is simulated. It instead prints
|
||||
the theoretical uniform distribution of resource pressure for every
|
||||
instruction in sequence.
|
||||
|
||||
|
||||
EXIT STATUS
|
||||
-----------
|
||||
|
||||
:program:`llvm-mca` returns 0 on success. Otherwise, an error message is printed
|
||||
to standard error, and the tool returns 1.
|
||||
|
||||
HOW MCA WORKS
|
||||
-------------
|
||||
|
||||
MCA takes assembly code as input. The assembly code is parsed into a sequence
|
||||
of MCInst with the help of the existing LLVM target assembly parsers. The
|
||||
parsed sequence of MCInst is then analyzed by a ``Pipeline`` module to generate
|
||||
a performance report.
|
||||
|
||||
The Pipeline module simulates the execution of the machine code sequence in a
|
||||
loop of iterations (default is 100). During this process, the pipeline collects
|
||||
a number of execution related statistics. At the end of this process, the
|
||||
pipeline generates and prints a report from the collected statistics.
|
||||
|
||||
Here is an example of a performance report generated by MCA for a dot-product
|
||||
of two packed float vectors of four elements. The analysis is conducted for
|
||||
target x86, cpu btver2. The following result can be produced via the following
|
||||
command using the example located at
|
||||
``test/tools/llvm-mca/X86/BtVer2/dot-product.s``:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=300 dot-product.s
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Iterations: 300
|
||||
Instructions: 900
|
||||
Total Cycles: 610
|
||||
Dispatch Width: 2
|
||||
IPC: 1.48
|
||||
Block RThroughput: 2.0
|
||||
|
||||
|
||||
Instruction Info:
|
||||
[1]: #uOps
|
||||
[2]: Latency
|
||||
[3]: RThroughput
|
||||
[4]: MayLoad
|
||||
[5]: MayStore
|
||||
[6]: HasSideEffects (U)
|
||||
|
||||
[1] [2] [3] [4] [5] [6] Instructions:
|
||||
1 2 1.00 vmulps %xmm0, %xmm1, %xmm2
|
||||
1 3 1.00 vhaddps %xmm2, %xmm2, %xmm3
|
||||
1 3 1.00 vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
|
||||
Resources:
|
||||
[0] - JALU0
|
||||
[1] - JALU1
|
||||
[2] - JDiv
|
||||
[3] - JFPA
|
||||
[4] - JFPM
|
||||
[5] - JFPU0
|
||||
[6] - JFPU1
|
||||
[7] - JLAGU
|
||||
[8] - JMul
|
||||
[9] - JSAGU
|
||||
[10] - JSTC
|
||||
[11] - JVALU0
|
||||
[12] - JVALU1
|
||||
[13] - JVIMUL
|
||||
|
||||
|
||||
Resource pressure per iteration:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
|
||||
- - - 2.00 1.00 2.00 1.00 - - - - - - -
|
||||
|
||||
Resource pressure by instruction:
|
||||
[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
|
||||
- - - - 1.00 - 1.00 - - - - - - - vmulps %xmm0, %xmm1, %xmm2
|
||||
- - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm2, %xmm2, %xmm3
|
||||
- - - 1.00 - 1.00 - - - - - - - - vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
According to this report, the dot-product kernel has been executed 300 times,
|
||||
for a total of 900 dynamically executed instructions.
|
||||
|
||||
The report is structured in three main sections. The first section collects a
|
||||
few performance numbers; the goal of this section is to give a very quick
|
||||
overview of the performance throughput. In this example, the two important
|
||||
performance indicators are the predicted total number of cycles, and the IPC.
|
||||
IPC is probably the most important throughput indicator. A big delta between
|
||||
the Dispatch Width and the computed IPC is an indicator of potential
|
||||
performance issues.
|
||||
|
||||
The second section of the report shows the latency and reciprocal
|
||||
throughput of every instruction in the sequence. That section also reports
|
||||
extra information related to the number of micro opcodes, and opcode properties
|
||||
(i.e., 'MayLoad', 'MayStore', and 'HasSideEffects').
|
||||
|
||||
The third section is the *Resource pressure view*. This view reports
|
||||
the average number of resource cycles consumed every iteration by instructions
|
||||
for every processor resource unit available on the target. Information is
|
||||
structured in two tables. The first table reports the number of resource cycles
|
||||
spent on average every iteration. The second table correlates the resource
|
||||
cycles to the machine instruction in the sequence. For example, every iteration
|
||||
of the instruction vmulps always executes on resource unit [6]
|
||||
(JFPU1 - floating point pipeline #1), consuming an average of 1 resource cycle
|
||||
per iteration. Note that on AMD Jaguar, vector floating-point multiply can
|
||||
only be issued to pipeline JFPU1, while horizontal floating-point additions can
|
||||
only be issued to pipeline JFPU0.
|
||||
|
||||
The resource pressure view helps with identifying bottlenecks caused by high
|
||||
usage of specific hardware resources. Situations with resource pressure mainly
|
||||
concentrated on a few resources should, in general, be avoided. Ideally,
|
||||
pressure should be uniformly distributed between multiple resources.
|
||||
|
||||
Timeline View
|
||||
^^^^^^^^^^^^^
|
||||
MCA's timeline view produces a detailed report of each instruction's state
|
||||
transitions through an instruction pipeline. This view is enabled by the
|
||||
command line option ``-timeline``. As instructions transition through the
|
||||
various stages of the pipeline, their states are depicted in the view report.
|
||||
These states are represented by the following characters:
|
||||
|
||||
* D : Instruction dispatched.
|
||||
* e : Instruction executing.
|
||||
* E : Instruction executed.
|
||||
* R : Instruction retired.
|
||||
* = : Instruction already dispatched, waiting to be executed.
|
||||
* \- : Instruction executed, waiting to be retired.
|
||||
|
||||
Below is the timeline view for a subset of the dot-product example located in
|
||||
``test/tools/llvm-mca/X86/BtVer2/dot-product.s`` and processed by
|
||||
MCA using the following command:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=3 -timeline dot-product.s
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Timeline view:
|
||||
012345
|
||||
Index 0123456789
|
||||
|
||||
[0,0] DeeER. . . vmulps %xmm0, %xmm1, %xmm2
|
||||
[0,1] D==eeeER . . vhaddps %xmm2, %xmm2, %xmm3
|
||||
[0,2] .D====eeeER . vhaddps %xmm3, %xmm3, %xmm4
|
||||
[1,0] .DeeE-----R . vmulps %xmm0, %xmm1, %xmm2
|
||||
[1,1] . D=eeeE---R . vhaddps %xmm2, %xmm2, %xmm3
|
||||
[1,2] . D====eeeER . vhaddps %xmm3, %xmm3, %xmm4
|
||||
[2,0] . DeeE-----R . vmulps %xmm0, %xmm1, %xmm2
|
||||
[2,1] . D====eeeER . vhaddps %xmm2, %xmm2, %xmm3
|
||||
[2,2] . D======eeeER vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
|
||||
Average Wait times (based on the timeline view):
|
||||
[0]: Executions
|
||||
[1]: Average time spent waiting in a scheduler's queue
|
||||
[2]: Average time spent waiting in a scheduler's queue while ready
|
||||
[3]: Average time elapsed from WB until retire stage
|
||||
|
||||
[0] [1] [2] [3]
|
||||
0. 3 1.0 1.0 3.3 vmulps %xmm0, %xmm1, %xmm2
|
||||
1. 3 3.3 0.7 1.0 vhaddps %xmm2, %xmm2, %xmm3
|
||||
2. 3 5.7 0.0 0.0 vhaddps %xmm3, %xmm3, %xmm4
|
||||
|
||||
The timeline view is interesting because it shows instruction state changes
|
||||
during execution. It also gives an idea of how MCA processes instructions
|
||||
executed on the target, and how their timing information might be calculated.
|
||||
|
||||
The timeline view is structured in two tables. The first table shows
|
||||
instructions changing state over time (measured in cycles); the second table
|
||||
(named *Average Wait times*) reports useful timing statistics, which should
|
||||
help diagnose performance bottlenecks caused by long data dependencies and
|
||||
sub-optimal usage of hardware resources.
|
||||
|
||||
An instruction in the timeline view is identified by a pair of indices, where
|
||||
the first index identifies an iteration, and the second index is the
|
||||
instruction index (i.e., where it appears in the code sequence). Since this
|
||||
example was generated using 3 iterations: ``-iterations=3``, the iteration
|
||||
indices range from 0-2 inclusively.
|
||||
|
||||
Excluding the first and last column, the remaining columns are in cycles.
|
||||
Cycles are numbered sequentially starting from 0.
|
||||
|
||||
From the example output above, we know the following:
|
||||
|
||||
* Instruction [1,0] was dispatched at cycle 1.
|
||||
* Instruction [1,0] started executing at cycle 2.
|
||||
* Instruction [1,0] reached the write back stage at cycle 4.
|
||||
* Instruction [1,0] was retired at cycle 10.
|
||||
|
||||
Instruction [1,0] (i.e., vmulps from iteration #1) does not have to wait in the
|
||||
scheduler's queue for the operands to become available. By the time vmulps is
|
||||
dispatched, operands are already available, and pipeline JFPU1 is ready to
|
||||
serve another instruction. So the instruction can be immediately issued on the
|
||||
JFPU1 pipeline. That is demonstrated by the fact that the instruction only
|
||||
spent 1cy in the scheduler's queue.
|
||||
|
||||
There is a gap of 5 cycles between the write-back stage and the retire event.
|
||||
That is because instructions must retire in program order, so [1,0] has to wait
|
||||
for [0,2] to be retired first (i.e., it has to wait until cycle 10).
|
||||
|
||||
In the example, all instructions are in a RAW (Read After Write) dependency
|
||||
chain. Register %xmm2 written by vmulps is immediately used by the first
|
||||
vhaddps, and register %xmm3 written by the first vhaddps is used by the second
|
||||
vhaddps. Long data dependencies negatively impact the ILP (Instruction Level
|
||||
Parallelism).
|
||||
|
||||
In the dot-product example, there are anti-dependencies introduced by
|
||||
instructions from different iterations. However, those dependencies can be
|
||||
removed at register renaming stage (at the cost of allocating register aliases,
|
||||
and therefore consuming temporary registers).
|
||||
|
||||
Table *Average Wait times* helps diagnose performance issues that are caused by
|
||||
the presence of long latency instructions and potentially long data dependencies
|
||||
which may limit the ILP. Note that MCA, by default, assumes at least 1cy
|
||||
between the dispatch event and the issue event.
|
||||
|
||||
When the performance is limited by data dependencies and/or long latency
|
||||
instructions, the number of cycles spent while in the *ready* state is expected
|
||||
to be very small when compared with the total number of cycles spent in the
|
||||
scheduler's queue. The difference between the two counters is a good indicator
|
||||
of how large of an impact data dependencies had on the execution of the
|
||||
instructions. When performance is mostly limited by the lack of hardware
|
||||
resources, the delta between the two counters is small. However, the number of
|
||||
cycles spent in the queue tends to be larger (i.e., more than 1-3cy),
|
||||
especially when compared to other low latency instructions.
|
||||
|
||||
Extra Statistics to Further Diagnose Performance Issues
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
The ``-all-stats`` command line option enables extra statistics and performance
|
||||
counters for the dispatch logic, the reorder buffer, the retire control unit,
|
||||
and the register file.
|
||||
|
||||
Below is an example of ``-all-stats`` output generated by MCA for the
|
||||
dot-product example discussed in the previous sections.
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
Dynamic Dispatch Stall Cycles:
|
||||
RAT - Register unavailable: 0
|
||||
RCU - Retire tokens unavailable: 0
|
||||
SCHEDQ - Scheduler full: 272
|
||||
LQ - Load queue full: 0
|
||||
SQ - Store queue full: 0
|
||||
GROUP - Static restrictions on the dispatch group: 0
|
||||
|
||||
|
||||
Dispatch Logic - number of cycles where we saw N instructions dispatched:
|
||||
[# dispatched], [# cycles]
|
||||
0, 24 (3.9%)
|
||||
1, 272 (44.6%)
|
||||
2, 314 (51.5%)
|
||||
|
||||
|
||||
Schedulers - number of cycles where we saw N instructions issued:
|
||||
[# issued], [# cycles]
|
||||
0, 7 (1.1%)
|
||||
1, 306 (50.2%)
|
||||
2, 297 (48.7%)
|
||||
|
||||
|
||||
Scheduler's queue usage:
|
||||
JALU01, 0/20
|
||||
JFPU01, 18/18
|
||||
JLSAGU, 0/12
|
||||
|
||||
|
||||
Retire Control Unit - number of cycles where we saw N instructions retired:
|
||||
[# retired], [# cycles]
|
||||
0, 109 (17.9%)
|
||||
1, 102 (16.7%)
|
||||
2, 399 (65.4%)
|
||||
|
||||
|
||||
Register File statistics:
|
||||
Total number of mappings created: 900
|
||||
Max number of mappings used: 35
|
||||
|
||||
* Register File #1 -- JFpuPRF:
|
||||
Number of physical registers: 72
|
||||
Total number of mappings created: 900
|
||||
Max number of mappings used: 35
|
||||
|
||||
* Register File #2 -- JIntegerPRF:
|
||||
Number of physical registers: 64
|
||||
Total number of mappings created: 0
|
||||
Max number of mappings used: 0
|
||||
|
||||
If we look at the *Dynamic Dispatch Stall Cycles* table, we see the counter for
|
||||
SCHEDQ reports 272 cycles. This counter is incremented every time the dispatch
|
||||
logic is unable to dispatch a group of two instructions because the scheduler's
|
||||
queue is full.
|
||||
|
||||
Looking at the *Dispatch Logic* table, we see that the pipeline was only able
|
||||
to dispatch two instructions 51.5% of the time. The dispatch group was limited
|
||||
to one instruction 44.6% of the cycles, which corresponds to 272 cycles. The
|
||||
dispatch statistics are displayed by either using the command option
|
||||
``-all-stats`` or ``-dispatch-stats``.
|
||||
|
||||
The next table, *Schedulers*, presents a histogram displaying a count,
|
||||
representing the number of instructions issued on some number of cycles. In
|
||||
this case, of the 610 simulated cycles, single
|
||||
instructions were issued 306 times (50.2%) and there were 7 cycles where
|
||||
no instructions were issued.
|
||||
|
||||
The *Scheduler's queue usage* table shows the maximum number of buffer
|
||||
entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01
|
||||
reached its maximum (18 of 18 queue entries). Note that AMD Jaguar implements
|
||||
three schedulers:
|
||||
|
||||
* JALU01 - A scheduler for ALU instructions.
|
||||
* JFPU01 - A scheduler for floating point operations.
|
||||
* JLSAGU - A scheduler for address generation.
|
||||
|
||||
The dot-product is a kernel of three floating point instructions (a vector
|
||||
multiply followed by two horizontal adds). That explains why only the floating
|
||||
point scheduler appears to be used.
|
||||
|
||||
A full scheduler queue is either caused by data dependency chains or by a
|
||||
sub-optimal usage of hardware resources. Sometimes, resource pressure can be
|
||||
mitigated by rewriting the kernel using different instructions that consume
|
||||
different scheduler resources. Schedulers with a small queue are less resilient
|
||||
to bottlenecks caused by the presence of long data dependencies.
|
||||
The scheduler statistics are displayed by
|
||||
using the command option ``-all-stats`` or ``-scheduler-stats``.
|
||||
|
||||
The next table, *Retire Control Unit*, presents a histogram displaying a count,
|
||||
representing the number of instructions retired on some number of cycles. In
|
||||
this case, of the 610 simulated cycles, two instructions were retired during
|
||||
the same cycle 399 times (65.4%) and there were 109 cycles where no
|
||||
instructions were retired. The retire statistics are displayed by using the
|
||||
command option ``-all-stats`` or ``-retire-stats``.
|
||||
|
||||
The last table presented is *Register File statistics*. Each physical register
|
||||
file (PRF) used by the pipeline is presented in this table. In the case of AMD
|
||||
Jaguar, there are two register files, one for floating-point registers
|
||||
(JFpuPRF) and one for integer registers (JIntegerPRF). The table shows that of
|
||||
the 900 instructions processed, there were 900 mappings created. Since this
|
||||
dot-product example utilized only floating point registers, the JFpuPRF was
|
||||
responsible for creating the 900 mappings. However, we see that the pipeline
|
||||
only used a maximum of 35 of 72 available register slots at any given time. We
|
||||
can conclude that the floating point PRF was the only register file used for
|
||||
the example, and that it was never resource constrained. The register file
|
||||
statistics are displayed by using the command option ``-all-stats`` or
|
||||
``-register-file-stats``.
|
||||
|
||||
In this example, we can conclude that the IPC is mostly limited by data
|
||||
dependencies, and not by resource pressure.
|
@ -93,6 +93,10 @@ OPTIONS
|
||||
Print only symbols whose definitions are external; that is, accessible
|
||||
from other files.
|
||||
|
||||
.. option:: --no-weak, -W
|
||||
|
||||
Don't print any weak symbols in the output.
|
||||
|
||||
.. option:: --format=format, -f format
|
||||
|
||||
Select an output format; *format* may be *sysv*, *posix*, or *bsd*. The default
|
||||
|
@ -96,7 +96,7 @@ OPTIONS
|
||||
.. option:: -debug
|
||||
|
||||
If this is a debug build, this option will enable debug printouts from passes
|
||||
which use the ``DEBUG()`` macro. See the `LLVM Programmer's Manual
|
||||
which use the ``LLVM_DEBUG()`` macro. See the `LLVM Programmer's Manual
|
||||
<../ProgrammersManual.html>`_, section ``#DEBUG`` for more information.
|
||||
|
||||
.. option:: -load=<plugin>
|
||||
|
@ -57,6 +57,11 @@ OPTIONS
|
||||
|
||||
Print all records to standard output (default).
|
||||
|
||||
.. option:: -dump-json
|
||||
|
||||
Print a JSON representation of all records, suitable for further
|
||||
automated processing.
|
||||
|
||||
.. option:: -print-enums
|
||||
|
||||
Print enumeration values for a class.
|
||||
@ -109,9 +114,13 @@ OPTIONS
|
||||
|
||||
Generate subtarget enumerations.
|
||||
|
||||
.. option:: -gen-intrinsic
|
||||
.. option:: -gen-intrinsic-enums
|
||||
|
||||
Generate intrinsic information.
|
||||
Generate intrinsic enums.
|
||||
|
||||
.. option:: -gen-intrinsic-impl
|
||||
|
||||
Generate intrinsic implementation.
|
||||
|
||||
.. option:: -gen-tgt-intrinsic
|
||||
|
||||
|
@ -886,12 +886,12 @@ To do this, set up your .h file with your option, like this for example:
|
||||
// debug build, then the code specified as the option to the macro will be
|
||||
// executed. Otherwise it will not be.
|
||||
#ifdef NDEBUG
|
||||
#define DEBUG(X)
|
||||
#define LLVM_DEBUG(X)
|
||||
#else
|
||||
#define DEBUG(X) do { if (DebugFlag) { X; } } while (0)
|
||||
#define LLVM_DEBUG(X) do { if (DebugFlag) { X; } } while (0)
|
||||
#endif
|
||||
|
||||
This allows clients to blissfully use the ``DEBUG()`` macro, or the
|
||||
This allows clients to blissfully use the ``LLVM_DEBUG()`` macro, or the
|
||||
``DebugFlag`` explicitly if they want to. Now we just need to be able to set
|
||||
the ``DebugFlag`` boolean when the option is set. To do this, we pass an
|
||||
additional argument to our command line argument processor, and we specify where
|
||||
@ -1716,7 +1716,7 @@ line option outside of the library. In these cases the library does or should
|
||||
provide an external storage location that is accessible to users of the
|
||||
library. Examples of this include the ``llvm::DebugFlag`` exported by the
|
||||
``lib/Support/Debug.cpp`` file and the ``llvm::TimePassesIsEnabled`` flag
|
||||
exported by the ``lib/VMCore/PassManager.cpp`` file.
|
||||
exported by the ``lib/IR/PassManager.cpp`` file.
|
||||
|
||||
.. todo::
|
||||
|
||||
|
@ -40,7 +40,7 @@ Lanai
|
||||
MIPS
|
||||
----
|
||||
|
||||
* `MIPS Processor Architecture <https://imgtec.com/mips/architectures/>`_
|
||||
* `MIPS Processor Architecture <https://www.mips.com/products/>`_
|
||||
|
||||
* `MIPS 64-bit ELF Object File Specification <http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf>`_
|
||||
|
||||
|
127
docs/Contributing.rst
Normal file
127
docs/Contributing.rst
Normal file
@ -0,0 +1,127 @@
|
||||
==================================
|
||||
Contributing to LLVM
|
||||
==================================
|
||||
|
||||
|
||||
Thank you for your interest in contributing to LLVM! There are multiple ways to
|
||||
contribute, and we appreciate all contributions. In case you
|
||||
have questions, you can either use the `Developer's List (llvm-dev)`_
|
||||
or the #llvm channel on `irc.oftc.net`_.
|
||||
|
||||
If you want to contribute code, please familiarize yourself with the :doc:`DeveloperPolicy`.
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
|
||||
Ways to Contribute
|
||||
==================
|
||||
|
||||
Bug Reports
|
||||
-----------
|
||||
If you are working with LLVM and run into a bug, we definitely want to know
|
||||
about it. Please let us know and follow the instructions in
|
||||
:doc:`HowToSubmitABug` to create a bug report.
|
||||
|
||||
Bug Fixes
|
||||
---------
|
||||
If you are interested in contributing code to LLVM, bugs labeled with the
|
||||
`beginner keyword`_ in the `bug tracker`_ are a good way to get familiar with
|
||||
the code base. If you are interested in fixing a bug, please create an account
|
||||
for the bug tracker and assign it to yourself, to let people know you are working on
|
||||
it.
|
||||
|
||||
Then try to reproduce and fix the bug with upstream LLVM. Start by building
|
||||
LLVM from source as described in :doc:`GettingStarted`
|
||||
and use the built binaries to reproduce the failure described in the bug. Use
|
||||
a debug build (`-DCMAKE_BUILD_TYPE=Debug`) or a build with assertions
|
||||
(`-DLLVM_ENABLE_ASSERTIONS=On`, enabled for Debug builds).
|
||||
|
||||
Bigger Pieces of Work
|
||||
---------------------
|
||||
In case you are interested in taking on a bigger piece of work, a list of
|
||||
interesting projects is maintained at the `LLVM's Open Projects page`_. In case
|
||||
you are interested in working on any of these projects, please send a mail to
|
||||
the `LLVM Developer's mailing list`_, so that we know the project is being
|
||||
worked on.
|
||||
|
||||
|
||||
How to Submit a Patch
|
||||
=====================
|
||||
Once you have a patch ready, it is time to submit it. The patch should:
|
||||
|
||||
* include a small unit test
|
||||
* conform to the :doc:`CodingStandards`. You can use the `clang-format-diff.py`_ or `git-clang-format`_ tools to automatically format your patch properly.
|
||||
* not contain any unrelated changes
|
||||
* be an isolated change. Independent changes should be submitted as separate patches as this makes reviewing easier.
|
||||
|
||||
To get a patch accepted, it has to be reviewed by the LLVM community. This can
|
||||
be done using `LLVM's Phabricator`_ or the llvm-commits mailing list.
|
||||
Please follow :ref:`Phabricator#requesting-a-review-via-the-web-interface <phabricator-request-review-web>`
|
||||
to request a review using Phabricator.
|
||||
|
||||
To make sure the right people see your patch, please select suitable reviewers
|
||||
and add them to your patch when requesting a review. Suitable reviewers are the
|
||||
code owner (see CODE_OWNERS.txt) and other people doing work in the area your
|
||||
patch touches. If you are using Phabricator, add them to the `Reviewers` field
|
||||
when creating a review and if you are using `llvm-commits`, add them to the CC of
|
||||
your email.
|
||||
|
||||
A reviewer may request changes or ask questions during the review. If you are
|
||||
uncertain on how to provide test cases, documentation, etc., feel free to ask
|
||||
for guidance during the review. Please address the feedback and re-post an
|
||||
updated version of your patch. This cycle continues until all requests and comments
|
||||
have been addressed and a reviewer accepts the patch with a `Looks good to me` or `LGTM`.
|
||||
Once that is done the change can be committed. If you do not have commit
|
||||
access, please let people know during the review and someone should commit it
|
||||
on your behalf.
|
||||
|
||||
If you have received no comments on your patch for a week, you can request a
|
||||
review by 'ping'ing a patch by responding to the email thread containing the
|
||||
patch, or the Phabricator review with "Ping." The common courtesy 'ping' rate
|
||||
is once a week. Please remember that you are asking for valuable time from other
|
||||
professional developers.
|
||||
|
||||
|
||||
Helpful Information About LLVM
|
||||
==============================
|
||||
:doc:`LLVM's documentation <index>` provides a wealth of information about LLVM's internals as
|
||||
well as various user guides. The pages listed below should provide a good overview
|
||||
of LLVM's high-level design, as well as its internals:
|
||||
|
||||
:doc:`GettingStarted`
|
||||
Discusses how to get up and running quickly with the LLVM infrastructure.
|
||||
Everything from unpacking and compilation of the distribution to execution
|
||||
of some tools.
|
||||
|
||||
:doc:`LangRef`
|
||||
Defines the LLVM intermediate representation.
|
||||
|
||||
:doc:`ProgrammersManual`
|
||||
Introduction to the general layout of the LLVM sourcebase, important classes
|
||||
and APIs, and some tips & tricks.
|
||||
|
||||
:ref:`index-subsystem-docs`
|
||||
A collection of pages documenting various subsystems of LLVM.
|
||||
|
||||
`LLVM for Grad Students`__
|
||||
This is an introduction to the LLVM infrastructure by Adrian Sampson. While it
|
||||
has been written for grad students, it provides a good, compact overview of
|
||||
LLVM's architecture, LLVM's IR and how to write a new pass.
|
||||
|
||||
.. __: http://www.cs.cornell.edu/~asampson/blog/llvm.html
|
||||
|
||||
`Intro to LLVM`__
|
||||
Book chapter providing a compiler hacker's introduction to LLVM.
|
||||
|
||||
.. __: http://www.aosabook.org/en/llvm.html
|
||||
|
||||
.. _Developer's List (llvm-dev): http://lists.llvm.org/mailman/listinfo/llvm-dev
|
||||
.. _irc.oftc.net: irc://irc.oftc.net/llvm
|
||||
.. _beginner keyword: https://bugs.llvm.org/buglist.cgi?bug_status=NEW&bug_status=REOPENED&keywords=beginner%2C%20&keywords_type=allwords&list_id=130748&query_format=advanced&resolution=---
|
||||
.. _bug tracker: https://bugs.llvm.org
|
||||
.. _clang-format-diff.py: https://reviews.llvm.org/source/clang/browse/cfe/trunk/tools/clang-format/clang-format-diff.py
|
||||
.. _git-clang-format: https://reviews.llvm.org/source/clang/browse/cfe/trunk/tools/clang-format/git-clang-format
|
||||
.. _LLVM's Phabricator: https://reviews.llvm.org/
|
||||
.. _LLVM's Open Projects page: https://llvm.org/OpenProjects.html#what
|
||||
.. _LLVM Developer's mailing list: http://lists.llvm.org/mailman/listinfo/llvm-dev
|
@ -880,6 +880,32 @@ Example:
|
||||
%phi = phi i8* [ null, %entry ], [ %alloc, %coro.alloc ]
|
||||
%frame = call i8* @llvm.coro.begin(token %id, i8* %phi)
|
||||
|
||||
.. _coro.noop:
|
||||
|
||||
'llvm.coro.noop' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
::
|
||||
|
||||
declare i8* @llvm.coro.noop()
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.coro.noop``' intrinsic returns an address of the coroutine frame of
|
||||
a coroutine that does nothing when resumed or destroyed.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
None
|
||||
|
||||
Semantics:
|
||||
""""""""""
|
||||
|
||||
This intrinsic is lowered to refer to a private constant coroutine frame. The
|
||||
resume and destroy handlers for this frame are empty functions that do nothing.
|
||||
Note that in different translation units llvm.coro.noop may return different pointers.
|
||||
|
||||
.. _coro.frame:
|
||||
|
||||
'llvm.coro.frame' Intrinsic
|
||||
|
@ -53,24 +53,15 @@ serve as a basis for anyone who wants to create their own Docker image with
|
||||
LLVM components, compiled from sources. The sources are checked out from the
|
||||
upstream svn repository when building the image.
|
||||
|
||||
Inside each subfolder we host Dockerfiles for two images:
|
||||
The resulting image contains only the requested LLVM components and a few extra
|
||||
packages to make the image minimally useful for C++ development, e.g. libstdc++
|
||||
and binutils.
|
||||
|
||||
- ``build/`` image is used to compile LLVM, it installs a system compiler and all
|
||||
build dependencies of LLVM. After the build process is finished, the build
|
||||
image will have an archive with compiled components at ``/tmp/clang.tar.gz``.
|
||||
- ``release/`` image usually only contains LLVM components, compiled by the
|
||||
``build/`` image, and also libstdc++ and binutils to make image minimally
|
||||
useful for C++ development. The assumption is that you usually want clang to
|
||||
be one of the provided components.
|
||||
|
||||
To build both of those images, use ``build_docker_image.sh`` script.
|
||||
It will checkout LLVM sources and build clang in the ``build`` container, copy results
|
||||
of the build to the local filesystem and then build the ``release`` container using
|
||||
those. The ``build_docker_image.sh`` accepts a list of LLVM repositories to
|
||||
checkout, and arguments for CMake invocation.
|
||||
The interface to run the build is ``build_docker_image.sh`` script. It accepts a
|
||||
list of LLVM repositories to checkout and arguments for CMake invocation.
|
||||
|
||||
If you want to write your own docker image, start with an ``example/`` subfolder.
|
||||
It provides incomplete Dockerfiles with (very few) FIXMEs explaining the steps
|
||||
It provides an incomplete Dockerfile with (very few) FIXMEs explaining the steps
|
||||
you need to take in order to make your Dockerfiles functional.
|
||||
|
||||
Usage
|
||||
@ -110,10 +101,10 @@ this command will do that:
|
||||
-DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \
|
||||
-DCLANG_ENABLE_BOOTSTRAP=ON -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers"
|
||||
|
||||
This will produce two images, a release image ``clang-debian8:staging`` and a
|
||||
build image ``clang-debian8-build:staging`` from the latest upstream revision.
|
||||
After the image is built you can run bash inside a container based on your
|
||||
image like this:
|
||||
This will produce a new image ``clang-debian8:staging`` from the latest
|
||||
upstream revision.
|
||||
After the image is built you can run bash inside a container based on your image
|
||||
like this:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
@ -181,19 +172,14 @@ debian8-based image using the latest ``google/stable`` sources for you:
|
||||
|
||||
Minimizing docker image size
|
||||
============================
|
||||
Due to Docker restrictions we use two images (i.e., build and release folders)
|
||||
for the release image to be as small as possible. It's much easier to achieve
|
||||
that using two images, because Docker would store a filesystem layer for each
|
||||
command in the Dockerfile, i.e. if you install some packages in one command,
|
||||
then remove those in a separate command, the size of the resulting image will
|
||||
still be proportinal to the size of an image with installed packages.
|
||||
Therefore, we strive to provide a very simple release image which only copies
|
||||
compiled clang and does not do anything else.
|
||||
|
||||
Docker 1.13 added a ``--squash`` flag that allows to flatten the layers of the
|
||||
image, i.e. remove the parts that were actually deleted. That is an easier way
|
||||
to produce the smallest images possible by using just a single image. We do not
|
||||
use it because as of today the flag is in experimental stage and not everyone
|
||||
may have the latest docker version available. When the flag is out of
|
||||
experimental stage, we should investigate replacing two images approach with
|
||||
just a single image, built using ``--squash`` flag.
|
||||
Due to how Docker's filesystem works, all intermediate writes are persisted in
|
||||
the resulting image, even if they are removed in the following commands.
|
||||
To minimize the resulting image size we use `multi-stage Docker builds
|
||||
<https://docs.docker.com/develop/develop-images/multistage-build/>`_.
|
||||
Internally Docker builds two images. The first image does all the work: installs
|
||||
build dependencies, checks out LLVM source code, compiles LLVM, etc.
|
||||
The first image is only used during build and does not have a descriptive name,
|
||||
i.e. it is only accessible via the hash value after the build is finished.
|
||||
The second image is our resulting image. It contains only the built binaries
|
||||
and not any build dependencies. It is also accessible via a descriptive name
|
||||
(specified by -d and -t flags).
|
||||
|
@ -365,7 +365,7 @@ abstract interface.
|
||||
|
||||
When used in the native Windows C++ exception handling implementation, this
|
||||
intrinsic serves as a placeholder to delimit code before a catch handler is
|
||||
outlined. When the handler is is outlined, this intrinsic will be replaced
|
||||
outlined. When the handler is outlined, this intrinsic will be replaced
|
||||
by instructions that retrieve the exception object pointer from the frame
|
||||
allocation block.
|
||||
|
||||
@ -839,3 +839,66 @@ or ``catchswitch`` to unwind.
|
||||
Finally, the funclet pads' unwind destinations cannot form a cycle. This
|
||||
ensures that EH lowering can construct "try regions" with a tree-like
|
||||
structure, which funclet-based personalities may require.
|
||||
|
||||
Exception Handling support on the target
|
||||
=================================================
|
||||
|
||||
In order to support exception handling on a particular target, there are a few
|
||||
items that need to be implemented.
|
||||
|
||||
* CFI directives
|
||||
|
||||
First, you have to assign each target register with a unique DWARF number.
|
||||
Then in ``TargetFrameLowering``'s ``emitPrologue``, you have to emit `CFI
|
||||
directives <https://sourceware.org/binutils/docs/as/CFI-directives.html>`_
|
||||
to specify how to calculate the CFA (Canonical Frame Address) and how register
|
||||
is restored from the address pointed by the CFA with an offset. The assembler
|
||||
is instructed by CFI directives to build ``.eh_frame`` section, which is used
|
||||
by the unwinder to unwind the stack during exception handling.
|
||||
|
||||
* ``getExceptionPointerRegister`` and ``getExceptionSelectorRegister``
|
||||
|
||||
``TargetLowering`` must implement both functions. The *personality function*
|
||||
passes the *exception structure* (a pointer) and *selector value* (an integer)
|
||||
to the landing pad through the registers specified by ``getExceptionPointerRegister``
|
||||
and ``getExceptionSelectorRegister`` respectively. On most platforms, they
|
||||
will be GPRs and will be the same as the ones specified in the calling convention.
|
||||
|
||||
* ``EH_RETURN``
|
||||
|
||||
The ISD node represents the undocumented GCC extension ``__builtin_eh_return (offset, handler)``,
|
||||
which adjusts the stack by offset and then jumps to the handler. ``__builtin_eh_return``
|
||||
is used in GCC unwinder (`libgcc <https://gcc.gnu.org/onlinedocs/gccint/Libgcc.html>`_),
|
||||
but not in LLVM unwinder (`libunwind <https://clang.llvm.org/docs/Toolchain.html#unwind-library>`_).
|
||||
If you are building on top of ``libgcc`` and have particular requirements for your target,
|
||||
you have to handle ``EH_RETURN`` in ``TargetLowering``.
|
||||
|
||||
If you don't leverage the existing runtime (``libstdc++`` and ``libgcc``),
|
||||
you have to take a look at `libc++ <https://libcxx.llvm.org/>`_ and
|
||||
`libunwind <https://clang.llvm.org/docs/Toolchain.html#unwind-library>`_
|
||||
to see what has to be done there. For ``libunwind``, you have to do the following:
|
||||
|
||||
* ``__libunwind_config.h``
|
||||
|
||||
Define macros for your target.
|
||||
|
||||
* ``include/libunwind.h``
|
||||
|
||||
Define enum for the target registers.
|
||||
|
||||
* ``src/Registers.hpp``
|
||||
|
||||
Define ``Registers`` class for your target, implement setter and getter functions.
|
||||
|
||||
* ``src/UnwindCursor.hpp``
|
||||
|
||||
Define ``dwarfEncoding`` and ``stepWithCompactEncoding`` for your ``Registers``
|
||||
class.
|
||||
|
||||
* ``src/UnwindRegistersRestore.S``
|
||||
|
||||
Write an assembly function to restore all your target registers from the memory.
|
||||
|
||||
* ``src/UnwindRegistersSave.S``
|
||||
|
||||
Write an assembly function to save all your target registers on the memory.
|
||||
|
@ -182,6 +182,30 @@ which gnu as does not support. For gas compatibility, sections with a name
|
||||
starting with ".debug" are implicitly discardable.
|
||||
|
||||
|
||||
ARM64/COFF-Dependent
|
||||
--------------------
|
||||
|
||||
Relocations
|
||||
^^^^^^^^^^^
|
||||
|
||||
The following additional symbol variants are supported:
|
||||
|
||||
**:secrel_lo12:** generates a relocation that corresponds to the COFF relocation
|
||||
types ``IMAGE_REL_ARM64_SECREL_LOW12A`` or ``IMAGE_REL_ARM64_SECREL_LOW12L``.
|
||||
|
||||
**:secrel_hi12:** generates a relocation that corresponds to the COFF relocation
|
||||
type ``IMAGE_REL_ARM64_SECREL_HIGH12A``.
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
add x0, x0, :secrel_hi12:symbol
|
||||
ldr x0, [x0, :secrel_lo12:symbol]
|
||||
|
||||
add x1, x1, :secrel_hi12:symbol
|
||||
add x1, x1, :secrel_lo12:symbol
|
||||
...
|
||||
|
||||
|
||||
ELF-Dependent
|
||||
-------------
|
||||
|
||||
@ -221,6 +245,186 @@ which is equivalent to just
|
||||
.section .foo,"a",@progbits
|
||||
.section .bar,"ao",@progbits,.foo
|
||||
|
||||
``.linker-options`` Section (linker options)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In order to support passing linker options from the frontend to the linker, a
|
||||
special section of type ``SHT_LLVM_LINKER_OPTIONS`` (usually named
|
||||
``.linker-options`` though the name is not significant as it is identified by
|
||||
the type) is used. The contents of this section is a simple pair-wise encoding of
|
||||
directives for consideration by the linker. The strings are encoded as standard
|
||||
null-terminated UTF-8 strings. They are emitted inline to avoid having the
|
||||
linker traverse the object file for retrieving the value. The linker is
|
||||
permitted to not honour the option and instead provide a warning/error to the
|
||||
user that the requested option was not honoured.
|
||||
|
||||
The section has type ``SHT_LLVM_LINKER_OPTIONS`` and has the ``SHF_EXCLUDE``
|
||||
flag to ensure that the section is treated as opaque by linkers which do not
|
||||
support the feature and will not be emitted into the final linked binary.
|
||||
|
||||
This would be equivalent to the following raw assembly:
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
.section ".linker-options","e",@llvm_linker_options
|
||||
.asciz "option 1"
|
||||
.asciz "value 1"
|
||||
.asciz "option 2"
|
||||
.asciz "value 2"
|
||||
|
||||
The following directives are specified:
|
||||
|
||||
- lib
|
||||
|
||||
The parameter identifies a library to be linked against. The library will
|
||||
be looked up in the default and any specified library search paths
|
||||
(specified to this point).
|
||||
|
||||
- libpath
|
||||
|
||||
The parameter identifies an additional library search path to be considered
|
||||
when looking up libraries after the inclusion of this option.
|
||||
|
||||
``SHT_LLVM_CALL_GRAPH_PROFILE`` Section (Call Graph Profile)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This section is used to pass a call graph profile to the linker which can be
|
||||
used to optimize the placement of sections. It contains a sequence of
|
||||
(from symbol, to symbol, weight) tuples.
|
||||
|
||||
It shall have a type of ``SHT_LLVM_CALL_GRAPH_PROFILE`` (0x6fff4c02), shall
|
||||
have the ``SHF_EXCLUDE`` flag set, the ``sh_link`` member shall hold the section
|
||||
header index of the associated symbol table, and shall have a ``sh_entsize`` of
|
||||
16. It should be named ``.llvm.call-graph-profile``.
|
||||
|
||||
The contents of the section shall be a sequence of ``Elf_CGProfile`` entries.
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
typedef struct {
|
||||
Elf_Word cgp_from;
|
||||
Elf_Word cgp_to;
|
||||
Elf_Xword cgp_weight;
|
||||
} Elf_CGProfile;
|
||||
|
||||
cgp_from
|
||||
The symbol index of the source of the edge.
|
||||
|
||||
cgp_to
|
||||
The symbol index of the destination of the edge.
|
||||
|
||||
cgp_weight
|
||||
The weight of the edge.
|
||||
|
||||
This is represented in assembly as:
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
.cg_profile from, to, 42
|
||||
|
||||
``.cg_profile`` directives are processed at the end of the file. It is an error
|
||||
if either ``from`` or ``to`` are undefined temporary symbols. If either symbol
|
||||
is a temporary symbol, then the section symbol is used instead. If either
|
||||
symbol is undefined, then that symbol is defined as if ``.weak symbol`` has been
|
||||
written at the end of the file. This forces the symbol to show up in the symbol
|
||||
table.
|
||||
|
||||
``SHT_LLVM_ADDRSIG`` Section (address-significance table)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This section is used to mark symbols as address-significant, i.e. the address
|
||||
of the symbol is used in a comparison or leaks outside the translation unit. It
|
||||
has the same meaning as the absence of the LLVM attributes ``unnamed_addr``
|
||||
and ``local_unnamed_addr``.
|
||||
|
||||
Any sections referred to by symbols that are not marked as address-significant
|
||||
in any object file may be safely merged by a linker without breaking the
|
||||
address uniqueness guarantee provided by the C and C++ language standards.
|
||||
|
||||
The contents of the section are a sequence of ULEB128-encoded integers
|
||||
referring to the symbol table indexes of the address-significant symbols.
|
||||
|
||||
There are two associated assembly directives:
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
.addrsig
|
||||
|
||||
This instructs the assembler to emit an address-significance table. Without
|
||||
this directive, all symbols are considered address-significant.
|
||||
|
||||
.. code-block:: gas
|
||||
|
||||
.addrsig_sym sym
|
||||
|
||||
This marks ``sym`` as address-significant.
|
||||
|
||||
CodeView-Dependent
|
||||
------------------
|
||||
|
||||
``.cv_file`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
Syntax:
|
||||
``.cv_file`` *FileNumber FileName* [ *checksum* ] [ *checksumkind* ]
|
||||
|
||||
``.cv_func_id`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Introduces a function ID that can be used with ``.cv_loc``.
|
||||
|
||||
Syntax:
|
||||
``.cv_func_id`` *FunctionId*
|
||||
|
||||
``.cv_inline_site_id`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Introduces a function ID that can be used with ``.cv_loc``. Includes
|
||||
``inlined at`` source location information for use in the line table of the
|
||||
caller, whether the caller is a real function or another inlined call site.
|
||||
|
||||
Syntax:
|
||||
``.cv_inline_site_id`` *FunctionId* ``within`` *Function* ``inlined_at`` *FileNumber Line* [ *Column* ]
|
||||
|
||||
``.cv_loc`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^
|
||||
The first number is a file number, must have been previously assigned with a
|
||||
``.file`` directive, the second number is the line number and optionally the
|
||||
third number is a column position (zero if not specified). The remaining
|
||||
optional items are ``.loc`` sub-directives.
|
||||
|
||||
Syntax:
|
||||
``.cv_loc`` *FunctionId FileNumber* [ *Line* ] [ *Column* ] [ *prologue_end* ] [ ``is_stmt`` *value* ]
|
||||
|
||||
``.cv_linetable`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Syntax:
|
||||
``.cv_linetable`` *FunctionId* ``,`` *FunctionStart* ``,`` *FunctionEnd*
|
||||
|
||||
``.cv_inline_linetable`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Syntax:
|
||||
``.cv_inline_linetable`` *PrimaryFunctionId* ``,`` *FileNumber Line FunctionStart FunctionEnd*
|
||||
|
||||
``.cv_def_range`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
The *GapStart* and *GapEnd* options may be repeated as needed.
|
||||
|
||||
Syntax:
|
||||
``.cv_def_range`` *RangeStart RangeEnd* [ *GapStart GapEnd* ] ``,`` *bytes*
|
||||
|
||||
``.cv_stringtable`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
``.cv_filechecksums`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
``.cv_filechecksumoffset`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Syntax:
|
||||
``.cv_filechecksumoffset`` *FileNumber*
|
||||
|
||||
``.cv_fpo_data`` Directive
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Syntax:
|
||||
``.cv_fpo_data`` *procsym*
|
||||
|
||||
Target Specific Behaviour
|
||||
=========================
|
||||
|
@ -433,7 +433,7 @@ data structure, but there are only 20 lines of meaningful code.)
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
/// @brief The map for a single function's stack frame. One of these is
|
||||
/// The map for a single function's stack frame. One of these is
|
||||
/// compiled as constant data into the executable for each function.
|
||||
///
|
||||
/// Storage of metadata values is elided if the %metadata parameter to
|
||||
@ -444,7 +444,7 @@ data structure, but there are only 20 lines of meaningful code.)
|
||||
const void *Meta[0]; //< Metadata for each root.
|
||||
};
|
||||
|
||||
/// @brief A link in the dynamic shadow stack. One of these is embedded in
|
||||
/// A link in the dynamic shadow stack. One of these is embedded in
|
||||
/// the stack frame of each function on the call stack.
|
||||
struct StackEntry {
|
||||
StackEntry *Next; //< Link to next stack entry (the caller's).
|
||||
@ -452,13 +452,13 @@ data structure, but there are only 20 lines of meaningful code.)
|
||||
void *Roots[0]; //< Stack roots (in-place array).
|
||||
};
|
||||
|
||||
/// @brief The head of the singly-linked list of StackEntries. Functions push
|
||||
/// The head of the singly-linked list of StackEntries. Functions push
|
||||
/// and pop onto this in their prologue and epilogue.
|
||||
///
|
||||
/// Since there is only a global list, this technique is not threadsafe.
|
||||
StackEntry *llvm_gc_root_chain;
|
||||
|
||||
/// @brief Calls Visitor(root, meta) for each GC root on the stack.
|
||||
/// Calls Visitor(root, meta) for each GC root on the stack.
|
||||
/// root and meta are exactly the values passed to
|
||||
/// @llvm.gcroot.
|
||||
///
|
||||
@ -1032,7 +1032,7 @@ a realistic example:
|
||||
|
||||
// Emit PointCount.
|
||||
OS.AddComment("safe point count");
|
||||
AP.EmitInt32(MD.size());
|
||||
AP.emitInt32(MD.size());
|
||||
|
||||
// And each safe point...
|
||||
for (GCFunctionInfo::iterator PI = MD.begin(),
|
||||
@ -1049,18 +1049,18 @@ a realistic example:
|
||||
|
||||
// Emit the stack frame size.
|
||||
OS.AddComment("stack frame size (in words)");
|
||||
AP.EmitInt32(MD.getFrameSize() / IntPtrSize);
|
||||
AP.emitInt32(MD.getFrameSize() / IntPtrSize);
|
||||
|
||||
// Emit stack arity, i.e. the number of stacked arguments.
|
||||
unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6;
|
||||
unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ?
|
||||
MD.getFunction().arg_size() - RegisteredArgs : 0;
|
||||
OS.AddComment("stack arity");
|
||||
AP.EmitInt32(StackArity);
|
||||
AP.emitInt32(StackArity);
|
||||
|
||||
// Emit the number of live roots in the function.
|
||||
OS.AddComment("live root count");
|
||||
AP.EmitInt32(MD.live_size(PI));
|
||||
AP.emitInt32(MD.live_size(PI));
|
||||
|
||||
// And for each live root...
|
||||
for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI),
|
||||
@ -1068,7 +1068,7 @@ a realistic example:
|
||||
LI != LE; ++LI) {
|
||||
// Emit live root's offset within the stack frame.
|
||||
OS.AddComment("stack index (offset / wordsize)");
|
||||
AP.EmitInt32(LI->StackOffset);
|
||||
AP.emitInt32(LI->StackOffset);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -200,7 +200,7 @@ will need about 1-3 GB of space. A full build of LLVM and Clang will need aroun
|
||||
is so large because of all the debugging information and the fact that the
|
||||
libraries are statically linked into multiple tools).
|
||||
|
||||
If you you are space-constrained, you can build only selected tools or only
|
||||
If you are space-constrained, you can build only selected tools or only
|
||||
selected targets. The Release build requires considerably less space.
|
||||
|
||||
The LLVM suite *may* compile on other platforms, but it is not guaranteed to do
|
||||
@ -324,7 +324,7 @@ However, some Linux distributions and some other or older BSDs sometimes have
|
||||
extremely old versions of GCC. These steps attempt to help you upgrade your
|
||||
compiler even on such a system. However, if at all possible, we encourage you
|
||||
to use a recent version of a distribution with a modern system compiler that
|
||||
meets these requirements. Note that it is tempting to to install a prior
|
||||
meets these requirements. Note that it is tempting to install a prior
|
||||
version of Clang and libc++ to be the host compiler, however libc++ was not
|
||||
well tested or set up to build on Linux until relatively recently. As
|
||||
a consequence, this guide suggests just using libstdc++ and a modern GCC as the
|
||||
@ -492,8 +492,16 @@ Git Mirror
|
||||
Git mirrors are available for a number of LLVM subprojects. These mirrors sync
|
||||
automatically with each Subversion commit and contain all necessary git-svn
|
||||
marks (so, you can recreate git-svn metadata locally). Note that right now
|
||||
mirrors reflect only ``trunk`` for each project. You can do the read-only Git
|
||||
clone of LLVM via:
|
||||
mirrors reflect only ``trunk`` for each project.
|
||||
|
||||
.. note::
|
||||
|
||||
On Windows, first you will want to do ``git config --global core.autocrlf
|
||||
false`` before you clone. This goes a long way toward ensuring that
|
||||
line-endings will be handled correctly (the LLVM project mostly uses Linux
|
||||
line-endings).
|
||||
|
||||
You can do the read-only Git clone of LLVM via:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
@ -912,7 +920,7 @@ where they are built (a Canadian Cross build). To generate build files for
|
||||
cross-compiling CMake provides a variable ``CMAKE_TOOLCHAIN_FILE`` which can
|
||||
define compiler flags and variables used during the CMake test operations.
|
||||
|
||||
The result of such a build is executables that are not runnable on on the build
|
||||
The result of such a build is executables that are not runnable on the build
|
||||
host but can be executed on the target. As an example the following CMake
|
||||
invocation can generate build files targeting iOS. This will work on Mac OS X
|
||||
with the latest Xcode:
|
||||
|
@ -6,13 +6,16 @@ Introduction
|
||||
============
|
||||
|
||||
Building with link time optimization requires cooperation from
|
||||
the system linker. LTO support on Linux systems requires that you use the
|
||||
`gold linker`_ or ld.bfd from binutils >= 2.21.51.0.2, as they support LTO via plugins. This is the same mechanism
|
||||
the system linker. LTO support on Linux systems is available via the
|
||||
`gold linker`_ which supports LTO via plugins. This is the same mechanism
|
||||
used by the `GCC LTO`_ project.
|
||||
|
||||
The LLVM gold plugin implements the gold plugin interface on top of
|
||||
:ref:`libLTO`. The same plugin can also be used by other tools such as
|
||||
``ar`` and ``nm``.
|
||||
``ar`` and ``nm``. Note that ld.bfd from binutils version 2.21.51.0.2
|
||||
and above also supports LTO via plugins. However, usage of the LLVM
|
||||
gold plugin with ld.bfd is not tested and therefore not officially
|
||||
supported or recommended.
|
||||
|
||||
.. _`gold linker`: http://sourceware.org/binutils
|
||||
.. _`GCC LTO`: http://gcc.gnu.org/wiki/LinkTimeOptimization
|
||||
@ -23,25 +26,44 @@ The LLVM gold plugin implements the gold plugin interface on top of
|
||||
How to build it
|
||||
===============
|
||||
|
||||
Check for plugin support by running ``/usr/bin/ld -plugin``. If it complains
|
||||
"missing argument" then you have plugin support. If not, such as an "unknown option"
|
||||
error then you will either need to build gold or install a recent version
|
||||
of ld.bfd with plugin support and then build gold plugin.
|
||||
You need to have gold with plugin support and build the LLVMgold plugin.
|
||||
The gold linker is installed as ld.gold. To see whether gold is the default
|
||||
on your system, run ``/usr/bin/ld -v``. It will report "GNU
|
||||
gold" or else "GNU ld" if not. If gold is already installed at
|
||||
``/usr/bin/ld.gold``, one option is to simply make that the default by
|
||||
backing up your existing ``/usr/bin/ld`` and creating a symbolic link
|
||||
with ``ln -s /usr/bin/ld.gold /usr/bin/ld``. Alternatively, you can build
|
||||
with clang's ``-fuse-ld=gold`` or add ``-fuse-ld=gold`` to LDFLAGS, which will
|
||||
cause the clang driver to invoke ``/usr/bin/ld.gold`` directly.
|
||||
|
||||
* Download, configure and build ld.bfd with plugin support:
|
||||
If you have gold installed, check for plugin support by running
|
||||
``/usr/bin/ld.gold -plugin``. If it complains "missing argument" then
|
||||
you have plugin support. If not, and you get an error such as "unknown option",
|
||||
then you will either need to build gold or install a version with plugin
|
||||
support.
|
||||
|
||||
* Download, configure and build gold with plugin support:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ git clone --depth 1 git://sourceware.org/git/binutils-gdb.git binutils
|
||||
$ mkdir build
|
||||
$ cd build
|
||||
$ ../binutils/configure --disable-werror # ld.bfd includes plugin support by default
|
||||
$ make all-ld
|
||||
$ ../binutils/configure --enable-gold --enable-plugins --disable-werror
|
||||
$ make all-gold
|
||||
|
||||
That should leave you with ``build/ld/ld-new`` which supports
|
||||
That should leave you with ``build/gold/ld-new`` which supports
|
||||
the ``-plugin`` option. Running ``make`` will additionally build
|
||||
``build/binutils/ar`` and ``nm-new`` binaries supporting plugins.
|
||||
|
||||
Once you're ready to switch to using gold, back up your existing
|
||||
``/usr/bin/ld`` then replace it with ``ld-new``. Alternatively, install
|
||||
in ``/usr/bin/ld.gold`` and use ``-fuse-ld=gold`` as described earlier.
|
||||
|
||||
Optionally, add ``--enable-gold=default`` to the above configure invocation
|
||||
to automatically install the newly built gold as the default linker with
|
||||
``make install``.
|
||||
|
||||
* Build the LLVMgold plugin. Run CMake with
|
||||
``-DLLVM_BINUTILS_INCDIR=/path/to/binutils/include``. The correct include
|
||||
path will contain the file ``plugin-api.h``.
|
||||
@ -49,19 +71,12 @@ of ld.bfd with plugin support and then build gold plugin.
|
||||
Usage
|
||||
=====
|
||||
|
||||
The linker takes a ``-plugin`` option that points to the path of
|
||||
the plugin ``.so`` file. To find out what link command ``gcc``
|
||||
would run in a given situation, run ``gcc -v [...]`` and
|
||||
look for the line where it runs ``collect2``. Replace that with
|
||||
``ld-new -plugin /path/to/LLVMgold.so`` to test it out. Once you're
|
||||
ready to switch to using gold, backup your existing ``/usr/bin/ld``
|
||||
then replace it with ``ld-new``.
|
||||
|
||||
You should produce bitcode files from ``clang`` with the option
|
||||
``-flto``. This flag will also cause ``clang`` to look for the gold plugin in
|
||||
the ``lib`` directory under its prefix and pass the ``-plugin`` option to
|
||||
``ld``. It will not look for an alternate linker, which is why you need
|
||||
gold to be the installed system linker in your path.
|
||||
``ld``. It will not look for an alternate linker without ``-fuse-ld=gold``,
|
||||
which is why you otherwise need gold to be the installed system linker in
|
||||
your path.
|
||||
|
||||
``ar`` and ``nm`` also accept the ``-plugin`` option and it's possible to
|
||||
install ``LLVMgold.so`` to ``/usr/lib/bfd-plugins`` for a seamless setup.
|
||||
|
@ -38,7 +38,7 @@ Crashing Bugs
|
||||
|
||||
More often than not, bugs in the compiler cause it to crash---often due to
|
||||
an assertion failure of some sort. The most important piece of the puzzle
|
||||
is to figure out if it is crashing in the GCC front-end or if it is one of
|
||||
is to figure out if it is crashing in the Clang front-end or if it is one of
|
||||
the LLVM libraries (e.g. the optimizer or code generator) that has
|
||||
problems.
|
||||
|
||||
|
1455
docs/LangRef.rst
1455
docs/LangRef.rst
File diff suppressed because it is too large
Load Diff
@ -133,6 +133,12 @@ H
|
||||
I
|
||||
-
|
||||
|
||||
**ICE**
|
||||
Internal Compiler Error. This abbreviation is used to describe errors
|
||||
that occur in LLVM or Clang as they are compiling source code. For example,
|
||||
if a valid C++ source program were to trigger an assert in Clang when
|
||||
compiled, that could be referred to as an "ICE".
|
||||
|
||||
**IPA**
|
||||
Inter-Procedural Analysis. Refers to any variety of code analysis that
|
||||
occurs between procedures, functions or compilation units (modules).
|
||||
|
@ -75,11 +75,13 @@ Recent versions of Clang (starting from 6.0) include libFuzzer, and no extra ins
|
||||
|
||||
In order to build your fuzzer binary, use the `-fsanitize=fuzzer` flag during the
|
||||
compilation and linking. In most cases you may want to combine libFuzzer with
|
||||
AddressSanitizer_ (ASAN), UndefinedBehaviorSanitizer_ (UBSAN), or both::
|
||||
AddressSanitizer_ (ASAN), UndefinedBehaviorSanitizer_ (UBSAN), or both. You can
|
||||
also build with MemorySanitizer_ (MSAN), but support is experimental::
|
||||
|
||||
clang -g -O1 -fsanitize=fuzzer mytarget.c # Builds the fuzz target w/o sanitizers
|
||||
clang -g -O1 -fsanitize=fuzzer,address mytarget.c # Builds the fuzz target with ASAN
|
||||
clang -g -O1 -fsanitize=fuzzer,signed-integer-overflow mytarget.c # Builds the fuzz target with a part of UBSAN
|
||||
clang -g -O1 -fsanitize=fuzzer,memory mytarget.c # Builds the fuzz target with MSAN
|
||||
|
||||
This will perform the necessary instrumentation, as well as linking with the libFuzzer library.
|
||||
Note that ``-fsanitize=fuzzer`` links in the libFuzzer's ``main()`` symbol.
|
||||
@ -93,10 +95,6 @@ instrumentation without linking::
|
||||
Then libFuzzer can be linked to the desired driver by passing in
|
||||
``-fsanitize=fuzzer`` during the linking stage.
|
||||
|
||||
Using MemorySanitizer_ (MSAN) with libFuzzer is possible too, but tricky.
|
||||
The exact details are out of scope, we expect to simplify this in future
|
||||
versions.
|
||||
|
||||
.. _libfuzzer-corpus:
|
||||
|
||||
Corpus
|
||||
@ -369,14 +367,16 @@ possible event codes are:
|
||||
Each output line also reports the following statistics (when non-zero):
|
||||
|
||||
``cov:``
|
||||
Total number of code blocks or edges covered by the executing the current
|
||||
corpus.
|
||||
Total number of code blocks or edges covered by executing the current corpus.
|
||||
``ft:``
|
||||
libFuzzer uses different signals to evaluate the code coverage:
|
||||
edge coverage, edge counters, value profiles, indirect caller/callee pairs, etc.
|
||||
These signals combined are called *features* (`ft:`).
|
||||
``corp:``
|
||||
Number of entries in the current in-memory test corpus and its size in bytes.
|
||||
``lim:``
|
||||
Current limit on the length of new entries in the corpus. Increases over time
|
||||
until the max length (``-max_len``) is reached.
|
||||
``exec/s:``
|
||||
Number of fuzzer iterations per second.
|
||||
``rss:``
|
||||
|
@ -185,15 +185,15 @@ of such YAML document:
|
||||
name: inc
|
||||
tracksRegLiveness: true
|
||||
liveins:
|
||||
- { reg: '%rdi' }
|
||||
- { reg: '$rdi' }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: %rdi
|
||||
liveins: $rdi
|
||||
|
||||
%eax = MOV32rm %rdi, 1, _, 0, _
|
||||
%eax = INC32r killed %eax, implicit-def dead %eflags
|
||||
MOV32mr killed %rdi, 1, _, 0, _, %eax
|
||||
RETQ %eax
|
||||
$eax = MOV32rm $rdi, 1, _, 0, _
|
||||
$eax = INC32r killed $eax, implicit-def dead $eflags
|
||||
MOV32mr killed $rdi, 1, _, 0, _, $eax
|
||||
RETQ $eax
|
||||
...
|
||||
|
||||
The document above consists of attributes that represent the various
|
||||
@ -307,7 +307,7 @@ the instructions:
|
||||
.. code-block:: text
|
||||
|
||||
bb.0.entry:
|
||||
liveins: %edi, %esi
|
||||
liveins: $edi, $esi
|
||||
|
||||
The list of live in registers and successors can be empty. The language also
|
||||
allows multiple live in register and successor lists - they are combined into
|
||||
@ -344,7 +344,7 @@ operand:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
RETQ %eax
|
||||
RETQ $eax
|
||||
|
||||
However, if the machine instruction has one or more explicitly defined register
|
||||
operands, the instruction's name has to be specified after them. The example
|
||||
@ -353,7 +353,7 @@ defined register operands:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%sp, %fp, %lr = LDPXpost %sp, 2
|
||||
$sp, $fp, $lr = LDPXpost $sp, 2
|
||||
|
||||
The instruction names are serialized using the exact definitions from the
|
||||
target's ``*InstrInfo.td`` files, and they are case sensitive. This means that
|
||||
@ -365,40 +365,60 @@ machine instructions.
|
||||
Instruction Flags
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
The flag ``frame-setup`` can be specified before the instruction's name:
|
||||
The flag ``frame-setup`` or ``frame-destroy`` can be specified before the
|
||||
instruction's name:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%fp = frame-setup ADDXri %sp, 0, 0
|
||||
$fp = frame-setup ADDXri $sp, 0, 0
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
$x21, $x20 = frame-destroy LDPXi $sp
|
||||
|
||||
.. _registers:
|
||||
|
||||
Bundled Instructions
|
||||
^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The syntax for bundled instructions is the following:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
BUNDLE implicit-def $r0, implicit-def $r1, implicit $r2 {
|
||||
$r0 = SOME_OP $r2
|
||||
$r1 = ANOTHER_OP internal $r0
|
||||
}
|
||||
|
||||
The first instruction is often a bundle header. The instructions between ``{``
|
||||
and ``}`` are bundled with the first instruction.
|
||||
|
||||
Registers
|
||||
---------
|
||||
|
||||
Registers are one of the key primitives in the machine instructions
|
||||
serialization language. They are primarly used in the
|
||||
serialization language. They are primarily used in the
|
||||
:ref:`register machine operands <register-operands>`,
|
||||
but they can also be used in a number of other places, like the
|
||||
:ref:`basic block's live in list <bb-liveins>`.
|
||||
|
||||
The physical registers are identified by their name. They use the following
|
||||
syntax:
|
||||
The physical registers are identified by their name and by the '$' prefix sigil.
|
||||
They use the following syntax:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%<name>
|
||||
$<name>
|
||||
|
||||
The example below shows three X86 physical registers:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%eax
|
||||
%r15
|
||||
%eflags
|
||||
$eax
|
||||
$r15
|
||||
$eflags
|
||||
|
||||
The virtual registers are identified by their ID number. They use the following
|
||||
syntax:
|
||||
The virtual registers are identified by their ID number and by the '%' sigil.
|
||||
They use the following syntax:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
@ -411,7 +431,7 @@ Example:
|
||||
%0
|
||||
|
||||
The null registers are represented using an underscore ('``_``'). They can also be
|
||||
represented using a '``%noreg``' named register, although the former syntax
|
||||
represented using a '``$noreg``' named register, although the former syntax
|
||||
is preferred.
|
||||
|
||||
.. _machine-operands:
|
||||
@ -432,7 +452,7 @@ immediate machine operand ``-42``:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%eax = MOV32ri -42
|
||||
$eax = MOV32ri -42
|
||||
|
||||
An immediate operand is also used to represent a subregister index when the
|
||||
machine instruction has one of the following opcodes:
|
||||
@ -490,7 +510,7 @@ This example shows an instance of the X86 ``XOR32rr`` instruction that has
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
dead %eax = XOR32rr undef %eax, undef %eax, implicit-def dead %eflags, implicit-def %al
|
||||
dead $eax = XOR32rr undef $eax, undef $eax, implicit-def dead $eflags, implicit-def $al
|
||||
|
||||
.. _register-flags:
|
||||
|
||||
@ -610,7 +630,7 @@ a global value operand named ``G``:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%rax = MOV64rm %rip, 1, _, @G, _
|
||||
$rax = MOV64rm $rip, 1, _, @G, _
|
||||
|
||||
The named global values are represented using an identifier with the '@' prefix.
|
||||
If the identifier doesn't match the regular expression
|
||||
@ -632,7 +652,7 @@ and the offset 8:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%sgpr2 = S_ADD_U32 _, target-index(amdgpu-constdata-start) + 8, implicit-def _, implicit-def _
|
||||
$sgpr2 = S_ADD_U32 _, target-index(amdgpu-constdata-start) + 8, implicit-def _, implicit-def _
|
||||
|
||||
Jump-table Index Operands
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
@ -641,7 +661,7 @@ A jump-table index operand with the index 0 is printed as following:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
tBR_JTr killed %r0, %jump-table.0
|
||||
tBR_JTr killed $r0, %jump-table.0
|
||||
|
||||
A machine jump-table entry contains a list of ``MachineBasicBlocks``. When serializing all the function's jump-table entries, the following format is used:
|
||||
|
||||
@ -670,7 +690,7 @@ Example:
|
||||
External Symbol Operands
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
An external symbol operand is represented using an identifier with the ``$``
|
||||
An external symbol operand is represented using an identifier with the ``&``
|
||||
prefix. The identifier is surrounded with ""'s and escaped if it has any
|
||||
special non-printable characters in it.
|
||||
|
||||
@ -678,7 +698,7 @@ Example:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
CALL64pcrel32 $__stack_chk_fail, csr_64, implicit %rsp, implicit-def %rsp
|
||||
CALL64pcrel32 &__stack_chk_fail, csr_64, implicit $rsp, implicit-def $rsp
|
||||
|
||||
MCSymbol Operands
|
||||
^^^^^^^^^^^^^^^^^
|
||||
@ -705,7 +725,7 @@ The syntax is:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
CFI_INSTRUCTION offset %w30, -16
|
||||
CFI_INSTRUCTION offset $w30, -16
|
||||
|
||||
which may be emitted later in the MC layer as:
|
||||
|
||||
@ -722,7 +742,7 @@ The syntax for the ``returnaddress`` intrinsic is:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
%x0 = COPY intrinsic(@llvm.returnaddress)
|
||||
$x0 = COPY intrinsic(@llvm.returnaddress)
|
||||
|
||||
Predicate Operands
|
||||
^^^^^^^^^^^^^^^^^^
|
||||
@ -738,7 +758,6 @@ For an int eq predicate ``ICMP_EQ``, the syntax is:
|
||||
|
||||
.. TODO: Describe the parser's default behaviour when optional YAML attributes
|
||||
are missing.
|
||||
.. TODO: Describe the syntax for the bundled instructions.
|
||||
.. TODO: Describe the syntax for virtual register YAML definitions.
|
||||
.. TODO: Describe the machine function's YAML flag attributes.
|
||||
.. TODO: Describe the syntax for the register mask machine operands.
|
||||
|
@ -79,7 +79,7 @@ viewing this example, it may be helpful to view it in terms of clobbers. The
|
||||
operands of a given ``MemoryAccess`` are all (potential) clobbers of said
|
||||
MemoryAccess, and the value produced by a ``MemoryAccess`` can act as a clobber
|
||||
for other ``MemoryAccess``\ es. Another useful way of looking at it is in
|
||||
terms of heap versions. In that view, operands of of a given
|
||||
terms of heap versions. In that view, operands of a given
|
||||
``MemoryAccess`` are the version of the heap before the operation, and
|
||||
if the access produces a value, the value is the new version of the heap
|
||||
after the operation.
|
||||
|
@ -166,7 +166,7 @@ A MachineFunctionPass should use FunctionPass::skipFunction() as such:
|
||||
|
||||
bool MyMachineFunctionPass::runOnMachineFunction(Function &MF) {
|
||||
if (skipFunction(*MF.getFunction()))
|
||||
return false;
|
||||
return false;
|
||||
// Otherwise, run the pass normally.
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,44 @@ The MSF File Format
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
.. _msf_layout:
|
||||
|
||||
File Layout
|
||||
===========
|
||||
|
||||
The MSF file format consists of the following components:
|
||||
|
||||
1. :ref:`msf_superblock`
|
||||
2. :ref:`msf_freeblockmap` (also known as Free Page Map, or FPM)
|
||||
3. Data
|
||||
|
||||
Each component is stored as an indexed block, the length of which is specified
|
||||
in ``SuperBlock::BlockSize``. The file consists of 1 or more iterations of the
|
||||
following pattern (sometimes referred to as an "interval"):
|
||||
|
||||
1. 1 block of data
|
||||
2. Free Block Map 1 (corresponds to ``SuperBlock::FreeBlockMapBlock`` 1)
|
||||
3. Free Block Map 2 (corresponds to ``SuperBlock::FreeBlockMapBlock`` 2)
|
||||
4. ``SuperBlock::BlockSize - 3`` blocks of data
|
||||
|
||||
In the first interval, the first data block is used to store
|
||||
:ref:`msf_superblock`.
|
||||
|
||||
The following diagram demonstrates the general layout of the file (\| denotes
|
||||
the end of an interval, and is for visualization purposes only):
|
||||
|
||||
+-------------+-----------------------+------------------+------------------+----------+----+------+------+------+-------------+----+-----+
|
||||
| Block Index | 0 | 1 | 2 | 3 - 4095 | \| | 4096 | 4097 | 4098 | 4099 - 8191 | \| | ... |
|
||||
+=============+=======================+==================+==================+==========+====+======+======+======+=============+====+=====+
|
||||
| Meaning | :ref:`msf_superblock` | Free Block Map 1 | Free Block Map 2 | Data | \| | Data | FPM1 | FPM2 | Data | \| | ... |
|
||||
+-------------+-----------------------+------------------+------------------+----------+----+------+------+------+-------------+----+-----+
|
||||
|
||||
The file may end after any block, including immediately after a FPM1.
|
||||
|
||||
.. note::
|
||||
LLVM only supports 4096 byte blocks (sometimes referred to as the "BigMsf"
|
||||
variant), so the rest of this document will assume a block size of 4096.
|
||||
|
||||
.. _msf_superblock:
|
||||
|
||||
The Superblock
|
||||
@ -32,14 +70,9 @@ follows:
|
||||
sizes of 4KiB, and all further discussion assumes a block size of 4KiB.
|
||||
- **FreeBlockMapBlock** - The index of a block within the file, at which begins
|
||||
a bitfield representing the set of all blocks within the file which are "free"
|
||||
(i.e. the data within that block is not used). This bitfield is spread across
|
||||
the MSF file at ``BlockSize`` intervals.
|
||||
**Important**: ``FreeBlockMapBlock`` can only be ``1`` or ``2``! This field
|
||||
is designed to support incremental and atomic updates of the underlying MSF
|
||||
file. While writing to an MSF file, if the value of this field is `1`, you
|
||||
can write your new modified bitfield to page 2, and vice versa. Only when
|
||||
you commit the file to disk do you need to swap the value in the SuperBlock
|
||||
to point to the new ``FreeBlockMapBlock``.
|
||||
(i.e. the data within that block is not used). See :ref:`msf_freeblockmap` for
|
||||
more information.
|
||||
**Important**: ``FreeBlockMapBlock`` can only be ``1`` or ``2``!
|
||||
- **NumBlocks** - The total number of blocks in the file. ``NumBlocks * BlockSize``
|
||||
should equal the size of the file on disk.
|
||||
- **NumDirectoryBytes** - The size of the stream directory, in bytes. The stream
|
||||
@ -53,7 +86,32 @@ follows:
|
||||
contains the list of blocks that the stream directory occupies, and the stream
|
||||
directory itself can be stitched together accordingly. The number of
|
||||
``ulittle32_t``'s in this array is given by ``ceil(NumDirectoryBytes / BlockSize)``.
|
||||
|
||||
|
||||
.. _msf_freeblockmap:
|
||||
|
||||
The Free Block Map
|
||||
==================
|
||||
|
||||
The Free Block Map (sometimes referred to as the Free Page Map, or FPM) is a
|
||||
series of blocks which contains a bit flag for every block in the file. The
|
||||
flag will be set to 0 if the block is in use, and 1 if the block is unused.
|
||||
|
||||
Each file contains two FPMs, one of which is active at any given time. This
|
||||
feature is designed to support incremental and atomic updates of the underlying
|
||||
MSF file. While writing to an MSF file, if the active FPM is FPM1, you can
|
||||
write your new modified bitfield to FPM2, and vice versa. Only when you commit
|
||||
the file to disk do you need to swap the value in the SuperBlock to point to
|
||||
the new ``FreeBlockMapBlock``.
|
||||
|
||||
The Free Block Maps are stored as a series of single blocks throughout the file
|
||||
at intervals of BlockSize. Because each FPM block is of size ``BlockSize``
|
||||
bytes, it contains 8 times as many bits as an interval has blocks. This means
|
||||
that the first block of each FPM refers to the first 8 intervals of the file
|
||||
(the first 32768 blocks), the second block of each FPM refers to the next 8
|
||||
intervals, and so on. This results in far more FPM blocks being present than are
|
||||
required, but in order to maintain backwards compatibility the format must stay
|
||||
this way.
|
||||
|
||||
The Stream Directory
|
||||
====================
|
||||
The Stream Directory is the root of all access to the other streams in an MSF
|
||||
@ -66,10 +124,10 @@ file. Beginning at byte 0 of the stream directory is the following structure:
|
||||
ulittle32_t StreamSizes[NumStreams];
|
||||
ulittle32_t StreamBlocks[NumStreams][];
|
||||
};
|
||||
|
||||
|
||||
And this structure occupies exactly ``SuperBlock->NumDirectoryBytes`` bytes.
|
||||
Note that each of the last two arrays is of variable length, and in particular
|
||||
that the second array is jagged.
|
||||
that the second array is jagged.
|
||||
|
||||
**Example:** Suppose a hypothetical PDB file with a 4KiB block size, and 4
|
||||
streams of lengths {1000 bytes, 8000 bytes, 16000 bytes, 9000 bytes}.
|
||||
@ -97,7 +155,7 @@ like:
|
||||
{10, 15, 12}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
In total, this occupies ``15 * 4 = 60`` bytes, so ``SuperBlock->NumDirectoryBytes``
|
||||
would equal ``60``, and ``SuperBlock->BlockMapAddr`` would be an array of one
|
||||
``ulittle32_t``, since ``60 <= SuperBlock->BlockSize``.
|
||||
|
@ -83,6 +83,8 @@ Yet to be written.
|
||||
A pass which can be used to count how many alias queries are being made and how
|
||||
the alias analysis implementation being used responds.
|
||||
|
||||
.. _passes-da:
|
||||
|
||||
``-da``: Dependence Analysis
|
||||
----------------------------
|
||||
|
||||
@ -641,6 +643,21 @@ not library calls are simplified is controlled by the
|
||||
:ref:`-functionattrs <passes-functionattrs>` pass and LLVM's knowledge of
|
||||
library calls on different targets.
|
||||
|
||||
.. _passes-aggressive-instcombine:
|
||||
|
||||
``-aggressive-instcombine``: Combine expression patterns
|
||||
--------------------------------------------------------
|
||||
|
||||
Combine expression patterns to form expressions with fewer, simple instructions.
|
||||
This pass does not modify the CFG.
|
||||
|
||||
For example, this pass reduces the width of expressions post-dominated by TruncInst
|
||||
into smaller width when applicable.
|
||||
|
||||
It differs from instcombine pass in that it contains pattern optimization that
|
||||
requires higher complexity than the O(1), thus, it should run fewer times than
|
||||
instcombine pass.
|
||||
|
||||
``-internalize``: Internalize Global Symbols
|
||||
--------------------------------------------
|
||||
|
||||
@ -810,6 +827,27 @@ This pass implements a simple loop unroller. It works best when loops have
|
||||
been canonicalized by the :ref:`indvars <passes-indvars>` pass, allowing it to
|
||||
determine the trip counts of loops easily.
|
||||
|
||||
``-loop-unroll-and-jam``: Unroll and Jam loops
|
||||
----------------------------------------------
|
||||
|
||||
This pass implements a simple unroll and jam classical loop optimisation pass.
|
||||
It transforms loop from:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
for i.. i+= 1 for i.. i+= 4
|
||||
for j.. for j..
|
||||
code(i, j) code(i, j)
|
||||
code(i+1, j)
|
||||
code(i+2, j)
|
||||
code(i+3, j)
|
||||
remainder loop
|
||||
|
||||
Which can be seen as unrolling the outer loop and "jamming" (fusing) the inner
|
||||
loops into one. When variables or loads can be shared in the new inner loop, this
|
||||
can lead to significant performance improvements. It uses
|
||||
:ref:`Dependence Analysis <passes-da>` for proving the transformations are safe.
|
||||
|
||||
``-loop-unswitch``: Unswitch loops
|
||||
----------------------------------
|
||||
|
||||
|
@ -38,6 +38,8 @@ the command line. To get you set up, follow the
|
||||
You can learn more about how to use arc to interact with
|
||||
Phabricator in the `Arcanist User Guide`_.
|
||||
|
||||
.. _phabricator-request-review-web:
|
||||
|
||||
Requesting a review via the web interface
|
||||
-----------------------------------------
|
||||
|
||||
@ -63,15 +65,16 @@ To upload a new patch:
|
||||
* Click *Differential*.
|
||||
* Click *+ Create Diff*.
|
||||
* Paste the text diff or browse to the patch file. Click *Create Diff*.
|
||||
* Leave the Repository field blank.
|
||||
* Leave this first Repository field blank. (We'll fill in the Repository
|
||||
later, when sending the review.)
|
||||
* Leave the drop down on *Create a new Revision...* and click *Continue*.
|
||||
* Enter a descriptive title and summary. The title and summary are usually
|
||||
in the form of a :ref:`commit message <commit messages>`.
|
||||
* Add reviewers (see below for advice) and subscribe mailing
|
||||
lists that you want to be included in the review. If your patch is
|
||||
for LLVM, add llvm-commits as a Subscriber; if your patch is for Clang,
|
||||
add cfe-commits.
|
||||
* Leave the Repository and Project fields blank.
|
||||
* Add reviewers (see below for advice). (If you set the Repository field
|
||||
correctly, llvm-commits or cfe-commits will be subscribed automatically;
|
||||
otherwise, you will have to manually subscribe them.)
|
||||
* In the Repository field, enter the name of the project (LLVM, Clang,
|
||||
etc.) to which the review should be sent.
|
||||
* Click *Save*.
|
||||
|
||||
To submit an updated patch:
|
||||
@ -81,7 +84,8 @@ To submit an updated patch:
|
||||
* Paste the updated diff or browse to the updated patch file. Click *Create Diff*.
|
||||
* Select the review you want to update from the *Attach To* dropdown and click
|
||||
*Continue*.
|
||||
* Leave the Repository and Project fields blank.
|
||||
* Leave the Repository field blank. (We previously filled out the Repository
|
||||
for the review request.)
|
||||
* Add comments about the changes in the new diff. Click *Save*.
|
||||
|
||||
Choosing reviewers: You typically pick one or two people as initial reviewers.
|
||||
|
@ -1020,8 +1020,8 @@ be passed by value.
|
||||
|
||||
.. _DEBUG:
|
||||
|
||||
The ``DEBUG()`` macro and ``-debug`` option
|
||||
-------------------------------------------
|
||||
The ``LLVM_DEBUG()`` macro and ``-debug`` option
|
||||
------------------------------------------------
|
||||
|
||||
Often when working on your pass you will put a bunch of debugging printouts and
|
||||
other code into your pass. After you get it working, you want to remove it, but
|
||||
@ -1033,14 +1033,14 @@ them out, allowing you to enable them if you need them in the future.
|
||||
|
||||
The ``llvm/Support/Debug.h`` (`doxygen
|
||||
<http://llvm.org/doxygen/Debug_8h_source.html>`__) file provides a macro named
|
||||
``DEBUG()`` that is a much nicer solution to this problem. Basically, you can
|
||||
put arbitrary code into the argument of the ``DEBUG`` macro, and it is only
|
||||
``LLVM_DEBUG()`` that is a much nicer solution to this problem. Basically, you can
|
||||
put arbitrary code into the argument of the ``LLVM_DEBUG`` macro, and it is only
|
||||
executed if '``opt``' (or any other tool) is run with the '``-debug``' command
|
||||
line argument:
|
||||
|
||||
.. code-block:: c++
|
||||
|
||||
DEBUG(dbgs() << "I am here!\n");
|
||||
LLVM_DEBUG(dbgs() << "I am here!\n");
|
||||
|
||||
Then you can run your pass like this:
|
||||
|
||||
@ -1051,13 +1051,13 @@ Then you can run your pass like this:
|
||||
$ opt < a.bc > /dev/null -mypass -debug
|
||||
I am here!
|
||||
|
||||
Using the ``DEBUG()`` macro instead of a home-brewed solution allows you to not
|
||||
Using the ``LLVM_DEBUG()`` macro instead of a home-brewed solution allows you to not
|
||||
have to create "yet another" command line option for the debug output for your
|
||||
pass. Note that ``DEBUG()`` macros are disabled for non-asserts builds, so they
|
||||
pass. Note that ``LLVM_DEBUG()`` macros are disabled for non-asserts builds, so they
|
||||
do not cause a performance impact at all (for the same reason, they should also
|
||||
not contain side-effects!).
|
||||
|
||||
One additional nice thing about the ``DEBUG()`` macro is that you can enable or
|
||||
One additional nice thing about the ``LLVM_DEBUG()`` macro is that you can enable or
|
||||
disable it directly in gdb. Just use "``set DebugFlag=0``" or "``set
|
||||
DebugFlag=1``" from the gdb if the program is running. If the program hasn't
|
||||
been started yet, you can always just run it with ``-debug``.
|
||||
@ -1076,10 +1076,10 @@ follows:
|
||||
.. code-block:: c++
|
||||
|
||||
#define DEBUG_TYPE "foo"
|
||||
DEBUG(dbgs() << "'foo' debug type\n");
|
||||
LLVM_DEBUG(dbgs() << "'foo' debug type\n");
|
||||
#undef DEBUG_TYPE
|
||||
#define DEBUG_TYPE "bar"
|
||||
DEBUG(dbgs() << "'bar' debug type\n");
|
||||
LLVM_DEBUG(dbgs() << "'bar' debug type\n");
|
||||
#undef DEBUG_TYPE
|
||||
|
||||
Then you can run your pass like this:
|
||||
@ -1435,7 +1435,7 @@ order (so you can do pointer arithmetic between elements), supports efficient
|
||||
push_back/pop_back operations, supports efficient random access to its elements,
|
||||
etc.
|
||||
|
||||
The advantage of SmallVector is that it allocates space for some number of
|
||||
The main advantage of SmallVector is that it allocates space for some number of
|
||||
elements (N) **in the object itself**. Because of this, if the SmallVector is
|
||||
dynamically smaller than N, no malloc is performed. This can be a big win in
|
||||
cases where the malloc/free call is far more expensive than the code that
|
||||
@ -1450,6 +1450,21 @@ SmallVectors are most useful when on the stack.
|
||||
SmallVector also provides a nice portable and efficient replacement for
|
||||
``alloca``.
|
||||
|
||||
SmallVector has grown a few other minor advantages over std::vector, causing
|
||||
``SmallVector<Type, 0>`` to be preferred over ``std::vector<Type>``.
|
||||
|
||||
#. std::vector is exception-safe, and some implementations have pessimizations
|
||||
that copy elements when SmallVector would move them.
|
||||
|
||||
#. SmallVector understands ``isPodLike<Type>`` and uses realloc aggressively.
|
||||
|
||||
#. Many LLVM APIs take a SmallVectorImpl as an out parameter (see the note
|
||||
below).
|
||||
|
||||
#. SmallVector with N equal to 0 is smaller than std::vector on 64-bit
|
||||
platforms, since it uses ``unsigned`` (instead of ``void*``) for its size
|
||||
and capacity.
|
||||
|
||||
.. note::
|
||||
|
||||
Prefer to use ``SmallVectorImpl<T>`` as a parameter type.
|
||||
@ -1482,12 +1497,10 @@ SmallVector also provides a nice portable and efficient replacement for
|
||||
<vector>
|
||||
^^^^^^^^
|
||||
|
||||
``std::vector`` is well loved and respected. It is useful when SmallVector
|
||||
isn't: when the size of the vector is often large (thus the small optimization
|
||||
will rarely be a benefit) or if you will be allocating many instances of the
|
||||
vector itself (which would waste space for elements that aren't in the
|
||||
container). vector is also useful when interfacing with code that expects
|
||||
vectors :).
|
||||
``std::vector<T>`` is well loved and respected. However, ``SmallVector<T, 0>``
|
||||
is often a better option due to the advantages listed above. std::vector is
|
||||
still useful when you need to store more than ``UINT32_MAX`` elements or when
|
||||
interfacing with code that expects vectors :).
|
||||
|
||||
One worthwhile note about std::vector: avoid code like this:
|
||||
|
||||
@ -1832,7 +1845,7 @@ A sorted 'vector'
|
||||
^^^^^^^^^^^^^^^^^
|
||||
|
||||
If you intend to insert a lot of elements, then do a lot of queries, a great
|
||||
approach is to use a vector (or other sequential container) with
|
||||
approach is to use an std::vector (or other sequential container) with
|
||||
std::sort+std::unique to remove duplicates. This approach works really well if
|
||||
your usage pattern has these two distinct phases (insert then query), and can be
|
||||
coupled with a good choice of :ref:`sequential container <ds_sequential>`.
|
||||
@ -2984,7 +2997,7 @@ Conceptually, ``LLVMContext`` provides isolation. Every LLVM entity
|
||||
in-memory IR belongs to an ``LLVMContext``. Entities in different contexts
|
||||
*cannot* interact with each other: ``Module``\ s in different contexts cannot be
|
||||
linked together, ``Function``\ s cannot be added to ``Module``\ s in different
|
||||
contexts, etc. What this means is that is is safe to compile on multiple
|
||||
contexts, etc. What this means is that it is safe to compile on multiple
|
||||
threads simultaneously, as long as no two threads operate on entities within the
|
||||
same context.
|
||||
|
||||
@ -3721,7 +3734,7 @@ Important Subclasses of the ``Instruction`` class
|
||||
|
||||
* ``CmpInst``
|
||||
|
||||
This subclass respresents the two comparison instructions,
|
||||
This subclass represents the two comparison instructions,
|
||||
`ICmpInst <LangRef.html#i_icmp>`_ (integer operands), and
|
||||
`FCmpInst <LangRef.html#i_fcmp>`_ (floating point operands).
|
||||
|
||||
|
@ -212,7 +212,7 @@ Related LLVM components
|
||||
Polly [7]_.
|
||||
|
||||
3. Loop Vectorizer: the Vectorization Plan aims to upgrade the infrastructure of
|
||||
the Loop Vectorizer and extend it to handle outer loops [8,9]_.
|
||||
the Loop Vectorizer and extend it to handle outer loops [8]_, [9]_.
|
||||
|
||||
References
|
||||
----------
|
||||
|
@ -1,12 +1,12 @@
|
||||
========================
|
||||
LLVM 6.0.0 Release Notes
|
||||
LLVM 7.0.0 Release Notes
|
||||
========================
|
||||
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
.. warning::
|
||||
These are in-progress notes for the upcoming LLVM 6 release.
|
||||
These are in-progress notes for the upcoming LLVM 7 release.
|
||||
Release notes for previous releases can be found on
|
||||
`the Download Page <http://releases.llvm.org/download.html>`_.
|
||||
|
||||
@ -15,7 +15,7 @@ Introduction
|
||||
============
|
||||
|
||||
This document contains the release notes for the LLVM Compiler Infrastructure,
|
||||
release 5.0.0. Here we describe the status of LLVM, including major improvements
|
||||
release 7.0.0. Here we describe the status of LLVM, including major improvements
|
||||
from the previous release, improvements in various subprojects of LLVM, and
|
||||
some of the current users of the code. All LLVM releases may be downloaded
|
||||
from the `LLVM releases web site <http://llvm.org/releases/>`_.
|
||||
@ -40,19 +40,74 @@ Non-comprehensive list of changes in this release
|
||||
functionality, or simply have a lot to talk about), see the `NOTE` below
|
||||
for adding a new subsection.
|
||||
|
||||
* The ``Redirects`` argument of ``llvm::sys::ExecuteAndWait`` and
|
||||
``llvm::sys::ExecuteNoWait`` was changed to an ``ArrayRef`` of optional
|
||||
``StringRef``'s to make it safer and more convenient to use.
|
||||
* Libraries have been renamed from 7.0 to 7. This change also impacts
|
||||
downstream libraries like lldb.
|
||||
|
||||
* The backend name was added to the Target Registry to allow run-time
|
||||
information to be fed back into TableGen. Out-of-tree targets will need to add
|
||||
the name used in the `def X : Target` definition to the call to
|
||||
`RegisterTarget`.
|
||||
* The LoopInstSimplify pass (-loop-instsimplify) has been removed.
|
||||
|
||||
* The ``Debugify`` pass was added to ``opt`` to facilitate testing of debug
|
||||
info preservation. This pass attaches synthetic ``DILocations`` and
|
||||
``DIVariables`` to the instructions in a ``Module``. The ``CheckDebugify``
|
||||
pass determines how much of the metadata is lost.
|
||||
* Symbols starting with ``?`` are no longer mangled by LLVM when using the
|
||||
Windows ``x`` or ``w`` IR mangling schemes.
|
||||
|
||||
* A new tool named :doc:`llvm-exegesis <CommandGuide/llvm-exegesis>` has been
|
||||
added. :program:`llvm-exegesis` automatically measures instruction scheduling
|
||||
properties (latency/uops) and provides a principled way to edit scheduling
|
||||
models.
|
||||
|
||||
* A new tool named :doc:`llvm-mca <CommandGuide/llvm-mca>` has been added.
|
||||
:program:`llvm-mca` is a static performance analysis tool that uses
|
||||
information available in LLVM to statically predict the performance of
|
||||
machine code for a specific CPU.
|
||||
|
||||
* The optimization flag to merge constants (-fmerge-all-constants) is no longer
|
||||
applied by default.
|
||||
|
||||
* Optimization of floating-point casts is improved. This may cause surprising
|
||||
results for code that is relying on the undefined behavior of overflowing
|
||||
casts. The optimization can be disabled by specifying a function attribute:
|
||||
"strict-float-cast-overflow"="false". This attribute may be created by the
|
||||
clang option :option:`-fno-strict-float-cast-overflow`.
|
||||
Code sanitizers can be used to detect affected patterns. The option for
|
||||
detecting this problem alone is "-fsanitize=float-cast-overflow":
|
||||
|
||||
.. code-block:: c
|
||||
|
||||
int main() {
|
||||
float x = 4294967296.0f;
|
||||
x = (float)((int)x);
|
||||
printf("junk in the ftrunc: %f\n", x);
|
||||
return 0;
|
||||
}
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
clang -O1 ftrunc.c -fsanitize=float-cast-overflow ; ./a.out
|
||||
ftrunc.c:5:15: runtime error: 4.29497e+09 is outside the range of representable values of type 'int'
|
||||
junk in the ftrunc: 0.000000
|
||||
|
||||
* ``LLVM_ON_WIN32`` is no longer set by ``llvm/Config/config.h`` and
|
||||
``llvm/Config/llvm-config.h``. If you used this macro, use the compiler-set
|
||||
``_WIN32`` instead which is set exactly when ``LLVM_ON_WIN32`` used to be set.
|
||||
|
||||
* The ``DEBUG`` macro has been renamed to ``LLVM_DEBUG``, the interface remains
|
||||
the same. If you used this macro you need to migrate to the new one.
|
||||
You should also clang-format your code to make it easier to integrate future
|
||||
changes locally. This can be done with the following bash commands:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
git grep -l 'DEBUG' | xargs perl -pi -e 's/\bDEBUG\s?\(/LLVM_DEBUG(/g'
|
||||
git diff -U0 master | ../clang/tools/clang-format/clang-format-diff.py -i -p1 -style LLVM
|
||||
|
||||
* Early support for UBsan, X-Ray instrumentation and libFuzzer (x86 and x86_64) for OpenBSD. Support for MSan
|
||||
(x86_64), X-Ray instrumentation and libFuzzer (x86 and x86_64) for FreeBSD.
|
||||
|
||||
* ``SmallVector<T, 0>`` shrank from ``sizeof(void*) * 4 + sizeof(T)`` to
|
||||
``sizeof(void*) + sizeof(unsigned) * 2``, smaller than ``std::vector<T>`` on
|
||||
64-bit platforms. The maximum capacity is now restricted to ``UINT32_MAX``.
|
||||
Since SmallVector doesn't have the exception-safety pessimizations some
|
||||
implementations saddle std::vector with and is better at using ``realloc``,
|
||||
it's now a better choice even on the heap (although when TinyPtrVector works,
|
||||
it's even smaller).
|
||||
|
||||
* Note..
|
||||
|
||||
@ -69,6 +124,14 @@ Non-comprehensive list of changes in this release
|
||||
Changes to the LLVM IR
|
||||
----------------------
|
||||
|
||||
* The signatures for the builtins @llvm.memcpy, @llvm.memmove, and @llvm.memset
|
||||
have changed. Alignment is no longer an argument, and is instead conveyed as
|
||||
parameter attributes.
|
||||
|
||||
* invariant.group.barrier has been renamed to launder.invariant.group.
|
||||
|
||||
* invariant.group metadata can now refer only to empty metadata nodes.
|
||||
|
||||
Changes to the ARM Backend
|
||||
--------------------------
|
||||
|
||||
@ -104,16 +167,26 @@ Changes to the AVR Target
|
||||
Changes to the OCaml bindings
|
||||
-----------------------------
|
||||
|
||||
During this release ...
|
||||
* Remove ``add_bb_vectorize``.
|
||||
|
||||
|
||||
Changes to the C API
|
||||
--------------------
|
||||
|
||||
During this release ...
|
||||
* Remove ``LLVMAddBBVectorizePass``. The implementation was removed and the C
|
||||
interface was made a deprecated no-op in LLVM 5. Use
|
||||
``LLVMAddSLPVectorizePass`` instead to get the supported SLP vectorizer.
|
||||
|
||||
Changes to the DAG infrastructure
|
||||
---------------------------------
|
||||
* ADDC/ADDE/SUBC/SUBE are now deprecated and will default to expand. Backends
|
||||
that wish to continue to use these opcodes should explicitly request so
|
||||
using ``setOperationAction`` in their ``TargetLowering``. New backends
|
||||
should use UADDO/ADDCARRY/USUBO/SUBCARRY instead of the deprecated opcodes.
|
||||
|
||||
External Open Source Projects Using LLVM 6
|
||||
* The SETCCE opcode has now been removed in favor of SETCCCARRY.
|
||||
|
||||
External Open Source Projects Using LLVM 7
|
||||
==========================================
|
||||
|
||||
* A project...
|
||||
|
@ -9,9 +9,9 @@ How To Validate a New Release
|
||||
Introduction
|
||||
============
|
||||
|
||||
This document contains information about testing the release candidates that will
|
||||
ultimately be the next LLVM release. For more information on how to manage the
|
||||
actual release, please refer to :doc:`HowToReleaseLLVM`.
|
||||
This document contains information about testing the release candidates that
|
||||
will ultimately be the next LLVM release. For more information on how to
|
||||
manage the actual release, please refer to :doc:`HowToReleaseLLVM`.
|
||||
|
||||
Overview of the Release Process
|
||||
-------------------------------
|
||||
@ -21,26 +21,28 @@ and it'll be the role of each volunteer to:
|
||||
|
||||
* Test and benchmark the previous release
|
||||
|
||||
* Test and benchmark each release candidate, comparing to the previous release and candidates
|
||||
* Test and benchmark each release candidate, comparing to the previous release
|
||||
and candidates
|
||||
|
||||
* Identify, reduce and report every regression found during tests and benchmarks
|
||||
|
||||
* Make sure the critical bugs get fixed and merged to the next release candidate
|
||||
|
||||
Not all bugs or regressions are show-stoppers and it's a bit of a grey area what
|
||||
should be fixed before the next candidate and what can wait until the next release.
|
||||
should be fixed before the next candidate and what can wait until the next
|
||||
release.
|
||||
|
||||
It'll depend on:
|
||||
|
||||
* The severity of the bug, how many people it affects and if it's a regression or a
|
||||
known bug. Known bugs are "unsupported features" and some bugs can be disabled if
|
||||
they have been implemented recently.
|
||||
* The severity of the bug, how many people it affects and if it's a regression
|
||||
or a known bug. Known bugs are "unsupported features" and some bugs can be
|
||||
disabled if they have been implemented recently.
|
||||
|
||||
* The stage in the release. Less critical bugs should be considered to be fixed between
|
||||
RC1 and RC2, but not so much at the end of it.
|
||||
* The stage in the release. Less critical bugs should be considered to be
|
||||
fixed between RC1 and RC2, but not so much at the end of it.
|
||||
|
||||
* If it's a correctness or a performance regression. Performance regression tends to be
|
||||
taken more lightly than correctness.
|
||||
* If it's a correctness or a performance regression. Performance regression
|
||||
tends to be taken more lightly than correctness.
|
||||
|
||||
.. _scripts:
|
||||
|
||||
@ -52,10 +54,12 @@ The scripts are in the ``utils/release`` directory.
|
||||
test-release.sh
|
||||
---------------
|
||||
|
||||
This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``,
|
||||
``libcxx``, ``libomp`` and ``clang-extra-tools``) in three stages, and will test the final stage.
|
||||
It'll have installed the final binaries on the Phase3/Releasei(+Asserts) directory, and
|
||||
that's the one you should use for the test-suite and other external tests.
|
||||
This script will check-out, configure and compile LLVM+Clang (+ most add-ons,
|
||||
like ``compiler-rt``, ``libcxx``, ``libomp`` and ``clang-extra-tools``) in
|
||||
three stages, and will test the final stage.
|
||||
It'll have installed the final binaries on the Phase3/Release(+Asserts)
|
||||
directory, and that's the one you should use for the test-suite and other
|
||||
external tests.
|
||||
|
||||
To run the script on a specific release candidate run::
|
||||
|
||||
@ -66,25 +70,32 @@ To run the script on a specific release candidate run::
|
||||
-test-asserts \
|
||||
-no-compare-files
|
||||
|
||||
Each system will require different options. For instance, x86_64 will obviously not need
|
||||
``-no-64bit`` while 32-bit systems will, or the script will fail.
|
||||
Each system will require different options. For instance, x86_64 will
|
||||
obviously not need ``-no-64bit`` while 32-bit systems will, or the script will
|
||||
fail.
|
||||
|
||||
The important flags to get right are:
|
||||
|
||||
* On the pre-release, you should change ``-rc 1`` to ``-final``. On RC2, change it to ``-rc 2`` and so on.
|
||||
* On the pre-release, you should change ``-rc 1`` to ``-final``. On RC2,
|
||||
change it to ``-rc 2`` and so on.
|
||||
|
||||
* On non-release testing, you can use ``-final`` in conjunction with ``-no-checkout``, but you'll have to
|
||||
create the ``final`` directory by hand and link the correct source dir to ``final/llvm.src``.
|
||||
* On non-release testing, you can use ``-final`` in conjunction with
|
||||
``-no-checkout``, but you'll have to create the ``final`` directory by hand
|
||||
and link the correct source dir to ``final/llvm.src``.
|
||||
|
||||
* For release candidates, you need ``-test-asserts``, or it won't create a "Release+Asserts" directory,
|
||||
which is needed for release testing and benchmarking. This will take twice as long.
|
||||
* For release candidates, you need ``-test-asserts``, or it won't create a
|
||||
"Release+Asserts" directory, which is needed for release testing and
|
||||
benchmarking. This will take twice as long.
|
||||
|
||||
* On the final candidate you just need Release builds, and that's the binary directory you'll have to pack.
|
||||
* On the final candidate you just need Release builds, and that's the binary
|
||||
directory you'll have to pack.
|
||||
|
||||
This script builds three phases of Clang+LLVM twice each (Release and Release+Asserts), so use
|
||||
screen or nohup to avoid headaches, since it'll take a long time.
|
||||
This script builds three phases of Clang+LLVM twice each (Release and
|
||||
Release+Asserts), so use screen or nohup to avoid headaches, since it'll take
|
||||
a long time.
|
||||
|
||||
Use the ``--help`` option to see all the options and chose it according to your needs.
|
||||
Use the ``--help`` option to see all the options and choose them according to
|
||||
your needs.
|
||||
|
||||
|
||||
findRegressions-nightly.py
|
||||
@ -100,9 +111,12 @@ Test Suite
|
||||
.. contents::
|
||||
:local:
|
||||
|
||||
Follow the `LNT Quick Start Guide <http://llvm.org/docs/lnt/quickstart.html>`__ link on how to set-up the test-suite
|
||||
Follow the `LNT Quick Start Guide
|
||||
<http://llvm.org/docs/lnt/quickstart.html>`__ link on how to set-up the
|
||||
test-suite
|
||||
|
||||
The binary location you'll have to use for testing is inside the ``rcN/Phase3/Release+Asserts/llvmCore-REL-RC.install``.
|
||||
The binary location you'll have to use for testing is inside the
|
||||
``rcN/Phase3/Release+Asserts/llvmCore-REL-RC.install``.
|
||||
Link that directory to an easier location and run the test-suite.
|
||||
|
||||
An example on the run command line, assuming you created a link from the correct
|
||||
@ -116,13 +130,16 @@ install directory to ``~/devel/llvm/install``::
|
||||
--cc ~/devel/llvm/install/bin/clang \
|
||||
--cxx ~/devel/llvm/install/bin/clang++
|
||||
|
||||
It should have no new regressions, compared to the previous release or release candidate. You don't need to fix
|
||||
all the bugs in the test-suite, since they're not necessarily meant to pass on all architectures all the time. This is
|
||||
due to the nature of the result checking, which relies on direct comparison, and most of the time, the failures are
|
||||
related to bad output checking, rather than bad code generation.
|
||||
It should have no new regressions, compared to the previous release or release
|
||||
candidate. You don't need to fix all the bugs in the test-suite, since they're
|
||||
not necessarily meant to pass on all architectures all the time. This is
|
||||
due to the nature of the result checking, which relies on direct comparison,
|
||||
and most of the time, the failures are related to bad output checking, rather
|
||||
than bad code generation.
|
||||
|
||||
If the errors are in LLVM itself, please report every single regression found as blocker, and all the other bugs
|
||||
as important, but not necessarily blocking the release to proceed. They can be set as "known failures" and to be
|
||||
If the errors are in LLVM itself, please report every single regression found
|
||||
as blocker, and all the other bugs as important, but not necessarily blocking
|
||||
the release to proceed. They can be set as "known failures" and to be
|
||||
fixed at a future date.
|
||||
|
||||
.. _pre-release-process:
|
||||
@ -134,23 +151,26 @@ Pre-Release Process
|
||||
:local:
|
||||
|
||||
When the release process is announced on the mailing list, you should prepare
|
||||
for the testing, by applying the same testing you'll do on the release candidates,
|
||||
on the previous release.
|
||||
for the testing, by applying the same testing you'll do on the release
|
||||
candidates, on the previous release.
|
||||
|
||||
You should:
|
||||
|
||||
* Download the previous release sources from http://llvm.org/releases/download.html.
|
||||
* Download the previous release sources from
|
||||
http://llvm.org/releases/download.html.
|
||||
|
||||
* Run the test-release.sh script on ``final`` mode (change ``-rc 1`` to ``-final``).
|
||||
* Run the test-release.sh script on ``final`` mode (change ``-rc 1`` to
|
||||
``-final``).
|
||||
|
||||
* Once all three stages are done, it'll test the final stage.
|
||||
|
||||
* Using the ``Phase3/Release+Asserts/llvmCore-MAJ.MIN-final.install`` base, run the test-suite.
|
||||
* Using the ``Phase3/Release+Asserts/llvmCore-MAJ.MIN-final.install`` base,
|
||||
run the test-suite.
|
||||
|
||||
If the final phase's ``make check-all`` failed, it's a good idea to also test the
|
||||
intermediate stages by going on the obj directory and running ``make check-all`` to find
|
||||
if there's at least one stage that passes (helps when reducing the error for bug report
|
||||
purposes).
|
||||
If the final phase's ``make check-all`` failed, it's a good idea to also test
|
||||
the intermediate stages by going on the obj directory and running
|
||||
``make check-all`` to find if there's at least one stage that passes (helps
|
||||
when reducing the error for bug report purposes).
|
||||
|
||||
.. _release-process:
|
||||
|
||||
@ -166,22 +186,23 @@ to them), and run the release test as above.
|
||||
|
||||
You should:
|
||||
|
||||
* Download the current candidate sources from where the release manager points you
|
||||
(ex. http://llvm.org/pre-releases/3.3/rc1/).
|
||||
* Download the current candidate sources from where the release manager points
|
||||
you (ex. http://llvm.org/pre-releases/3.3/rc1/).
|
||||
|
||||
* Repeat the steps above with ``-rc 1``, ``-rc 2`` etc modes and run the test-suite
|
||||
the same way.
|
||||
* Repeat the steps above with ``-rc 1``, ``-rc 2`` etc modes and run the
|
||||
test-suite the same way.
|
||||
|
||||
* Compare the results, report all errors on Bugzilla and publish the binary blob
|
||||
where the release manager can grab it.
|
||||
|
||||
Once the release manages announces that the latest candidate is the good one, you
|
||||
have to pack the ``Release`` (no Asserts) install directory on ``Phase3`` and that
|
||||
will be the official binary.
|
||||
Once the release manager announces that the latest candidate is the good one,
|
||||
you have to pack the ``Release`` (no Asserts) install directory on ``Phase3``
|
||||
and that will be the official binary.
|
||||
|
||||
* Rename (or link) ``clang+llvm-REL-ARCH-ENV`` to the .install directory
|
||||
|
||||
* Tar that into the same name with ``.tar.gz`` extensioan from outside the directory
|
||||
* Tar that into the same name with ``.tar.gz`` extension from outside the
|
||||
directory
|
||||
|
||||
* Make it available for the release manager to download
|
||||
|
||||
@ -196,15 +217,15 @@ Bug Reporting Process
|
||||
If you found regressions or failures when comparing a release candidate with the
|
||||
previous release, follow the rules below:
|
||||
|
||||
* Critical bugs on compilation should be fixed as soon as possible, possibly before
|
||||
releasing the binary blobs.
|
||||
* Critical bugs on compilation should be fixed as soon as possible, possibly
|
||||
before releasing the binary blobs.
|
||||
|
||||
* Check-all tests should be fixed before the next release candidate, but can wait
|
||||
until the test-suite run is finished.
|
||||
* Check-all tests should be fixed before the next release candidate, but can
|
||||
wait until the test-suite run is finished.
|
||||
|
||||
* Bugs in the test suite or unimportant check-all tests can be fixed in between
|
||||
release candidates.
|
||||
|
||||
* New features or recent big changes, when close to the release, should have done
|
||||
in a way that it's easy to disable. If they misbehave, prefer disabling them than
|
||||
releasing an unstable (but untested) binary package.
|
||||
* New features or recent big changes, when close to the release, should have
|
||||
been done in a way that it's easy to disable. If they misbehave, prefer disabling
|
||||
them than releasing an unstable (but untested) binary package.
|
||||
|
@ -18,7 +18,8 @@ Currently, the allocator supports (was tested on) the following architectures:
|
||||
- i386 (& i686) (32-bit);
|
||||
- x86_64 (64-bit);
|
||||
- armhf (32-bit);
|
||||
- AArch64 (64-bit).
|
||||
- AArch64 (64-bit);
|
||||
- MIPS (32-bit & 64-bit).
|
||||
|
||||
The name "Scudo" has been retained from the initial implementation (Escudo
|
||||
meaning Shield in Spanish and Portuguese).
|
||||
@ -26,32 +27,45 @@ meaning Shield in Spanish and Portuguese).
|
||||
Design
|
||||
======
|
||||
|
||||
Allocator
|
||||
---------
|
||||
Scudo can be considered a Frontend to the Sanitizers' common allocator (later
|
||||
referenced as the Backend). It is split between a Primary allocator, fast and
|
||||
efficient, that services smaller allocation sizes, and a Secondary allocator
|
||||
that services larger allocation sizes and is backed by the operating system
|
||||
memory mapping primitives.
|
||||
|
||||
Scudo was designed with security in mind, but aims at striking a good balance
|
||||
between security and performance. It is highly tunable and configurable.
|
||||
|
||||
Chunk Header
|
||||
------------
|
||||
Every chunk of heap memory will be preceded by a chunk header. This has two
|
||||
purposes, the first one being to store various information about the chunk,
|
||||
the second one being to detect potential heap overflows. In order to achieve
|
||||
this, the header will be checksumed, involving the pointer to the chunk itself
|
||||
this, the header will be checksummed, involving the pointer to the chunk itself
|
||||
and a global secret. Any corruption of the header will be detected when said
|
||||
header is accessed, and the process terminated.
|
||||
|
||||
The following information is stored in the header:
|
||||
|
||||
- the 16-bit checksum;
|
||||
- the unused bytes amount for that chunk, which is necessary for computing the
|
||||
size of the chunk;
|
||||
- the class ID for that chunk, which is the "bucket" where the chunk resides
|
||||
for Primary backed allocations, or 0 for Secondary backed allocations;
|
||||
- the size (Primary) or unused bytes amount (Secondary) for that chunk, which is
|
||||
necessary for computing the size of the chunk;
|
||||
- the state of the chunk (available, allocated or quarantined);
|
||||
- the allocation type (malloc, new, new[] or memalign), to detect potential
|
||||
mismatches in the allocation APIs used;
|
||||
- the offset of the chunk, which is the distance in bytes from the beginning of
|
||||
the returned chunk to the beginning of the backend allocation;
|
||||
- a 8-bit salt.
|
||||
the returned chunk to the beginning of the Backend allocation;
|
||||
|
||||
This header fits within 8 bytes, on all platforms supported.
|
||||
|
||||
The checksum is computed as a CRC32 (made faster with hardware support)
|
||||
of the global secret, the chunk pointer itself, and the 8 bytes of header with
|
||||
the checksum field zeroed out.
|
||||
the checksum field zeroed out. It is not intended to be cryptographically
|
||||
strong.
|
||||
|
||||
The header is atomically loaded and stored to prevent races. This is important
|
||||
as two consecutive chunks could belong to different threads. We also want to
|
||||
@ -60,9 +74,9 @@ local copies of the header for this purpose.
|
||||
|
||||
Delayed Freelist
|
||||
-----------------
|
||||
A delayed freelist allows us to not return a chunk directly to the backend, but
|
||||
A delayed freelist allows us to not return a chunk directly to the Backend, but
|
||||
to keep it aside for a while. Once a criterion is met, the delayed freelist is
|
||||
emptied, and the quarantined chunks are returned to the backend. This helps
|
||||
emptied, and the quarantined chunks are returned to the Backend. This helps
|
||||
mitigate use-after-free vulnerabilities by reducing the determinism of the
|
||||
allocation and deallocation patterns.
|
||||
|
||||
@ -74,7 +88,7 @@ Randomness
|
||||
----------
|
||||
It is important for the allocator to not make use of fixed addresses. We use
|
||||
the dynamic base option for the SizeClassAllocator, allowing us to benefit
|
||||
from the randomness of mmap.
|
||||
from the randomness of the system memory mapping functions.
|
||||
|
||||
Usage
|
||||
=====
|
||||
@ -98,26 +112,39 @@ You may also build Scudo like this:
|
||||
|
||||
cd $LLVM/projects/compiler-rt/lib
|
||||
clang++ -fPIC -std=c++11 -msse4.2 -O2 -I. scudo/*.cpp \
|
||||
$(\ls sanitizer_common/*.{cc,S} | grep -v "sanitizer_termination\|sanitizer_common_nolibc") \
|
||||
-shared -o scudo-allocator.so -pthread
|
||||
$(\ls sanitizer_common/*.{cc,S} | grep -v "sanitizer_termination\|sanitizer_common_nolibc\|sancov_\|sanitizer_unwind\|sanitizer_symbol") \
|
||||
-shared -o libscudo.so -pthread
|
||||
|
||||
and then use it with existing binaries as follows:
|
||||
|
||||
.. code::
|
||||
|
||||
LD_PRELOAD=`pwd`/scudo-allocator.so ./a.out
|
||||
LD_PRELOAD=`pwd`/libscudo.so ./a.out
|
||||
|
||||
Clang
|
||||
-----
|
||||
With a recent version of Clang (post rL317337), the allocator can be linked with
|
||||
a binary at compilation using the ``-fsanitize=scudo`` command-line argument, if
|
||||
the target platform is supported. Currently, the only other Sanitizer Scudo is
|
||||
compatible with is UBSan (eg: ``-fsanitize=scudo,undefined``). Compiling with
|
||||
Scudo will also enforce PIE for the output binary.
|
||||
|
||||
Options
|
||||
-------
|
||||
Several aspects of the allocator can be configured through the following ways:
|
||||
Several aspects of the allocator can be configured on a per process basis
|
||||
through the following ways:
|
||||
|
||||
- at compile time, by defining ``SCUDO_DEFAULT_OPTIONS`` to the options string
|
||||
you want set by default;
|
||||
|
||||
- by defining a ``__scudo_default_options`` function in one's program that
|
||||
returns the options string to be parsed. Said function must have the following
|
||||
prototype: ``extern "C" const char* __scudo_default_options()``.
|
||||
prototype: ``extern "C" const char* __scudo_default_options(void)``, with a
|
||||
default visibility. This will override the compile time define;
|
||||
|
||||
- through the environment variable SCUDO_OPTIONS, containing the options string
|
||||
to be parsed. Options defined this way will override any definition made
|
||||
through ``__scudo_default_options``;
|
||||
through ``__scudo_default_options``.
|
||||
|
||||
The options string follows a syntax similar to ASan, where distinct options
|
||||
can be assigned in the same string, separated by colons.
|
||||
@ -146,7 +173,9 @@ The following options are available:
|
||||
| | | | the actual deallocation of chunks. Lower value |
|
||||
| | | | may reduce memory usage but decrease the |
|
||||
| | | | effectiveness of the mitigation; a negative |
|
||||
| | | | value will fallback to the defaults. |
|
||||
| | | | value will fallback to the defaults. Setting |
|
||||
| | | | *both* this and ThreadLocalQuarantineSizeKb to |
|
||||
| | | | zero will disable the quarantine entirely. |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| QuarantineChunksUpToSize | 2048 | 512 | Size (in bytes) up to which chunks can be |
|
||||
| | | | quarantined. |
|
||||
@ -154,7 +183,9 @@ The following options are available:
|
||||
| ThreadLocalQuarantineSizeKb | 1024 | 256 | The size (in Kb) of per-thread cache use to |
|
||||
| | | | offload the global quarantine. Lower value may |
|
||||
| | | | reduce memory usage but might increase |
|
||||
| | | | contention on the global quarantine. |
|
||||
| | | | contention on the global quarantine. Setting |
|
||||
| | | | *both* this and QuarantineSizeKb to zero will |
|
||||
| | | | disable the quarantine entirely. |
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
| DeallocationTypeMismatch | true | true | Whether or not we report errors on |
|
||||
| | | | malloc/delete, new/free, new/delete[], etc. |
|
||||
@ -167,7 +198,6 @@ The following options are available:
|
||||
+-----------------------------+----------------+----------------+------------------------------------------------+
|
||||
|
||||
Allocator related common Sanitizer options can also be passed through Scudo
|
||||
options, such as ``allocator_may_return_null``. A detailed list including those
|
||||
can be found here:
|
||||
options, such as ``allocator_may_return_null`` or ``abort_on_error``. A detailed
|
||||
list including those can be found here:
|
||||
https://github.com/google/sanitizers/wiki/SanitizerCommonFlags.
|
||||
|
||||
|
@ -77,8 +77,8 @@ source from generated code.
|
||||
|
||||
.. _intro_debugopt:
|
||||
|
||||
Debugging optimized code
|
||||
------------------------
|
||||
Debug information and optimizations
|
||||
-----------------------------------
|
||||
|
||||
An extremely high priority of LLVM debugging information is to make it interact
|
||||
well with optimizations and analysis. In particular, the LLVM debug
|
||||
@ -1464,3 +1464,180 @@ Improving LLVM's CodeView support is a process of finding interesting type
|
||||
records, constructing a C++ test case that makes MSVC emit those records,
|
||||
dumping the records, understanding them, and then generating equivalent records
|
||||
in LLVM's backend.
|
||||
|
||||
Testing Debug Info Preservation in Optimizations
|
||||
================================================
|
||||
|
||||
The following paragraphs are an introduction to the debugify utility
|
||||
and examples of how to use it in regression tests to check debug info
|
||||
preservation after optimizations.
|
||||
|
||||
The ``debugify`` utility
|
||||
------------------------
|
||||
|
||||
The ``debugify`` synthetic debug info testing utility consists of two
|
||||
main parts: the ``debugify`` pass and the ``check-debugify`` pass. They are
|
||||
meant to be used with ``opt`` for development purposes.
|
||||
|
||||
The first applies synthetic debug information to every instruction of the module,
|
||||
while the latter checks that this DI is still available after an optimization
|
||||
has occurred, reporting any errors/warnings while doing so.
|
||||
|
||||
The instructions are assigned sequentially increasing line locations,
|
||||
and are immediately used by debug value intrinsics when possible.
|
||||
|
||||
For example, here is a module before:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
define dso_local void @f(i32* %x) {
|
||||
entry:
|
||||
%x.addr = alloca i32*, align 8
|
||||
store i32* %x, i32** %x.addr, align 8
|
||||
%0 = load i32*, i32** %x.addr, align 8
|
||||
store i32 10, i32* %0, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
and after running ``opt -debugify`` on it we get:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
define dso_local void @f(i32* %x) !dbg !6 {
|
||||
entry:
|
||||
%x.addr = alloca i32*, align 8, !dbg !12
|
||||
call void @llvm.dbg.value(metadata i32** %x.addr, metadata !9, metadata !DIExpression()), !dbg !12
|
||||
store i32* %x, i32** %x.addr, align 8, !dbg !13
|
||||
%0 = load i32*, i32** %x.addr, align 8, !dbg !14
|
||||
call void @llvm.dbg.value(metadata i32* %0, metadata !11, metadata !DIExpression()), !dbg !14
|
||||
store i32 10, i32* %0, align 4, !dbg !15
|
||||
ret void, !dbg !16
|
||||
}
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.debugify = !{!3, !4}
|
||||
!llvm.module.flags = !{!5}
|
||||
|
||||
!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
|
||||
!1 = !DIFile(filename: "debugify-sample.ll", directory: "/")
|
||||
!2 = !{}
|
||||
!3 = !{i32 5}
|
||||
!4 = !{i32 2}
|
||||
!5 = !{i32 2, !"Debug Info Version", i32 3}
|
||||
!6 = distinct !DISubprogram(name: "f", linkageName: "f", scope: null, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !8)
|
||||
!7 = !DISubroutineType(types: !2)
|
||||
!8 = !{!9, !11}
|
||||
!9 = !DILocalVariable(name: "1", scope: !6, file: !1, line: 1, type: !10)
|
||||
!10 = !DIBasicType(name: "ty64", size: 64, encoding: DW_ATE_unsigned)
|
||||
!11 = !DILocalVariable(name: "2", scope: !6, file: !1, line: 3, type: !10)
|
||||
!12 = !DILocation(line: 1, column: 1, scope: !6)
|
||||
!13 = !DILocation(line: 2, column: 1, scope: !6)
|
||||
!14 = !DILocation(line: 3, column: 1, scope: !6)
|
||||
!15 = !DILocation(line: 4, column: 1, scope: !6)
|
||||
!16 = !DILocation(line: 5, column: 1, scope: !6)
|
||||
|
||||
The following is an example of the -check-debugify output:
|
||||
|
||||
.. code-block:: none
|
||||
|
||||
$ opt -enable-debugify -loop-vectorize llvm/test/Transforms/LoopVectorize/i8-induction.ll -disable-output
|
||||
ERROR: Instruction with empty DebugLoc in function f -- %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
|
||||
|
||||
Errors/warnings can range from instructions with empty debug location to an
|
||||
instruction having a type that's incompatible with the source variable it describes,
|
||||
all the way to missing lines and missing debug value intrinsics.
|
||||
|
||||
Fixing errors
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Each of the errors above has a relevant API available to fix it.
|
||||
|
||||
* In the case of a missing debug location, use ``Instruction::setDebugLoc`` or possibly
|
||||
``IRBuilder::setCurrentDebugLocation`` when using a Builder and the new location
|
||||
should be reused.
|
||||
|
||||
* When a debug value has an incompatible type, ``llvm::replaceAllDbgUsesWith`` can be used.
|
||||
After a RAUW call an incompatible type error can occur because RAUW does not handle
|
||||
widening and narrowing of variables while ``llvm::replaceAllDbgUsesWith`` does. It is
|
||||
also capable of changing the DWARF expression used by the debugger to describe the variable.
|
||||
It also prevents use-before-def by salvaging or deleting invalid debug values.
|
||||
|
||||
* When a debug value is missing, ``llvm::salvageDebugInfo`` can be used when no replacement
|
||||
exists, or ``llvm::replaceAllDbgUsesWith`` when a replacement exists.
|
||||
|
||||
Using ``debugify``
|
||||
------------------
|
||||
|
||||
In order for ``check-debugify`` to work, the DI must be coming from
|
||||
``debugify``. Thus, modules with existing DI will be skipped.
|
||||
|
||||
The most straightforward way to use ``debugify`` is as follows::
|
||||
|
||||
$ opt -debugify -pass-to-test -check-debugify sample.ll
|
||||
|
||||
This will inject synthetic DI into ``sample.ll``, run the ``pass-to-test``,
|
||||
and then check for missing DI.
|
||||
|
||||
Some other ways to run debugify are available:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# Same as the above example.
|
||||
$ opt -enable-debugify -pass-to-test sample.ll
|
||||
|
||||
# Suppresses verbose debugify output.
|
||||
$ opt -enable-debugify -debugify-quiet -pass-to-test sample.ll
|
||||
|
||||
# Prepend -debugify before and append -check-debugify -strip after
|
||||
# each pass on the pipeline (similar to -verify-each).
|
||||
$ opt -debugify-each -O2 sample.ll
|
||||
|
||||
``debugify`` can also be used to test a backend, e.g.:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
$ opt -debugify < sample.ll | llc -o -
|
||||
|
||||
``debugify`` in regression tests
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
The ``-debugify`` pass is especially helpful when it comes to testing that
|
||||
a given pass preserves DI while transforming the module. For this to work,
|
||||
the ``-debugify`` output must be stable enough to use in regression tests.
|
||||
Changes to this pass are not allowed to break existing tests.
|
||||
|
||||
It allows us to test for DI loss in the same tests we check that the
|
||||
transformation is actually doing what it should.
|
||||
|
||||
Here is an example from ``test/Transforms/InstCombine/cast-mul-select.ll``:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
; RUN: opt < %s -debugify -instcombine -S | FileCheck %s --check-prefix=DBGINFO
|
||||
|
||||
define i32 @mul(i32 %x, i32 %y) {
|
||||
; DBGINFO-LABEL: @mul(
|
||||
; DBGINFO-NEXT: [[C:%.*]] = mul i32 {{.*}}
|
||||
; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[C]]
|
||||
; DBGINFO-NEXT: [[D:%.*]] = and i32 {{.*}}
|
||||
; DBGINFO-NEXT: call void @llvm.dbg.value(metadata i32 [[D]]
|
||||
|
||||
%A = trunc i32 %x to i8
|
||||
%B = trunc i32 %y to i8
|
||||
%C = mul i8 %A, %B
|
||||
%D = zext i8 %C to i32
|
||||
ret i32 %D
|
||||
}
|
||||
|
||||
Here we test that the two ``dbg.value`` intrinsics are preserved and
|
||||
are correctly pointing to the ``[[C]]`` and ``[[D]]`` variables.
|
||||
|
||||
.. note::
|
||||
|
||||
Note that when writing this kind of regression test, it is important
|
||||
to make them as robust as possible. That's why we should try to avoid
|
||||
hardcoding line/variable numbers in check lines. If for example you test
|
||||
for a ``DILocation`` to have a specific line number, and someone later adds
|
||||
an instruction before the one we check, the test will fail. In the cases this
|
||||
can't be avoided (say, if a test wouldn't be precise enough), moving the
|
||||
test to its own file is preferred.
|
||||
|
1099
docs/SpeculativeLoadHardening.md
Normal file
1099
docs/SpeculativeLoadHardening.md
Normal file
File diff suppressed because it is too large
Load Diff
@ -209,10 +209,9 @@ Implementations of the System Library interface are separated by their general
|
||||
class of operating system. Currently only Unix and Win32 classes are defined
|
||||
but more could be added for other operating system classifications. To
|
||||
distinguish which implementation to compile, the code in ``lib/System`` uses
|
||||
the ``LLVM_ON_UNIX`` and ``LLVM_ON_WIN32`` ``#defines`` provided via configure
|
||||
through the ``llvm/Config/config.h`` file. Each source file in ``lib/System``,
|
||||
after implementing the generic (operating system independent) functionality
|
||||
needs to include the correct implementation using a set of
|
||||
the ``LLVM_ON_UNIX`` and ``_WIN32`` ``#defines``. Each source file in
|
||||
``lib/System``, after implementing the generic (operating system independent)
|
||||
functionality needs to include the correct implementation using a set of
|
||||
``#if defined(LLVM_ON_XYZ)`` directives. For example, if we had
|
||||
``lib/System/File.cpp``, we'd expect to see in that file:
|
||||
|
||||
@ -221,7 +220,7 @@ needs to include the correct implementation using a set of
|
||||
#if defined(LLVM_ON_UNIX)
|
||||
#include "Unix/File.cpp"
|
||||
#endif
|
||||
#if defined(LLVM_ON_WIN32)
|
||||
#if defined(_WIN32)
|
||||
#include "Win32/File.cpp"
|
||||
#endif
|
||||
|
||||
|
@ -221,6 +221,22 @@ OptParserDefs
|
||||
|
||||
**Purpose**: Print enum values for a class.
|
||||
|
||||
SearchableTables
|
||||
----------------
|
||||
|
||||
**Purpose**: Generate custom searchable tables.
|
||||
|
||||
**Output**: Enums, global tables and lookup helper functions.
|
||||
|
||||
**Usage**: This backend allows generating free-form, target-specific tables
|
||||
from TableGen records. The ARM and AArch64 targets use this backend to generate
|
||||
tables of system registers; the AMDGPU target uses it to generate meta-data
|
||||
about complex image and memory buffer instructions.
|
||||
|
||||
More documentation is available in ``include/llvm/TableGen/SearchableTable.td``,
|
||||
which also contains the definitions of TableGen classes which must be
|
||||
instantiated in order to define the enums and tables emitted by this backend.
|
||||
|
||||
CTags
|
||||
-----
|
||||
|
||||
@ -419,6 +435,127 @@ AttrDocs
|
||||
**Purpose**: Creates ``AttributeReference.rst`` from ``AttrDocs.td``, and is
|
||||
used for documenting user-facing attributes.
|
||||
|
||||
General BackEnds
|
||||
================
|
||||
|
||||
JSON
|
||||
----
|
||||
|
||||
**Purpose**: Output all the values in every ``def``, as a JSON data
|
||||
structure that can be easily parsed by a variety of languages. Useful
|
||||
for writing custom backends without having to modify TableGen itself,
|
||||
or for performing auxiliary analysis on the same TableGen data passed
|
||||
to a built-in backend.
|
||||
|
||||
**Output**:
|
||||
|
||||
The root of the output file is a JSON object (i.e. dictionary),
|
||||
containing the following fixed keys:
|
||||
|
||||
* ``!tablegen_json_version``: a numeric version field that will
|
||||
increase if an incompatible change is ever made to the structure of
|
||||
this data. The format described here corresponds to version 1.
|
||||
|
||||
* ``!instanceof``: a dictionary whose keys are the class names defined
|
||||
in the TableGen input. For each key, the corresponding value is an
|
||||
array of strings giving the names of ``def`` records that derive
|
||||
from that class. So ``root["!instanceof"]["Instruction"]``, for
|
||||
example, would list the names of all the records deriving from the
|
||||
class ``Instruction``.
|
||||
|
||||
For each ``def`` record, the root object also has a key for the record
|
||||
name. The corresponding value is a subsidiary object containing the
|
||||
following fixed keys:
|
||||
|
||||
* ``!superclasses``: an array of strings giving the names of all the
|
||||
classes that this record derives from.
|
||||
|
||||
* ``!fields``: an array of strings giving the names of all the variables
|
||||
in this record that were defined with the ``field`` keyword.
|
||||
|
||||
* ``!name``: a string giving the name of the record. This is always
|
||||
identical to the key in the JSON root object corresponding to this
|
||||
record's dictionary. (If the record is anonymous, the name is
|
||||
arbitrary.)
|
||||
|
||||
* ``!anonymous``: a boolean indicating whether the record's name was
|
||||
specified by the TableGen input (if it is ``false``), or invented by
|
||||
TableGen itself (if ``true``).
|
||||
|
||||
For each variable defined in a record, the ``def`` object for that
|
||||
record also has a key for the variable name. The corresponding value
|
||||
is a translation into JSON of the variable's value, using the
|
||||
conventions described below.
|
||||
|
||||
Some TableGen data types are translated directly into the
|
||||
corresponding JSON type:
|
||||
|
||||
* A completely undefined value (e.g. for a variable declared without
|
||||
initializer in some superclass of this record, and never initialized
|
||||
by the record itself or any other superclass) is emitted as the JSON
|
||||
``null`` value.
|
||||
|
||||
* ``int`` and ``bit`` values are emitted as numbers. Note that
|
||||
TableGen ``int`` values are capable of holding integers too large to
|
||||
be exactly representable in IEEE double precision. The integer
|
||||
literal in the JSON output will show the full exact integer value.
|
||||
So if you need to retrieve large integers with full precision, you
|
||||
should use a JSON reader capable of translating such literals back
|
||||
into 64-bit integers without losing precision, such as Python's
|
||||
standard ``json`` module.
|
||||
|
||||
* ``string`` and ``code`` values are emitted as JSON strings.
|
||||
|
||||
* ``list<T>`` values, for any element type ``T``, are emitted as JSON
|
||||
arrays. Each element of the array is represented in turn using these
|
||||
same conventions.
|
||||
|
||||
* ``bits`` values are also emitted as arrays. A ``bits`` array is
|
||||
ordered from least-significant bit to most-significant. So the
|
||||
element with index ``i`` corresponds to the bit described as
|
||||
``x{i}`` in TableGen source. However, note that this means that
|
||||
scripting languages are likely to *display* the array in the
|
||||
opposite order from the way it appears in the TableGen source or in
|
||||
the diagnostic ``-print-records`` output.
|
||||
|
||||
All other TableGen value types are emitted as a JSON object,
|
||||
containing two standard fields: ``kind`` is a discriminator describing
|
||||
which kind of value the object represents, and ``printable`` is a
|
||||
string giving the same representation of the value that would appear
|
||||
in ``-print-records``.
|
||||
|
||||
* A reference to a ``def`` object has ``kind=="def"``, and has an
|
||||
extra field ``def`` giving the name of the object referred to.
|
||||
|
||||
* A reference to another variable in the same record has
|
||||
``kind=="var"``, and has an extra field ``var`` giving the name of
|
||||
the variable referred to.
|
||||
|
||||
* A reference to a specific bit of a ``bits``-typed variable in the
|
||||
same record has ``kind=="varbit"``, and has two extra fields:
|
||||
``var`` gives the name of the variable referred to, and ``index``
|
||||
gives the index of the bit.
|
||||
|
||||
* A value of type ``dag`` has ``kind=="dag"``, and has two extra
|
||||
fields. ``operator`` gives the initial value after the opening
|
||||
parenthesis of the dag initializer; ``args`` is an array giving the
|
||||
following arguments. The elements of ``args`` are arrays of length
|
||||
2, giving the value of each argument followed by its colon-suffixed
|
||||
name (if any). For example, in the JSON representation of the dag
|
||||
value ``(Op 22, "hello":$foo)`` (assuming that ``Op`` is the name of
|
||||
a record defined elsewhere with a ``def`` statement):
|
||||
|
||||
* ``operator`` will be an object in which ``kind=="def"`` and
|
||||
``def=="Op"``
|
||||
|
||||
* ``args`` will be the array ``[[22, null], ["hello", "foo"]]``.
|
||||
|
||||
* If any other kind of value or complicated expression appears in the
|
||||
output, it will have ``kind=="complex"``, and no additional fields.
|
||||
These values are not expected to be needed by backends. The standard
|
||||
``printable`` field can be used to extract a representation of them
|
||||
in TableGen source syntax if necessary.
|
||||
|
||||
How to write a back-end
|
||||
=======================
|
||||
|
||||
|
@ -152,8 +152,8 @@ supported include:
|
||||
``foreach <var> = [ <list> ] in <def>``
|
||||
Replicate <body> or <def>, replacing instances of <var> with each value
|
||||
in <list>. <var> is scoped at the level of the ``foreach`` loop and must
|
||||
not conflict with any other object introduced in <body> or <def>. Currently
|
||||
only ``def``\s are expanded within <body>.
|
||||
not conflict with any other object introduced in <body> or <def>. Only
|
||||
``def``\s and ``defm``\s are expanded within <body>.
|
||||
|
||||
``foreach <var> = 0-15 in ...``
|
||||
|
||||
@ -165,6 +165,24 @@ supported include:
|
||||
remaining elements in the list may be arbitrary other values, including
|
||||
nested ``dag`` values.
|
||||
|
||||
``!con(a, b, ...)``
|
||||
Concatenate two or more DAG nodes. Their operations must be equal.
|
||||
|
||||
Example: !con((op a1:$name1, a2:$name2), (op b1:$name3)) results in
|
||||
the DAG node (op a1:$name1, a2:$name2, b1:$name3).
|
||||
|
||||
``!dag(op, children, names)``
|
||||
Generate a DAG node programmatically. 'children' and 'names' must be lists
|
||||
of equal length or unset ('?'). 'names' must be a 'list<string>'.
|
||||
|
||||
Due to limitations of the type system, 'children' must be a list of items
|
||||
of a common type. In practice, this means that they should either have the
|
||||
same type or be records with a common superclass. Mixing dag and non-dag
|
||||
items is not possible. However, '?' can be used.
|
||||
|
||||
Example: !dag(op, [a1, a2, ?], ["name1", "name2", "name3"]) results in
|
||||
(op a1:$name1, a2:$name2, ?:$name3).
|
||||
|
||||
``!listconcat(a, b, ...)``
|
||||
A list value that is the result of concatenating the 'a' and 'b' lists.
|
||||
The lists must have the same element type.
|
||||
@ -182,19 +200,48 @@ supported include:
|
||||
the operand of the paste.
|
||||
|
||||
``!cast<type>(a)``
|
||||
A symbol of type *type* obtained by looking up the string 'a' in the symbol
|
||||
table. If the type of 'a' does not match *type*, TableGen aborts with an
|
||||
error. !cast<string> is a special case in that the argument must be an
|
||||
object defined by a 'def' construct.
|
||||
If 'a' is a string, a record of type *type* obtained by looking up the
|
||||
string 'a' in the list of all records defined by the time that all template
|
||||
arguments in 'a' are fully resolved.
|
||||
|
||||
For example, if !cast<type>(a) appears in a multiclass definition, or in a
|
||||
class instantiated inside of a multiclass definition, and 'a' does not
|
||||
reference any template arguments of the multiclass, then a record of name
|
||||
'a' must be instantiated earlier in the source file. If 'a' does reference
|
||||
a template argument, then the lookup is delayed until defm statements
|
||||
instantiating the multiclass (or later, if the defm occurs in another
|
||||
multiclass and template arguments of the inner multiclass that are
|
||||
referenced by 'a' are substituted by values that themselves contain
|
||||
references to template arguments of the outer multiclass).
|
||||
|
||||
If the type of 'a' does not match *type*, TableGen aborts with an error.
|
||||
|
||||
Otherwise, perform a normal type cast e.g. between an int and a bit, or
|
||||
between record types. This allows casting a record to a subclass, though if
|
||||
the types do not match, constant folding will be inhibited. !cast<string>
|
||||
is a special case in that the argument can be an int or a record. In the
|
||||
latter case, the record's name is returned.
|
||||
|
||||
``!isa<type>(a)``
|
||||
Returns an integer: 1 if 'a' is dynamically of the given type, 0 otherwise.
|
||||
|
||||
``!subst(a, b, c)``
|
||||
If 'a' and 'b' are of string type or are symbol references, substitute 'b'
|
||||
for 'a' in 'c.' This operation is analogous to $(subst) in GNU make.
|
||||
|
||||
``!foreach(a, b, c)``
|
||||
For each member of dag or list 'b' apply operator 'c.' 'a' is a dummy
|
||||
variable that should be declared as a member variable of an instantiated
|
||||
class. This operation is analogous to $(foreach) in GNU make.
|
||||
For each member of dag or list 'b' apply operator 'c'. 'a' is the name
|
||||
of a variable that will be substituted by members of 'b' in 'c'.
|
||||
This operation is analogous to $(foreach) in GNU make.
|
||||
|
||||
``!foldl(start, lst, a, b, expr)``
|
||||
Perform a left-fold over 'lst' with the given starting value. 'a' and 'b'
|
||||
are variable names which will be substituted in 'expr'. If you think of
|
||||
expr as a function f(a,b), the fold will compute
|
||||
'f(...f(f(start, lst[0]), lst[1]), ...), lst[n-1])' for a list of length n.
|
||||
As usual, 'a' will be of the type of 'start', and 'b' will be of the type
|
||||
of elements of 'lst'. These types need not be the same, but 'expr' must be
|
||||
of the same type as 'start'.
|
||||
|
||||
``!head(a)``
|
||||
The first element of list 'a.'
|
||||
@ -205,6 +252,9 @@ supported include:
|
||||
``!empty(a)``
|
||||
An integer {0,1} indicating whether list 'a' is empty.
|
||||
|
||||
``!size(a)``
|
||||
An integer indicating the number of elements in list 'a'.
|
||||
|
||||
``!if(a,b,c)``
|
||||
'b' if the result of 'int' or 'bit' operator 'a' is nonzero, 'c' otherwise.
|
||||
|
||||
@ -213,8 +263,19 @@ supported include:
|
||||
on string, int and bit objects. Use !cast<string> to compare other types of
|
||||
objects.
|
||||
|
||||
``!shl(a,b)`` ``!srl(a,b)`` ``!sra(a,b)`` ``!add(a,b)`` ``!and(a,b)``
|
||||
The usual binary and arithmetic operators.
|
||||
``!ne(a,b)``
|
||||
The negation of ``!eq(a,b)``.
|
||||
|
||||
``!le(a,b), !lt(a,b), !ge(a,b), !gt(a,b)``
|
||||
(Signed) comparison of integer values that returns bit 1 or 0 depending on
|
||||
the result of the comparison.
|
||||
|
||||
``!shl(a,b)`` ``!srl(a,b)`` ``!sra(a,b)``
|
||||
The usual shift operators. Operations are on 64-bit integers, the result
|
||||
is undefined for shift counts outside [0, 63].
|
||||
|
||||
``!add(a,b,...)`` ``!and(a,b,...)`` ``!or(a,b,...)``
|
||||
The usual arithmetic and binary operators.
|
||||
|
||||
Note that all of the values have rules specifying how they convert to values
|
||||
for different types. These rules allow you to assign a value like "``7``"
|
||||
@ -287,6 +348,23 @@ In this case, the ``Z`` definition will have a zero value for its ``V`` value,
|
||||
despite the fact that it derives (indirectly) from the ``C`` class, because the
|
||||
``D`` class overrode its value.
|
||||
|
||||
References between variables in a record are substituted late, which gives
|
||||
``let`` expressions unusual power. Consider this admittedly silly example:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
class A<int x> {
|
||||
int Y = x;
|
||||
int Yplus1 = !add(Y, 1);
|
||||
int xplus1 = !add(x, 1);
|
||||
}
|
||||
def Z : A<5> {
|
||||
let Y = 10;
|
||||
}
|
||||
|
||||
The value of ``Z.xplus1`` will be 6, but the value of ``Z.Yplus1`` is 11. Use
|
||||
this power wisely.
|
||||
|
||||
.. _template arguments:
|
||||
|
||||
Class template arguments
|
||||
|
@ -98,7 +98,9 @@ wide variety of meanings:
|
||||
:!eq !if !head !tail !con
|
||||
:!add !shl !sra !srl !and
|
||||
:!or !empty !subst !foreach !strconcat
|
||||
:!cast !listconcat
|
||||
:!cast !listconcat !size !foldl
|
||||
:!isa !dag !le !lt !ge
|
||||
:!gt !ne
|
||||
|
||||
|
||||
Syntax
|
||||
@ -115,13 +117,15 @@ TableGen's top-level production consists of "objects".
|
||||
|
||||
.. productionlist::
|
||||
TableGenFile: `Object`*
|
||||
Object: `Class` | `Def` | `Defm` | `Let` | `MultiClass` | `Foreach`
|
||||
Object: `Class` | `Def` | `Defm` | `Defset` | `Let` | `MultiClass` |
|
||||
`Foreach`
|
||||
|
||||
``class``\es
|
||||
------------
|
||||
|
||||
.. productionlist::
|
||||
Class: "class" `TokIdentifier` [`TemplateArgList`] `ObjectBody`
|
||||
TemplateArgList: "<" `Declaration` ("," `Declaration`)* ">"
|
||||
|
||||
A ``class`` declaration creates a record which other records can inherit
|
||||
from. A class can be parametrized by a list of "template arguments", whose
|
||||
@ -142,8 +146,9 @@ forward declaration: note that records deriving from the forward-declared
|
||||
class will inherit no fields from it since the record expansion is done
|
||||
when the record is parsed.
|
||||
|
||||
.. productionlist::
|
||||
TemplateArgList: "<" `Declaration` ("," `Declaration`)* ">"
|
||||
Every class has an implicit template argument called ``NAME``, which is set
|
||||
to the name of the instantiating ``def`` or ``defm``. The result is undefined
|
||||
if the class is instantiated by an anonymous record.
|
||||
|
||||
Declarations
|
||||
------------
|
||||
@ -224,15 +229,17 @@ of:
|
||||
int Baz = Bar;
|
||||
}
|
||||
|
||||
Values defined in superclasses can be accessed the same way.
|
||||
|
||||
* a template arg of a ``class``, such as the use of ``Bar`` in::
|
||||
|
||||
class Foo<int Bar> {
|
||||
int Baz = Bar;
|
||||
}
|
||||
|
||||
* value local to a ``multiclass``, such as the use of ``Bar`` in::
|
||||
* value local to a ``class``, such as the use of ``Bar`` in::
|
||||
|
||||
multiclass Foo {
|
||||
class Foo {
|
||||
int Bar = 5;
|
||||
int Baz = Bar;
|
||||
}
|
||||
@ -240,9 +247,18 @@ of:
|
||||
* a template arg to a ``multiclass``, such as the use of ``Bar`` in::
|
||||
|
||||
multiclass Foo<int Bar> {
|
||||
int Baz = Bar;
|
||||
def : SomeClass<Bar>;
|
||||
}
|
||||
|
||||
* the iteration variable of a ``foreach``, such as the use of ``i`` in::
|
||||
|
||||
foreach i = 0-5 in
|
||||
def Foo#i;
|
||||
|
||||
* a variable defined by ``defset``
|
||||
|
||||
* the implicit template argument ``NAME`` in a ``class`` or ``multiclass``
|
||||
|
||||
.. productionlist::
|
||||
SimpleValue: `TokInteger`
|
||||
|
||||
@ -291,7 +307,7 @@ given values.
|
||||
leave it out.
|
||||
|
||||
.. productionlist::
|
||||
SimpleValue: "(" `DagArg` `DagArgList` ")"
|
||||
SimpleValue: "(" `DagArg` [`DagArgList`] ")"
|
||||
DagArgList: `DagArg` ("," `DagArg`)*
|
||||
DagArg: `Value` [":" `TokVarName`] | `TokVarName`
|
||||
|
||||
@ -322,50 +338,94 @@ It is after parsing the base class list that the "let stack" is applied.
|
||||
Body: ";" | "{" BodyList "}"
|
||||
BodyList: BodyItem*
|
||||
BodyItem: `Declaration` ";"
|
||||
:| "let" `TokIdentifier` [`RangeList`] "=" `Value` ";"
|
||||
:| "let" `TokIdentifier` [ "{" `RangeList` "}" ] "=" `Value` ";"
|
||||
|
||||
The ``let`` form allows overriding the value of an inherited field.
|
||||
|
||||
``def``
|
||||
-------
|
||||
|
||||
.. TODO::
|
||||
There can be pastes in the names here, like ``#NAME#``. Look into that
|
||||
and document it (it boils down to ParseIDValue with IDParseMode ==
|
||||
ParseNameMode). ParseObjectName calls into the general ParseValue, with
|
||||
the only difference from "arbitrary expression parsing" being IDParseMode
|
||||
== Mode.
|
||||
|
||||
.. productionlist::
|
||||
Def: "def" `TokIdentifier` `ObjectBody`
|
||||
Def: "def" [`Value`] `ObjectBody`
|
||||
|
||||
Defines a record whose name is given by the :token:`TokIdentifier`. The
|
||||
fields of the record are inherited from the base classes and defined in the
|
||||
body.
|
||||
Defines a record whose name is given by the optional :token:`Value`. The value
|
||||
is parsed in a special mode where global identifiers (records and variables
|
||||
defined by ``defset``) are not recognized, and all unrecognized identifiers
|
||||
are interpreted as strings.
|
||||
|
||||
If no name is given, the record is anonymous. The final name of anonymous
|
||||
records is undefined, but globally unique.
|
||||
|
||||
Special handling occurs if this ``def`` appears inside a ``multiclass`` or
|
||||
a ``foreach``.
|
||||
|
||||
When a non-anonymous record is defined in a multiclass and the given name
|
||||
does not contain a reference to the implicit template argument ``NAME``, such
|
||||
a reference will automatically be prepended. That is, the following are
|
||||
equivalent inside a multiclass::
|
||||
|
||||
def Foo;
|
||||
def NAME#Foo;
|
||||
|
||||
``defm``
|
||||
--------
|
||||
|
||||
.. productionlist::
|
||||
Defm: "defm" `TokIdentifier` ":" `BaseClassListNE` ";"
|
||||
Defm: "defm" [`Value`] ":" `BaseClassListNE` ";"
|
||||
|
||||
Note that in the :token:`BaseClassList`, all of the ``multiclass``'s must
|
||||
precede any ``class``'s that appear.
|
||||
The :token:`BaseClassList` is a list of at least one ``multiclass`` and any
|
||||
number of ``class``'s. The ``multiclass``'s must occur before any ``class``'s.
|
||||
|
||||
Instantiates all records defined in all given ``multiclass``'s and adds the
|
||||
given ``class``'s as superclasses.
|
||||
|
||||
The name is parsed in the same special mode used by ``def``. If the name is
|
||||
missing, a globally unique string is used instead (but instantiated records
|
||||
are not considered to be anonymous, unless they were originally defined by an
|
||||
anonymous ``def``). That is, the following have different semantics::
|
||||
|
||||
defm : SomeMultiClass<...>; // some globally unique name
|
||||
defm "" : SomeMultiClass<...>; // empty name string
|
||||
|
||||
When it occurs inside a multiclass, the second variant is equivalent to
|
||||
``defm NAME : ...``. More generally, when ``defm`` occurs in a multiclass and
|
||||
its name does not contain a reference to the implicit template argument
|
||||
``NAME``, such a reference will automatically be prepended. That is, the
|
||||
following are equivalent inside a multiclass::
|
||||
|
||||
defm Foo : SomeMultiClass<...>;
|
||||
defm NAME#Foo : SomeMultiClass<...>;
|
||||
|
||||
``defset``
|
||||
----------
|
||||
.. productionlist::
|
||||
Defset: "defset" `Type` `TokIdentifier` "=" "{" `Object`* "}"
|
||||
|
||||
All records defined inside the braces via ``def`` and ``defm`` are collected
|
||||
in a globally accessible list of the given name (in addition to being added
|
||||
to the global collection of records as usual). Anonymous records created inside
|
||||
initializer expressions using the ``Class<args...>`` syntax are never collected
|
||||
in a defset.
|
||||
|
||||
The given type must be ``list<A>``, where ``A`` is some class. It is an error
|
||||
to define a record (via ``def`` or ``defm``) inside the braces which doesn't
|
||||
derive from ``A``.
|
||||
|
||||
``foreach``
|
||||
-----------
|
||||
|
||||
.. productionlist::
|
||||
Foreach: "foreach" `Declaration` "in" "{" `Object`* "}"
|
||||
:| "foreach" `Declaration` "in" `Object`
|
||||
Foreach: "foreach" `ForeachDeclaration` "in" "{" `Object`* "}"
|
||||
:| "foreach" `ForeachDeclaration` "in" `Object`
|
||||
ForeachDeclaration: ID "=" ( "{" `RangeList` "}" | `RangePiece` | `Value` )
|
||||
|
||||
The value assigned to the variable in the declaration is iterated over and
|
||||
the object or object list is reevaluated with the variable set at each
|
||||
iterated value.
|
||||
|
||||
Note that the productions involving RangeList and RangePiece have precedence
|
||||
over the more generic value parsing based on the first token.
|
||||
|
||||
Top-Level ``let``
|
||||
-----------------
|
||||
|
||||
|
@ -76,11 +76,14 @@ example, to get a list of all of the definitions that subclass a particular type
|
||||
ADD16rr, ADD32mi, ADD32mi8, ADD32mr, ADD32ri, ADD32ri8, ADD32rm, ADD32rr,
|
||||
ADD64mi32, ADD64mi8, ADD64mr, ADD64ri32, ...
|
||||
|
||||
The default backend prints out all of the records.
|
||||
The default backend prints out all of the records. There is also a general
|
||||
backend which outputs all the records as a JSON data structure, enabled using
|
||||
the ``-dump-json`` option.
|
||||
|
||||
If you plan to use TableGen, you will most likely have to write a `backend`_
|
||||
that extracts the information specific to what you need and formats it in the
|
||||
appropriate way.
|
||||
appropriate way. You can do this by extending TableGen itself in C++, or by
|
||||
writing a script in any language that can consume the JSON output.
|
||||
|
||||
Example
|
||||
-------
|
||||
@ -171,13 +174,6 @@ factor out the common features that instructions of its class share. A key
|
||||
feature of TableGen is that it allows the end-user to define the abstractions
|
||||
they prefer to use when describing their information.
|
||||
|
||||
Each ``def`` record has a special entry called "NAME". This is the name of the
|
||||
record ("``ADD32rr``" above). In the general case ``def`` names can be formed
|
||||
from various kinds of string processing expressions and ``NAME`` resolves to the
|
||||
final value obtained after resolving all of those expressions. The user may
|
||||
refer to ``NAME`` anywhere she desires to use the ultimate name of the ``def``.
|
||||
``NAME`` should not be defined anywhere else in user code to avoid conflicts.
|
||||
|
||||
Syntax
|
||||
======
|
||||
|
||||
@ -224,7 +220,7 @@ definitions of a particular class, such as "Instruction".
|
||||
|
||||
class ProcNoItin<string Name, list<SubtargetFeature> Features>
|
||||
: Processor<Name, NoItineraries, Features>;
|
||||
|
||||
|
||||
Here, the class ProcNoItin, receiving parameters `Name` of type `string` and
|
||||
a list of target features is specializing the class Processor by passing the
|
||||
arguments down as well as hard-coding NoItineraries.
|
||||
|
@ -460,7 +460,10 @@ RUN lines:
|
||||
Example: ``/home/user/llvm.build/test/MC/ELF/Output/foo_test.s.tmp``
|
||||
|
||||
``%T``
|
||||
Directory of ``%t``.
|
||||
Directory of ``%t``. Deprecated. Shouldn't be used, because it can be easily
|
||||
misused and cause race conditions between tests.
|
||||
|
||||
Use ``rm -rf %t && mkdir %t`` instead if a temporary directory is necessary.
|
||||
|
||||
Example: ``/home/user/llvm.build/test/MC/ELF/Output``
|
||||
|
||||
|
@ -428,12 +428,3 @@ through clang using the command line flag:
|
||||
.. code-block:: console
|
||||
|
||||
$ clang -fno-slp-vectorize file.c
|
||||
|
||||
LLVM has a second basic block vectorization phase
|
||||
which is more compile-time intensive (The BB vectorizer). This optimization
|
||||
can be enabled through clang using the command line flag:
|
||||
|
||||
.. code-block:: console
|
||||
|
||||
$ clang -fslp-vectorize-aggressive file.c
|
||||
|
||||
|
135
docs/XRay.rst
135
docs/XRay.rst
@ -28,9 +28,10 @@ XRay consists of three main parts:
|
||||
- A runtime library for enabling/disabling tracing at runtime.
|
||||
- A suite of tools for analysing the traces.
|
||||
|
||||
**NOTE:** As of February 27, 2017 , XRay is only available for the following
|
||||
**NOTE:** As of July 25, 2018, XRay is only available for the following
|
||||
architectures running Linux: x86_64, arm7 (no thumb), aarch64, powerpc64le,
|
||||
mips, mipsel, mips64, mips64el.
|
||||
mips, mipsel, mips64, mips64el, NetBSD: x86_64, FreeBSD: x86_64 and
|
||||
OpenBSD: x86_64.
|
||||
|
||||
The compiler-inserted instrumentation points come in the form of nop-sleds in
|
||||
the final generated binary, and an ELF section named ``xray_instr_map`` which
|
||||
@ -59,7 +60,7 @@ For example:
|
||||
|
||||
::
|
||||
|
||||
clang -fxray-instrument ..
|
||||
clang -fxray-instrument ...
|
||||
|
||||
By default, functions that have at least 200 instructions will get XRay
|
||||
instrumentation points. You can tweak that number through the
|
||||
@ -67,7 +68,7 @@ instrumentation points. You can tweak that number through the
|
||||
|
||||
::
|
||||
|
||||
clang -fxray-instrument -fxray-instruction-threshold=1 ..
|
||||
clang -fxray-instrument -fxray-instruction-threshold=1 ...
|
||||
|
||||
You can also specifically instrument functions in your binary to either always
|
||||
or never be instrumented using source-level attributes. You can do it using the
|
||||
@ -117,6 +118,27 @@ it gets instrumented.
|
||||
; ...
|
||||
}
|
||||
|
||||
Special Case File
|
||||
-----------------
|
||||
|
||||
Attributes can be imbued through the use of special case files instead of
|
||||
adding them to the original source files. You can use this to mark certain
|
||||
functions and classes to be never instrumented, always instrumented, or instrumented with first-argument
|
||||
logging from a file. The file's format is described below:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
# Comments are supported
|
||||
[always]
|
||||
fun:always_instrument
|
||||
fun:log_arg1=arg1 # Log the first argument for the function
|
||||
|
||||
[never]
|
||||
fun:never_instrument
|
||||
|
||||
These files can be provided through the ``-fxray-attr-list=`` flag to clang.
|
||||
You may have multiple files loaded through multiple instances of the flag.
|
||||
|
||||
XRay Runtime Library
|
||||
--------------------
|
||||
|
||||
@ -150,20 +172,6 @@ variable, where we list down the options and their defaults below.
|
||||
| xray_logfile_base | ``const char*`` | ``xray-log.`` | Filename base for the |
|
||||
| | | | XRay logfile. |
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
| xray_naive_log | ``bool`` | ``false`` | **DEPRECATED:** Use |
|
||||
| | | | xray_mode=xray-basic |
|
||||
| | | | instead. Whether to |
|
||||
| | | | install the basic log |
|
||||
| | | | the naive log |
|
||||
| | | | implementation. |
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
| xray_fdr_log | ``bool`` | ``false`` | **DEPRECATED:** Use |
|
||||
| | | | xray_mode=xray-fdr |
|
||||
| | | | instead. Whether to |
|
||||
| | | | install the Flight |
|
||||
| | | | Data Recorder |
|
||||
| | | | (FDR) mode. |
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
| verbosity | ``int`` | ``0`` | Runtime verbosity |
|
||||
| | | | level. |
|
||||
+-------------------+-----------------+---------------+------------------------+
|
||||
@ -172,30 +180,45 @@ variable, where we list down the options and their defaults below.
|
||||
If you choose to not use the default logging implementation that comes with the
|
||||
XRay runtime and/or control when/how the XRay instrumentation runs, you may use
|
||||
the XRay APIs directly for doing so. To do this, you'll need to include the
|
||||
``xray_interface.h`` from the compiler-rt ``xray`` directory. The important API
|
||||
``xray_log_interface.h`` from the compiler-rt ``xray`` directory. The important API
|
||||
functions we list below:
|
||||
|
||||
- ``__xray_set_handler(void (*entry)(int32_t, XRayEntryType))``: Install your
|
||||
own logging handler for when an event is encountered. See
|
||||
``xray/xray_interface.h`` for more details.
|
||||
- ``__xray_remove_handler()``: Removes whatever the installed handler is.
|
||||
- ``__xray_patch()``: Patch all the instrumentation points defined in the
|
||||
binary.
|
||||
- ``__xray_unpatch()``: Unpatch the instrumentation points defined in the
|
||||
binary.
|
||||
- ``__xray_log_register_mode(...)``: Register a logging implementation against
|
||||
a string Mode identifier. The implementation is an instance of
|
||||
``XRayLogImpl`` defined in ``xray/xray_log_interface.h``.
|
||||
- ``__xray_log_select_mode(...)``: Select the mode to install, associated with
|
||||
a string Mode identifier. Only implementations registered with
|
||||
``__xray_log_register_mode(...)`` can be chosen with this function.
|
||||
- ``__xray_log_init_mode(...)``: This function allows for initializing and
|
||||
re-initializing an installed logging implementation. See
|
||||
``xray/xray_log_interface.h`` for details, part of the XRay compiler-rt
|
||||
installation.
|
||||
|
||||
There are some requirements on the logging handler to be installed for the
|
||||
thread-safety of operations to be performed by the XRay runtime library:
|
||||
Once a logging implementation has been initialized, it can be "stopped" by
|
||||
finalizing the implementation through the ``__xray_log_finalize()`` function.
|
||||
The finalization routine is the opposite of the initialization. When finalized,
|
||||
an implementation's data can be cleared out through the
|
||||
``__xray_log_flushLog()`` function. For implementations that support in-memory
|
||||
processing, these should register an iterator function to provide access to the
|
||||
data via the ``__xray_log_set_buffer_iterator(...)`` which allows code calling
|
||||
the ``__xray_log_process_buffers(...)`` function to deal with the data in
|
||||
memory.
|
||||
|
||||
- The function should be thread-safe, as multiple threads may be invoking the
|
||||
function at the same time. If the logging function needs to do
|
||||
synchronisation, it must do so internally as XRay does not provide any
|
||||
synchronisation guarantees outside from the atomicity of updates to the
|
||||
pointer.
|
||||
- The pointer provided to ``__xray_set_handler(...)`` must be live even after
|
||||
calls to ``__xray_remove_handler()`` and ``__xray_unpatch()`` have succeeded.
|
||||
XRay cannot guarantee that all threads that have ever gotten a copy of the
|
||||
pointer will not invoke the function.
|
||||
All of this is better explained in the ``xray/xray_log_interface.h`` header.
|
||||
|
||||
Basic Mode
|
||||
----------
|
||||
|
||||
XRay supports a basic logging mode which will trace the application's
|
||||
execution, and periodically append to a single log. This mode can be
|
||||
installed/enabled by setting ``xray_mode=xray-basic`` in the ``XRAY_OPTIONS``
|
||||
environment variable. Combined with ``patch_premain=true`` this can allow for
|
||||
tracing applications from start to end.
|
||||
|
||||
Like all the other modes installed through ``__xray_log_select_mode(...)``, the
|
||||
implementation can be configured through the ``__xray_log_init_mode(...)``
|
||||
function, providing the mode string and the flag options. Basic-mode specific
|
||||
defaults can be provided in the ``XRAY_BASIC_OPTIONS`` environment variable.
|
||||
|
||||
Flight Data Recorder Mode
|
||||
-------------------------
|
||||
@ -205,9 +228,12 @@ fixed amount of memory's worth of events. Flight Data Recorder (FDR) mode works
|
||||
very much like a plane's "black box" which keeps recording data to memory in a
|
||||
fixed-size circular queue of buffers, and have the data available
|
||||
programmatically until the buffers are finalized and flushed. To use FDR mode
|
||||
on your application, you may set the ``xray_fdr_log`` option to ``true`` in the
|
||||
``XRAY_OPTIONS`` environment variable (while also optionally setting the
|
||||
``xray_naive_log`` to ``false``).
|
||||
on your application, you may set the ``xray_mode`` variable to ``xray-fdr`` in
|
||||
the ``XRAY_OPTIONS`` environment variable. Additional options to the FDR mode
|
||||
implementation can be provided in the ``XRAY_FDR_OPTIONS`` environment
|
||||
variable. Programmatic configuration can be done by calling
|
||||
``__xray_log_init_mode("xray-fdr", <configuration string>)`` once it has been
|
||||
selected/installed.
|
||||
|
||||
When the buffers are flushed to disk, the result is a binary trace format
|
||||
described by `XRay FDR format <XRayFDRFormat.html>`_
|
||||
@ -239,34 +265,15 @@ provided below:
|
||||
}
|
||||
|
||||
The default settings for the FDR mode implementation will create logs named
|
||||
similarly to the naive log implementation, but will have a different log
|
||||
similarly to the basic log implementation, but will have a different log
|
||||
format. All the trace analysis tools (and the trace reading library) will
|
||||
support all versions of the FDR mode format as we add more functionality and
|
||||
record types in the future.
|
||||
|
||||
**NOTE:** We do not however promise perpetual support for when we update the
|
||||
log versions we support going forward. Deprecation of the formats will be
|
||||
**NOTE:** We do not promise perpetual support for when we update the log
|
||||
versions we support going forward. Deprecation of the formats will be
|
||||
announced and discussed on the developers mailing list.
|
||||
|
||||
XRay allows for replacing the default FDR mode logging implementation using the
|
||||
following API:
|
||||
|
||||
- ``__xray_set_log_impl(...)``: This function takes a struct of type
|
||||
``XRayLogImpl``, which is defined in ``xray/xray_log_interface.h``, part of
|
||||
the XRay compiler-rt installation.
|
||||
- ``__xray_log_register_mode(...)``: Register a logging implementation against
|
||||
a string Mode. The implementation is an instance of ``XRayLogImpl`` defined
|
||||
in ``xray/xray_log_interface.h``.
|
||||
- ``__xray_log_select_mode(...)``: Select the mode to install, associated with
|
||||
a string Mode. Only implementations registered with
|
||||
``__xray_log_register_mode(...)`` can be chosen with this function. When
|
||||
successful, has the same effects as calling ``__xray_set_log_impl(...)`` with
|
||||
the registered logging implementation.
|
||||
- ``__xray_log_init(...)``: This function allows for initializing and
|
||||
re-initializing an installed logging implementation. See
|
||||
``xray/xray_log_interface.h`` for details, part of the XRay compiler-rt
|
||||
installation.
|
||||
|
||||
Trace Analysis Tools
|
||||
--------------------
|
||||
|
||||
@ -280,7 +287,7 @@ supports the following subcommands:
|
||||
options for sorting, and output formats (supports CSV, YAML, and
|
||||
console-friendly TEXT).
|
||||
- ``convert``: Converts an XRay log file from one format to another. We can
|
||||
convert from binary XRay traces (both naive and FDR mode) to YAML,
|
||||
convert from binary XRay traces (both basic and FDR mode) to YAML,
|
||||
`flame-graph <https://github.com/brendangregg/FlameGraph>`_ friendly text
|
||||
formats, as well as `Chrome Trace Viewer (catapult)
|
||||
<https://github.com/catapult-project/catapult>`_ formats.
|
||||
|
@ -48,11 +48,11 @@ Getting Traces
|
||||
--------------
|
||||
|
||||
By default, XRay does not write out the trace files or patch the application
|
||||
before main starts. If we just run ``llc`` it should just work like a normally
|
||||
built binary. However, if we want to get a full trace of the application's
|
||||
operations (of the functions we do end up instrumenting with XRay) then we need
|
||||
to enable XRay at application start. To do this, XRay checks the
|
||||
``XRAY_OPTIONS`` environment variable.
|
||||
before main starts. If we run ``llc`` it should work like a normally built
|
||||
binary. If we want to get a full trace of the application's operations (of the
|
||||
functions we do end up instrumenting with XRay) then we need to enable XRay
|
||||
at application start. To do this, XRay checks the ``XRAY_OPTIONS`` environment
|
||||
variable.
|
||||
|
||||
::
|
||||
|
||||
@ -73,9 +73,8 @@ instrumented, and how much time we're spending in parts of the code. To make
|
||||
sense of this data, we use the ``llvm-xray`` tool which has a few subcommands
|
||||
to help us understand our trace.
|
||||
|
||||
One of the simplest things we can do is to get an accounting of the functions
|
||||
that have been instrumented. We can see an example accounting with ``llvm-xray
|
||||
account``:
|
||||
One of the things we can do is to get an accounting of the functions that have
|
||||
been instrumented. We can see an example accounting with ``llvm-xray account``:
|
||||
|
||||
::
|
||||
|
||||
@ -178,22 +177,22 @@ add the attribute to the source.
|
||||
To use this feature, you can define one file for the functions to always
|
||||
instrument, and another for functions to never instrument. The format of these
|
||||
files is exactly the same as the SanitizerLists files that control similar
|
||||
things for the sanitizer implementations. For example, we can have two
|
||||
different files like below:
|
||||
things for the sanitizer implementations. For example:
|
||||
|
||||
::
|
||||
|
||||
# always-instrument.txt
|
||||
# xray-attr-list.txt
|
||||
# always instrument functions that match the following filters:
|
||||
[always]
|
||||
fun:main
|
||||
|
||||
# never-instrument.txt
|
||||
# never instrument functions that match the following filters:
|
||||
[never]
|
||||
fun:__cxx_*
|
||||
|
||||
Given the above two files we can re-build by providing those two files as
|
||||
arguments to clang as ``-fxray-always-instrument=always-instrument.txt`` or
|
||||
``-fxray-never-instrument=never-instrument.txt``.
|
||||
Given the file above we can re-build by providing it to the
|
||||
``-fxray-attr-list=`` flag to clang. You can have multiple files, each defining
|
||||
different sets of attributes, to be combined into a single list by clang.
|
||||
|
||||
The XRay stack tool
|
||||
-------------------
|
||||
@ -202,8 +201,7 @@ Given a trace, and optionally an instrumentation map, the ``llvm-xray stack``
|
||||
command can be used to analyze a call stack graph constructed from the function
|
||||
call timeline.
|
||||
|
||||
The simplest way to use the command is simply to output the top stacks by call
|
||||
count and time spent.
|
||||
The way to use the command is to output the top stacks by call count and time spent.
|
||||
|
||||
::
|
||||
|
||||
@ -245,7 +243,7 @@ FlameGraph tool, currently available on `github
|
||||
|
||||
To generate output for a flamegraph, a few more options are necessary.
|
||||
|
||||
- ``-all-stacks`` - Emits all of the stacks instead of just the top stacks.
|
||||
- ``-all-stacks`` - Emits all of the stacks.
|
||||
- ``-stack-format`` - Choose the flamegraph output format 'flame'.
|
||||
- ``-aggregation-type`` - Choose the metric to graph.
|
||||
|
||||
|
@ -15,7 +15,7 @@ When gathering XRay traces in Flight Data Recorder mode, each thread of an
|
||||
application will claim buffers to fill with trace data, which at some point
|
||||
is finalized and flushed.
|
||||
|
||||
A goal of the profiler is to minimize overhead, so the flushed data directly
|
||||
A goal of the profiler is to minimize overhead, so the flushed data directly
|
||||
corresponds to the buffer.
|
||||
|
||||
This document describes the format of a trace file.
|
||||
@ -106,11 +106,11 @@ There are a few categories of data in the sequence.
|
||||
- ``Function Arguments``: The arguments to some functions are included in the
|
||||
trace. These are either pointer addresses or primitives that are read and
|
||||
logged independently of their types in a high level language. To the tracer,
|
||||
they are all simply numbers. Function Records that have attached arguments
|
||||
will indicate their presence on the function entry record. We only support
|
||||
logging contiguous function argument sequences starting with argument zero,
|
||||
which will be the "this" pointer for member function invocations. For example,
|
||||
we don't support logging the first and third argument.
|
||||
they are all numbers. Function Records that have attached arguments will
|
||||
indicate their presence on the function entry record. We only support logging
|
||||
contiguous function argument sequences starting with argument zero, which will
|
||||
be the "this" pointer for member function invocations. For example, we don't
|
||||
support logging the first and third argument.
|
||||
|
||||
A reader of the memory format must maintain a state machine. The format makes no
|
||||
attempt to pad for alignment, and it is not seekable.
|
||||
|
@ -1020,7 +1020,7 @@ object. For example:
|
||||
// Reading multiple documents in one file
|
||||
using llvm::yaml::Input;
|
||||
|
||||
LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(std::vector<MyDocType>)
|
||||
LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(MyDocType)
|
||||
|
||||
Input yin(mb.getBuffer());
|
||||
|
||||
|
@ -48,9 +48,9 @@ copyright = u'2003-%d, LLVM Project' % date.today().year
|
||||
# built documents.
|
||||
#
|
||||
# The short version.
|
||||
version = '6'
|
||||
version = '7'
|
||||
# The full version, including alpha/beta/rc tags.
|
||||
release = '6'
|
||||
release = '7'
|
||||
|
||||
# The language for content autogenerated by Sphinx. Refer to documentation
|
||||
# for a list of supported languages.
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user