Vendor import of compiler-rt trunk r338150:

https://llvm.org/svn/llvm-project/compiler-rt/trunk@338150
Dimitry Andric 2018-07-28 11:06:48 +00:00
parent 0d8e7490d6
commit 93c1b73a09
870 changed files with 50343 additions and 6771 deletions


@ -9,6 +9,7 @@ cmake_minimum_required(VERSION 3.4.3)
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR OR COMPILER_RT_STANDALONE_BUILD)
project(CompilerRT C CXX ASM)
set(COMPILER_RT_STANDALONE_BUILD TRUE)
set_property(GLOBAL PROPERTY USE_FOLDERS ON)
endif()
# Add path for custom compiler-rt modules.
@ -63,6 +64,11 @@ set(COMPILER_RT_BAREMETAL_BUILD OFF CACHE BOOLEAN
if (COMPILER_RT_STANDALONE_BUILD)
load_llvm_config()
if (TARGET intrinsics_gen)
# Loading the llvm config causes this target to be imported so place it
# under the appropriate folder in an IDE.
set_target_properties(intrinsics_gen PROPERTIES FOLDER "Compiler-RT Misc")
endif()
# Find Python interpreter.
set(Python_ADDITIONAL_VERSIONS 2.7 2.6 2.5)
@ -96,6 +102,8 @@ pythonize_bool(ANDROID)
set(COMPILER_RT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(COMPILER_RT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
pythonize_bool(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR)
# We support running instrumented tests when we're not cross compiling
# and target a UNIX-like system or Windows.
# We can run tests on Android even when we are cross-compiling.
@ -112,9 +120,6 @@ option(COMPILER_RT_EXTERNALIZE_DEBUGINFO
# COMPILER_RT_DEBUG_PYBOOL is used by lit.common.configured.in.
pythonize_bool(COMPILER_RT_DEBUG)
include(HandleCompilerRT)
include(config-ix)
if(APPLE AND SANITIZER_MIN_OSX_VERSION AND SANITIZER_MIN_OSX_VERSION VERSION_LESS "10.9")
# Mac OS X prior to 10.9 had problems with exporting symbols from
# libc++/libc++abi.
@ -133,41 +138,34 @@ pythonize_bool(SANITIZER_CAN_USE_CXXABI)
set(SANITIZER_CXX_ABI "default" CACHE STRING
"Specify C++ ABI library to use.")
set(CXXABIS none default libcxxabi libstdc++ libc++)
set(CXXABIS none default libstdc++ libc++)
set_property(CACHE SANITIZER_CXX_ABI PROPERTY STRINGS ;${CXXABIS})
if (SANITIZER_CXX_ABI STREQUAL "default")
if (HAVE_LIBCXXABI AND COMPILER_RT_DEFAULT_TARGET_ONLY)
set(SANITIZER_CXX_ABI_LIBNAME "libcxxabi")
set(SANITIZER_CXX_ABI_INTREE 1)
elseif (APPLE)
set(SANITIZER_CXX_ABI_LIBNAME "libcxxabi")
if (APPLE)
set(SANITIZER_CXX_ABI_LIBNAME "libc++")
set(SANITIZER_CXX_ABI_SYSTEM 1)
elseif (FUCHSIA)
set(SANITIZER_CXX_ABI_LIBNAME "libc++")
set(SANITIZER_CXX_ABI_INTREE 1)
else()
set(SANITIZER_CXX_ABI_LIBNAME "libstdc++")
set(SANITIZER_CXX_ABI_SYSTEM 1)
endif()
else()
set(SANITIZER_CXX_ABI_LIBNAME "${SANITIZER_CXX_ABI}")
set(SANITIZER_CXX_ABI_SYSTEM 1)
endif()
if (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libcxxabi")
if (SANITIZER_CXX_ABI_INTREE)
if (TARGET unwind_shared OR HAVE_LIBUNWIND)
list(APPEND SANITIZER_CXX_ABI_LIBRARY unwind_shared)
endif()
if (TARGET cxxabi_shared OR HAVE_LIBCXXABI)
list(APPEND SANITIZER_CXX_ABI_LIBRARY cxxabi_shared)
endif()
else()
list(APPEND SANITIZER_CXX_ABI_LIBRARY "c++abi")
endif()
elseif (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libc++")
list(APPEND SANITIZER_CXX_ABI_LIBRARY "c++")
elseif (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libstdc++")
append_list_if(COMPILER_RT_HAS_LIBSTDCXX stdc++ SANITIZER_CXX_ABI_LIBRARY)
set(DEFAULT_COMPILER_RT_USE_BUILTINS_LIBRARY OFF)
if (FUCHSIA)
set(DEFAULT_COMPILER_RT_USE_BUILTINS_LIBRARY ON)
endif()
option(SANITIZER_USE_COMPILER_RT "Use compiler-rt builtins instead of libgcc" OFF)
option(COMPILER_RT_USE_BUILTINS_LIBRARY
"Use compiler-rt builtins instead of libgcc" ${DEFAULT_COMPILER_RT_USE_BUILTINS_LIBRARY})
include(config-ix)
#================================
# Setup Compiler Flags
@ -274,12 +272,14 @@ else()
set(SANITIZER_LIMIT_FRAME_SIZE FALSE)
endif()
if(FUCHSIA OR UNIX)
set(SANITIZER_USE_SYMBOLS TRUE)
else()
set(SANITIZER_USE_SYMBOLS FALSE)
endif()
# Build sanitizer runtimes with debug info.
if(COMPILER_RT_HAS_GLINE_TABLES_ONLY_FLAG AND NOT COMPILER_RT_DEBUG)
list(APPEND SANITIZER_COMMON_CFLAGS -gline-tables-only)
elseif(COMPILER_RT_HAS_G_FLAG)
list(APPEND SANITIZER_COMMON_CFLAGS -g)
elseif(MSVC)
if(MSVC)
# Use /Z7 instead of /Zi for the asan runtime. This avoids the LNK4099
# warning from the MS linker complaining that it can't find the 'vc140.pdb'
# file used by our object library compilations.
@ -287,6 +287,10 @@ elseif(MSVC)
llvm_replace_compiler_option(CMAKE_CXX_FLAGS "/Z[i7I]" "/Z7")
llvm_replace_compiler_option(CMAKE_CXX_FLAGS_DEBUG "/Z[i7I]" "/Z7")
llvm_replace_compiler_option(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Z[i7I]" "/Z7")
elseif(COMPILER_RT_HAS_GLINE_TABLES_ONLY_FLAG AND NOT COMPILER_RT_DEBUG)
list(APPEND SANITIZER_COMMON_CFLAGS -gline-tables-only)
elseif(COMPILER_RT_HAS_G_FLAG)
list(APPEND SANITIZER_COMMON_CFLAGS -g)
endif()
if(LLVM_ENABLE_MODULES)
@ -309,9 +313,7 @@ append_list_if(COMPILER_RT_HAS_WD4800_FLAG /wd4800 SANITIZER_COMMON_CFLAGS)
# Set common link flags.
append_list_if(COMPILER_RT_HAS_NODEFAULTLIBS_FLAG -nodefaultlibs SANITIZER_COMMON_LINK_FLAGS)
if (SANITIZER_USE_COMPILER_RT)
list(APPEND SANITIZER_COMMON_LINK_FLAGS -rtlib=compiler-rt)
find_compiler_rt_library(builtins COMPILER_RT_BUILTINS_LIBRARY)
if (COMPILER_RT_USE_BUILTINS_LIBRARY)
list(APPEND SANITIZER_COMMON_LINK_LIBS ${COMPILER_RT_BUILTINS_LIBRARY})
else()
if (ANDROID)
@ -323,11 +325,40 @@ endif()
append_list_if(COMPILER_RT_HAS_LIBC c SANITIZER_COMMON_LINK_LIBS)
if(ANDROID)
# Put the Sanitizer shared libraries in the global group. For more details, see
# android-changes-for-ndk-developers.md#changes-to-library-search-order
if (COMPILER_RT_HAS_Z_GLOBAL)
list(APPEND SANITIZER_COMMON_LINK_FLAGS -Wl,-z,global)
endif()
endif()
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Fuchsia")
list(APPEND SANITIZER_COMMON_LINK_FLAGS -Wl,-z,defs,-z,now,-z,relro)
list(APPEND SANITIZER_COMMON_LINK_LIBS zircon)
endif()
if (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libc++")
if (SANITIZER_CXX_ABI_INTREE)
if (NOT LIBCXXABI_ENABLE_STATIC_UNWINDER AND (TARGET unwind_shared OR HAVE_LIBUNWIND))
list(APPEND SANITIZER_CXX_ABI_LIBRARY unwind_shared)
elseif (LIBCXXABI_ENABLE_STATIC_UNWINDER AND (TARGET unwind_static OR HAVE_LIBUNWIND))
list(APPEND SANITIZER_CXX_ABI_LIBRARY unwind_static)
endif()
if (NOT LIBCXX_ENABLE_STATIC_ABI_LIBRARY AND (TARGET cxxabi_shared OR HAVE_LIBCXXABI))
list(APPEND SANITIZER_CXX_ABI_LIBRARY cxxabi_shared)
elseif (LIBCXX_ENABLE_STATIC_ABI_LIBRARY AND (TARGET cxxabi_static OR HAVE_LIBCXXABI))
list(APPEND SANITIZER_CXX_ABI_LIBRARY cxxabi_static)
endif()
else()
append_list_if(COMPILER_RT_HAS_LIBCXX c++ SANITIZER_CXX_ABI_LIBRARY)
endif()
elseif (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libcxxabi")
list(APPEND SANITIZER_CXX_ABI_LIBRARY "c++abi")
elseif (SANITIZER_CXX_ABI_LIBNAME STREQUAL "libstdc++")
append_list_if(COMPILER_RT_HAS_LIBSTDCXX stdc++ SANITIZER_CXX_ABI_LIBRARY)
endif()
# Warnings to turn off for all libraries, not just sanitizers.
append_string_if(COMPILER_RT_HAS_WUNUSED_PARAMETER_FLAG -Wno-unused-parameter CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
@ -339,7 +370,7 @@ if (CMAKE_LINKER MATCHES "link.exe$")
# it, but CMake doesn't seem to have a way to set linker flags for
# individual static libraries, so we enable the suppression flag for
# the whole compiler-rt project.
append("/IGNORE:4221" CMAKE_STATIC_LINKER_FLAGS)
set(CMAKE_STATIC_LINKER_FLAGS "${CMAKE_STATIC_LINKER_FLAGS} /IGNORE:4221")
endif()
add_subdirectory(include)


@ -14,7 +14,7 @@ Full text of the relevant licenses is included below.
University of Illinois/NCSA
Open Source License
Copyright (c) 2009-2016 by the contributors listed in CREDITS.TXT
Copyright (c) 2009-2018 by the contributors listed in CREDITS.TXT
All rights reserved.


@ -31,9 +31,12 @@ endfunction()
# ARCHS <architectures>
# SOURCES <source files>
# CFLAGS <compile flags>
# DEFS <compile definitions>)
# DEFS <compile definitions>
# DEPS <dependencies>
# ADDITIONAL_HEADERS <header files>)
function(add_compiler_rt_object_libraries name)
cmake_parse_arguments(LIB "" "" "OS;ARCHS;SOURCES;CFLAGS;DEFS" ${ARGN})
cmake_parse_arguments(LIB "" "" "OS;ARCHS;SOURCES;CFLAGS;DEFS;DEPS;ADDITIONAL_HEADERS"
${ARGN})
set(libnames)
if(APPLE)
foreach(os ${LIB_OS})
@ -54,8 +57,18 @@ function(add_compiler_rt_object_libraries name)
endforeach()
endif()
# Add headers to LIB_SOURCES for IDEs
compiler_rt_process_sources(LIB_SOURCES
${LIB_SOURCES}
ADDITIONAL_HEADERS
${LIB_ADDITIONAL_HEADERS}
)
foreach(libname ${libnames})
add_library(${libname} OBJECT ${LIB_SOURCES})
if(LIB_DEPS)
add_dependencies(${libname} ${LIB_DEPS})
endif()
# Strip out -msse3 if this isn't macOS.
set(target_flags ${LIB_CFLAGS})
@ -105,10 +118,14 @@ function(add_asm_sources output)
endfunction()
macro(set_output_name output name arch)
if(ANDROID AND ${arch} STREQUAL "i386")
set(${output} "${name}-i686${COMPILER_RT_OS_SUFFIX}")
if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR)
set(${output} ${name})
else()
set(${output} "${name}-${arch}${COMPILER_RT_OS_SUFFIX}")
if(ANDROID AND ${arch} STREQUAL "i386")
set(${output} "${name}-i686${COMPILER_RT_OS_SUFFIX}")
else()
set(${output} "${name}-${arch}${COMPILER_RT_OS_SUFFIX}")
endif()
endif()
endmacro()
@ -124,7 +141,8 @@ endmacro()
# DEFS <compile definitions>
# LINK_LIBS <linked libraries> (only for shared library)
# OBJECT_LIBS <object libraries to use as sources>
# PARENT_TARGET <convenience parent target>)
# PARENT_TARGET <convenience parent target>
# ADDITIONAL_HEADERS <header files>)
function(add_compiler_rt_runtime name type)
if(NOT type MATCHES "^(STATIC|SHARED)$")
message(FATAL_ERROR "type argument must be STATIC or SHARED")
@ -133,7 +151,7 @@ function(add_compiler_rt_runtime name type)
cmake_parse_arguments(LIB
""
"PARENT_TARGET"
"OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;LINK_LIBS;OBJECT_LIBS"
"OS;ARCHS;SOURCES;CFLAGS;LINK_FLAGS;DEFS;LINK_LIBS;OBJECT_LIBS;ADDITIONAL_HEADERS"
${ARGN})
set(libnames)
# Until we support this some other way, build compiler-rt runtime without LTO
@ -144,6 +162,18 @@ function(add_compiler_rt_runtime name type)
set(NO_LTO_FLAGS "")
endif()
list(LENGTH LIB_SOURCES LIB_SOURCES_LENGTH)
if (${LIB_SOURCES_LENGTH} GREATER 0)
# Add headers to LIB_SOURCES for IDEs. It doesn't make sense to
# do this for a runtime library that only consists of OBJECT
# libraries, so only add the headers when source files are present.
compiler_rt_process_sources(LIB_SOURCES
${LIB_SOURCES}
ADDITIONAL_HEADERS
${LIB_ADDITIONAL_HEADERS}
)
endif()
if(APPLE)
foreach(os ${LIB_OS})
# Strip out -msse3 if this isn't macOS.
@ -164,6 +194,8 @@ function(add_compiler_rt_runtime name type)
set(output_name_${libname} ${libname}${COMPILER_RT_OS_SUFFIX})
set(sources_${libname} ${LIB_SOURCES})
format_object_libs(sources_${libname} ${os} ${LIB_OBJECT_LIBS})
get_compiler_rt_output_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} output_dir_${libname})
get_compiler_rt_install_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} install_dir_${libname})
endif()
endforeach()
else()
@ -189,6 +221,8 @@ function(add_compiler_rt_runtime name type)
format_object_libs(sources_${libname} ${arch} ${LIB_OBJECT_LIBS})
set(libnames ${libnames} ${libname})
set(extra_cflags_${libname} ${TARGET_${arch}_CFLAGS} ${NO_LTO_FLAGS} ${LIB_CFLAGS})
get_compiler_rt_output_dir(${arch} output_dir_${libname})
get_compiler_rt_install_dir(${arch} install_dir_${libname})
endforeach()
endif()
@ -200,6 +234,8 @@ function(add_compiler_rt_runtime name type)
# If the parent targets aren't created we should create them
if(NOT TARGET ${LIB_PARENT_TARGET})
add_custom_target(${LIB_PARENT_TARGET})
set_target_properties(${LIB_PARENT_TARGET} PROPERTIES
FOLDER "Compiler-RT Misc")
endif()
if(NOT TARGET install-${LIB_PARENT_TARGET})
# The parent install target specifies the parent component to scrape up
@ -239,7 +275,7 @@ function(add_compiler_rt_runtime name type)
set_target_link_flags(${libname} ${extra_link_flags_${libname}})
set_property(TARGET ${libname} APPEND PROPERTY
COMPILE_DEFINITIONS ${LIB_DEFS})
set_target_output_directories(${libname} ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
set_target_output_directories(${libname} ${output_dir_${libname}})
set_target_properties(${libname} PROPERTIES
OUTPUT_NAME ${output_name_${libname}})
set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Runtime")
@ -247,6 +283,9 @@ function(add_compiler_rt_runtime name type)
target_link_libraries(${libname} ${LIB_LINK_LIBS})
endif()
if(${type} STREQUAL "SHARED")
if(COMMAND llvm_setup_rpath)
llvm_setup_rpath(${libname})
endif()
if(WIN32 AND NOT CYGWIN AND NOT MINGW)
set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "")
set_target_properties(${libname} PROPERTIES IMPORT_SUFFIX ".lib")
@ -261,11 +300,11 @@ function(add_compiler_rt_runtime name type)
endif()
endif()
install(TARGETS ${libname}
ARCHIVE DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR}
ARCHIVE DESTINATION ${install_dir_${libname}}
${COMPONENT_OPTION}
LIBRARY DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR}
LIBRARY DESTINATION ${install_dir_${libname}}
${COMPONENT_OPTION}
RUNTIME DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR}
RUNTIME DESTINATION ${install_dir_${libname}}
${COMPONENT_OPTION})
# We only want to generate per-library install targets if you aren't using
@ -431,7 +470,7 @@ endfunction()
macro(add_compiler_rt_resource_file target_name file_name component)
set(src_file "${CMAKE_CURRENT_SOURCE_DIR}/${file_name}")
set(dst_file "${COMPILER_RT_OUTPUT_DIR}/${file_name}")
set(dst_file "${COMPILER_RT_OUTPUT_DIR}/share/${file_name}")
add_custom_command(OUTPUT ${dst_file}
DEPENDS ${src_file}
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${src_file} ${dst_file}
@ -439,7 +478,7 @@ macro(add_compiler_rt_resource_file target_name file_name component)
add_custom_target(${target_name} DEPENDS ${dst_file})
# Install in Clang resource directory.
install(FILES ${file_name}
DESTINATION ${COMPILER_RT_INSTALL_PATH}
DESTINATION ${COMPILER_RT_INSTALL_PATH}/share
COMPONENT ${component})
add_dependencies(${component} ${target_name})
@ -463,53 +502,123 @@ endmacro(add_compiler_rt_script src name)
# Can be used to build sanitized versions of libc++ for running unit tests.
# add_custom_libcxx(<name> <prefix>
# DEPS <list of build deps>
# CFLAGS <list of compile flags>)
# CFLAGS <list of compile flags>
# USE_TOOLCHAIN)
macro(add_custom_libcxx name prefix)
if(NOT COMPILER_RT_LIBCXX_PATH)
message(FATAL_ERROR "libcxx not found!")
endif()
cmake_parse_arguments(LIBCXX "" "" "DEPS;CFLAGS;CMAKE_ARGS" ${ARGN})
foreach(flag ${LIBCXX_CFLAGS})
set(flagstr "${flagstr} ${flag}")
endforeach()
set(LIBCXX_CFLAGS ${flagstr})
cmake_parse_arguments(LIBCXX "USE_TOOLCHAIN" "" "DEPS;CFLAGS;CMAKE_ARGS" ${ARGN})
if(NOT COMPILER_RT_STANDALONE_BUILD)
list(APPEND LIBCXX_DEPS clang)
if(LIBCXX_USE_TOOLCHAIN)
set(compiler_args -DCMAKE_C_COMPILER=${COMPILER_RT_TEST_COMPILER}
-DCMAKE_CXX_COMPILER=${COMPILER_RT_TEST_CXX_COMPILER})
if(NOT COMPILER_RT_STANDALONE_BUILD)
set(toolchain_deps $<TARGET_FILE:clang>)
set(force_deps DEPENDS $<TARGET_FILE:clang>)
endif()
else()
set(compiler_args -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER})
endif()
set(STAMP_DIR ${prefix}-stamps/)
set(BINARY_DIR ${prefix}-bins/)
add_custom_target(${name}-clear
COMMAND ${CMAKE_COMMAND} -E remove_directory ${BINARY_DIR}
COMMAND ${CMAKE_COMMAND} -E remove_directory ${STAMP_DIR}
COMMENT "Clobbering ${name} build and stamp directories"
USES_TERMINAL
)
set_target_properties(${name}-clear PROPERTIES FOLDER "Compiler-RT Misc")
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp
DEPENDS ${LIBCXX_DEPS} ${toolchain_deps}
COMMAND ${CMAKE_COMMAND} -E touch ${BINARY_DIR}/CMakeCache.txt
COMMAND ${CMAKE_COMMAND} -E touch ${STAMP_DIR}/${name}-mkdir
COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp
COMMENT "Clobbering bootstrap build and stamp directories"
)
add_custom_target(${name}-clobber
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp)
set_target_properties(${name}-clobber PROPERTIES FOLDER "Compiler-RT Misc")
set(PASSTHROUGH_VARIABLES
CMAKE_C_COMPILER_TARGET
CMAKE_CXX_COMPILER_TARGET
CMAKE_INSTALL_PREFIX
CMAKE_MAKE_PROGRAM
CMAKE_LINKER
CMAKE_AR
CMAKE_RANLIB
CMAKE_NM
CMAKE_OBJCOPY
CMAKE_OBJDUMP
CMAKE_STRIP
CMAKE_SYSROOT
CMAKE_SYSTEM_NAME)
foreach(variable ${PASSTHROUGH_VARIABLES})
if(${variable})
list(APPEND CMAKE_PASSTHROUGH_VARIABLES -D${variable}=${${variable}})
endif()
endforeach()
string(REPLACE ";" " " FLAGS_STRING "${LIBCXX_CFLAGS}")
set(LIBCXX_C_FLAGS "${FLAGS_STRING}")
set(LIBCXX_CXX_FLAGS "${FLAGS_STRING}")
ExternalProject_Add(${name}
DEPENDS ${name}-clobber ${LIBCXX_DEPS}
PREFIX ${prefix}
SOURCE_DIR ${COMPILER_RT_LIBCXX_PATH}
CMAKE_ARGS -DCMAKE_MAKE_PROGRAM:STRING=${CMAKE_MAKE_PROGRAM}
-DCMAKE_C_COMPILER=${COMPILER_RT_TEST_COMPILER}
-DCMAKE_CXX_COMPILER=${COMPILER_RT_TEST_CXX_COMPILER}
-DCMAKE_C_FLAGS=${LIBCXX_CFLAGS}
-DCMAKE_CXX_FLAGS=${LIBCXX_CFLAGS}
STAMP_DIR ${STAMP_DIR}
BINARY_DIR ${BINARY_DIR}
CMAKE_ARGS ${CMAKE_PASSTHROUGH_VARIABLES}
${compiler_args}
-DCMAKE_C_FLAGS=${LIBCXX_C_FLAGS}
-DCMAKE_CXX_FLAGS=${LIBCXX_CXX_FLAGS}
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
-DLLVM_PATH=${LLVM_MAIN_SRC_DIR}
-DLIBCXX_STANDALONE_BUILD=On
-DLLVM_BINARY_DIR=${prefix}
-DLLVM_LIBRARY_OUTPUT_INTDIR=${prefix}/lib
-DLIBCXX_STANDALONE_BUILD=ON
${LIBCXX_CMAKE_ARGS}
LOG_BUILD 1
LOG_CONFIGURE 1
LOG_INSTALL 1
)
set_target_properties(${name} PROPERTIES EXCLUDE_FROM_ALL TRUE)
ExternalProject_Add_Step(${name} force-reconfigure
DEPENDERS configure
ALWAYS 1
INSTALL_COMMAND ""
STEP_TARGETS configure build
BUILD_ALWAYS 1
USES_TERMINAL_CONFIGURE 1
USES_TERMINAL_BUILD 1
USES_TERMINAL_INSTALL 1
EXCLUDE_FROM_ALL TRUE
)
ExternalProject_Add_Step(${name} clobber
COMMAND ${CMAKE_COMMAND} -E remove_directory <BINARY_DIR>
COMMAND ${CMAKE_COMMAND} -E make_directory <BINARY_DIR>
COMMENT "Clobberring ${name} build directory..."
DEPENDERS configure
DEPENDS ${LIBCXX_DEPS}
if (CMAKE_GENERATOR MATCHES "Make")
set(run_clean "$(MAKE)" "-C" "${BINARY_DIR}" "clean")
else()
set(run_clean ${CMAKE_COMMAND} --build ${BINARY_DIR} --target clean
--config "$<CONFIGURATION>")
endif()
ExternalProject_Add_Step(${name} clean
COMMAND ${run_clean}
COMMENT "Cleaning ${name}..."
DEPENDEES configure
${force_deps}
WORKING_DIRECTORY ${BINARY_DIR}
EXCLUDE_FROM_MAIN 1
USES_TERMINAL 1
)
ExternalProject_Add_StepTargets(${name} clean)
if(LIBCXX_USE_TOOLCHAIN)
add_dependencies(${name}-clean ${name}-clobber)
set_target_properties(${name}-clean PROPERTIES
SOURCES ${CMAKE_CURRENT_BINARY_DIR}/${name}-clobber-stamp)
endif()
endmacro()
function(rt_externalize_debuginfo name)
@ -542,8 +651,10 @@ endfunction()
function(configure_compiler_rt_lit_site_cfg input output)
set_llvm_build_mode()
get_compiler_rt_output_dir(${COMPILER_RT_DEFAULT_TARGET_ARCH} output_dir)
string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_TEST_COMPILER ${COMPILER_RT_TEST_COMPILER})
string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR ${COMPILER_RT_LIBRARY_OUTPUT_DIR})
string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} COMPILER_RT_RESOLVED_LIBRARY_OUTPUT_DIR ${output_dir})
configure_lit_site_cfg(${input} ${output})
endfunction()


@ -43,7 +43,7 @@ endfunction()
# link for.
function(darwin_get_toolchain_supported_archs output_var)
execute_process(
COMMAND ld -v
COMMAND "${CMAKE_LINKER}" -v
ERROR_VARIABLE LINKER_VERSION)
string(REGEX MATCH "configured to support archs: ([^\n]+)"
@ -230,6 +230,7 @@ macro(darwin_add_builtin_library name suffix)
list(APPEND ${LIB_OS}_${suffix}_libs ${libname})
list(APPEND ${LIB_OS}_${suffix}_lipo_flags -arch ${arch} $<TARGET_FILE:${libname}>)
set_target_properties(${libname} PROPERTIES FOLDER "Compiler-RT Libraries")
endmacro()
function(darwin_lipo_libs name)
@ -251,6 +252,7 @@ function(darwin_lipo_libs name)
add_dependencies(${LIB_PARENT_TARGET} ${name})
install(FILES ${LIB_OUTPUT_DIR}/lib${name}.a
DESTINATION ${LIB_INSTALL_DIR})
set_target_properties(${name} PROPERTIES FOLDER "Compiler-RT Misc")
else()
message(WARNING "Not generating lipo target for ${name} because no input libraries exist.")
endif()


@ -168,6 +168,7 @@ macro(detect_target_arch)
check_symbol_exists(__mips64__ "" __MIPS64)
check_symbol_exists(__powerpc64__ "" __PPC64)
check_symbol_exists(__powerpc64le__ "" __PPC64LE)
check_symbol_exists(__riscv "" __RISCV)
check_symbol_exists(__s390x__ "" __S390X)
check_symbol_exists(__wasm32__ "" __WEBASSEMBLY32)
check_symbol_exists(__wasm64__ "" __WEBASSEMBLY64)
@ -187,6 +188,14 @@ macro(detect_target_arch)
add_default_target_arch(powerpc64)
elseif(__PPC64LE)
add_default_target_arch(powerpc64le)
elseif(__RISCV)
if(CMAKE_SIZEOF_VOID_P EQUAL "4")
add_default_target_arch(riscv32)
elseif(CMAKE_SIZEOF_VOID_P EQUAL "8")
add_default_target_arch(riscv64)
else()
message(FATAL_ERROR "Unsupport XLEN for RISC-V")
endif()
elseif(__S390X)
add_default_target_arch(s390x)
elseif(__WEBASSEMBLY32)
@ -305,3 +314,69 @@ function(filter_builtin_sources output_var exclude_or_include excluded_list)
endforeach ()
set(${output_var} ${intermediate} PARENT_SCOPE)
endfunction()
function(get_compiler_rt_target arch variable)
if(ANDROID AND ${arch} STREQUAL "i386")
set(target "i686${COMPILER_RT_OS_SUFFIX}-${COMPILER_RT_DEFAULT_TARGET_OS}")
else()
set(target "${arch}-${COMPILER_RT_DEFAULT_TARGET_OS}")
endif()
if(COMPILER_RT_DEFAULT_TARGET_ABI)
set(target "${target}-${COMPILER_RT_DEFAULT_TARGET_ABI}")
endif()
set(${variable} ${target} PARENT_SCOPE)
endfunction()
function(get_compiler_rt_install_dir arch install_dir)
if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
get_compiler_rt_target(${arch} target)
set(${install_dir} ${COMPILER_RT_INSTALL_PATH}/${target}/lib PARENT_SCOPE)
else()
set(${install_dir} ${COMPILER_RT_LIBRARY_INSTALL_DIR} PARENT_SCOPE)
endif()
endfunction()
function(get_compiler_rt_output_dir arch output_dir)
if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
get_compiler_rt_target(${arch} target)
set(${output_dir} ${COMPILER_RT_OUTPUT_DIR}/${target}/lib PARENT_SCOPE)
else()
set(${output_dir} ${COMPILER_RT_LIBRARY_OUTPUT_DIR} PARENT_SCOPE)
endif()
endfunction()
# compiler_rt_process_sources(
# <OUTPUT_VAR>
# <SOURCE_FILE> ...
# [ADDITIONAL_HEADERS <header> ...]
# )
#
# Process the provided sources and write the list of new sources
# into `<OUTPUT_VAR>`.
#
# ADDITIONAL_HEADERS - Adds the supplied header to list of sources for IDEs.
#
# This function is very similar to `llvm_process_sources()` but exists here
# because we need to support standalone builds of compiler-rt.
function(compiler_rt_process_sources OUTPUT_VAR)
cmake_parse_arguments(
ARG
""
""
"ADDITIONAL_HEADERS"
${ARGN}
)
set(sources ${ARG_UNPARSED_ARGUMENTS})
set(headers "")
if (XCODE OR MSVC_IDE OR CMAKE_EXTRA_GENERATOR)
# For IDEs we need to tell CMake about header files.
# Otherwise they won't show up in UI.
set(headers ${ARG_ADDITIONAL_HEADERS})
list(LENGTH headers headers_length)
if (${headers_length} GREATER 0)
set_source_files_properties(${headers}
PROPERTIES HEADER_FILE_ONLY ON)
endif()
endif()
set("${OUTPUT_VAR}" ${sources} ${headers} PARENT_SCOPE)
endfunction()


@ -1,5 +1,4 @@
function(find_compiler_rt_library name dest)
set(dest "" PARENT_SCOPE)
function(find_compiler_rt_library name variable)
set(CLANG_COMMAND ${CMAKE_CXX_COMPILER} ${SANITIZER_COMMON_CFLAGS}
"--rtlib=compiler-rt" "--print-libgcc-file-name")
if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_CXX_COMPILER_TARGET)
@ -14,7 +13,7 @@ function(find_compiler_rt_library name dest)
string(REPLACE "builtins" "${name}" LIBRARY_FILE "${LIBRARY_FILE}")
if (NOT HAD_ERROR AND EXISTS "${LIBRARY_FILE}")
message(STATUS "Found compiler-rt ${name} library: ${LIBRARY_FILE}")
set(${dest} "${LIBRARY_FILE}" PARENT_SCOPE)
set(${variable} "${LIBRARY_FILE}" PARENT_SCOPE)
else()
message(STATUS "Failed to find compiler-rt ${name} library")
endif()


@ -1,3 +1,5 @@
include(CompilerRTUtils)
set(SANITIZER_GEN_DYNAMIC_LIST
${COMPILER_RT_SOURCE_DIR}/lib/sanitizer_common/scripts/gen_dynamic_list.py)
@ -37,9 +39,9 @@ macro(add_sanitizer_rt_symbols name)
add_custom_target(${target_name}-symbols ALL
DEPENDS ${stamp}
SOURCES ${SANITIZER_GEN_DYNAMIC_LIST} ${ARG_EXTRA})
get_compiler_rt_install_dir(${arch} install_dir)
install(FILES $<TARGET_FILE:${target_name}>.syms
DESTINATION ${COMPILER_RT_LIBRARY_INSTALL_DIR})
DESTINATION ${install_dir})
if(ARG_PARENT_TARGET)
add_dependencies(${ARG_PARENT_TARGET} ${target_name}-symbols)
endif()
@ -81,7 +83,7 @@ macro(add_sanitizer_rt_version_list name)
endmacro()
# Add target to check code style for sanitizer runtimes.
if(CMAKE_HOST_UNIX)
if(CMAKE_HOST_UNIX AND NOT OS_NAME MATCHES "OpenBSD")
add_custom_target(SanitizerLintCheck
COMMAND env LLVM_CHECKOUT=${LLVM_MAIN_SRC_DIR} SILENT=1 TMPDIR=
PYTHON_EXECUTABLE=${PYTHON_EXECUTABLE}
@ -90,5 +92,9 @@ if(CMAKE_HOST_UNIX)
DEPENDS ${SANITIZER_LINT_SCRIPT}
COMMENT "Running lint check for sanitizer sources..."
VERBATIM)
else()
add_custom_target(SanitizerLintCheck
COMMAND echo "No lint check")
endif()
set_target_properties(SanitizerLintCheck
PROPERTIES FOLDER "Compiler-RT Misc")


@ -12,7 +12,14 @@ check_include_file(unwind.h HAVE_UNWIND_H)
add_custom_target(compiler-rt ALL)
add_custom_target(install-compiler-rt)
add_custom_target(install-compiler-rt-stripped)
set_target_properties(compiler-rt PROPERTIES FOLDER "Compiler-RT Misc")
set_property(
TARGET
compiler-rt
install-compiler-rt
install-compiler-rt-stripped
PROPERTY
FOLDER "Compiler-RT Misc"
)
# Setting these variables from an LLVM build is sufficient that compiler-rt can
# construct the output paths, so it can behave as if it were in-tree here.
@ -69,10 +76,17 @@ endif()
if(NOT DEFINED COMPILER_RT_OS_DIR)
string(TOLOWER ${CMAKE_SYSTEM_NAME} COMPILER_RT_OS_DIR)
endif()
set(COMPILER_RT_LIBRARY_OUTPUT_DIR
${COMPILER_RT_OUTPUT_DIR}/lib/${COMPILER_RT_OS_DIR})
set(COMPILER_RT_LIBRARY_INSTALL_DIR
${COMPILER_RT_INSTALL_PATH}/lib/${COMPILER_RT_OS_DIR})
if(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR AND NOT APPLE)
set(COMPILER_RT_LIBRARY_OUTPUT_DIR
${COMPILER_RT_OUTPUT_DIR})
set(COMPILER_RT_LIBRARY_INSTALL_DIR
${COMPILER_RT_INSTALL_PATH})
else(LLVM_ENABLE_PER_TARGET_RUNTIME_DIR)
set(COMPILER_RT_LIBRARY_OUTPUT_DIR
${COMPILER_RT_OUTPUT_DIR}/lib/${COMPILER_RT_OS_DIR})
set(COMPILER_RT_LIBRARY_INSTALL_DIR
${COMPILER_RT_INSTALL_PATH}/lib/${COMPILER_RT_OS_DIR})
endif()
if(APPLE)
# On Darwin if /usr/include doesn't exist, the user probably has Xcode but not
@ -139,8 +153,16 @@ macro(test_targets)
add_default_target_arch(${COMPILER_RT_DEFAULT_TARGET_ARCH})
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "i[2-6]86|x86|amd64")
if(NOT MSVC)
test_target_arch(x86_64 "" "-m64")
test_target_arch(i386 __i386__ "-m32")
if(CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
if (CMAKE_SIZEOF_VOID_P EQUAL 4)
test_target_arch(i386 __i386__ "-m32")
else()
test_target_arch(x86_64 "" "-m64")
endif()
else()
test_target_arch(x86_64 "" "-m64")
test_target_arch(i386 __i386__ "-m32")
endif()
else()
if (CMAKE_SIZEOF_VOID_P EQUAL 4)
test_target_arch(i386 "" "")
@ -186,6 +208,10 @@ macro(test_targets)
test_target_arch(aarch32 "" "-march=armv8-a")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "aarch64")
test_target_arch(aarch64 "" "-march=armv8-a")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "riscv32")
test_target_arch(riscv32 "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "riscv64")
test_target_arch(riscv64 "" "")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm32")
test_target_arch(wasm32 "" "--target=wasm32-unknown-unknown")
elseif("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "wasm64")


@ -25,11 +25,14 @@ int foo(int x, int y) {
set(ARM64 aarch64)
set(ARM32 arm armhf armv6m armv7m armv7em armv7 armv7s armv7k)
set(HEXAGON hexagon)
set(X86 i386)
set(X86_64 x86_64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
set(RISCV64 riscv64)
set(WASM32 wasm32)
set(WASM64 wasm64)
@ -40,7 +43,7 @@ if(APPLE)
endif()
set(ALL_BUILTIN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
${MIPS32} ${MIPS64} ${PPC64} ${WASM32} ${WASM64})
${HEXAGON} ${MIPS32} ${MIPS64} ${PPC64} ${RISCV32} ${RISCV64} ${WASM32} ${WASM64})
include(CompilerRTUtils)
include(CompilerRTDarwinUtils)


@ -13,7 +13,10 @@ function(check_linker_flag flag out_var)
endfunction()
check_library_exists(c fopen "" COMPILER_RT_HAS_LIBC)
if (NOT SANITIZER_USE_COMPILER_RT)
if (COMPILER_RT_USE_BUILTINS_LIBRARY)
include(HandleCompilerRT)
find_compiler_rt_library(builtins COMPILER_RT_BUILTINS_LIBRARY)
else()
if (ANDROID)
check_library_exists(gcc __gcc_personality_v0 "" COMPILER_RT_HAS_GCC_LIB)
else()
@ -27,9 +30,7 @@ if (COMPILER_RT_HAS_NODEFAULTLIBS_FLAG)
if (COMPILER_RT_HAS_LIBC)
list(APPEND CMAKE_REQUIRED_LIBRARIES c)
endif ()
if (SANITIZER_USE_COMPILER_RT)
list(APPEND CMAKE_REQUIRED_FLAGS -rtlib=compiler-rt)
find_compiler_rt_library(builtins COMPILER_RT_BUILTINS_LIBRARY)
if (COMPILER_RT_USE_BUILTINS_LIBRARY)
list(APPEND CMAKE_REQUIRED_LIBRARIES "${COMPILER_RT_BUILTINS_LIBRARY}")
elseif (COMPILER_RT_HAS_GCC_S_LIB)
list(APPEND CMAKE_REQUIRED_LIBRARIES gcc_s)
@ -108,6 +109,7 @@ if (ANDROID AND COMPILER_RT_HAS_LIBDL)
# Android's libstdc++ has a dependency on libdl.
list(APPEND CMAKE_REQUIRED_LIBRARIES dl)
endif()
check_library_exists(c++ __cxa_throw "" COMPILER_RT_HAS_LIBCXX)
check_library_exists(stdc++ __cxa_throw "" COMPILER_RT_HAS_LIBSTDCXX)
# Linker flags.
@ -174,11 +176,14 @@ endmacro()
set(ARM64 aarch64)
set(ARM32 arm armhf)
set(HEXAGON hexagon)
set(X86 i386)
set(X86_64 x86_64)
set(MIPS32 mips mipsel)
set(MIPS64 mips64 mips64el)
set(PPC64 powerpc64 powerpc64le)
set(RISCV32 riscv32)
set(RISCV64 riscv64)
set(S390X s390x)
set(WASM32 wasm32)
set(WASM64 wasm64)
@ -194,7 +199,7 @@ set(ALL_SANITIZER_COMMON_SUPPORTED_ARCH ${X86} ${X86_64} ${PPC64}
set(ALL_ASAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
${MIPS32} ${MIPS64} ${PPC64} ${S390X})
set(ALL_DFSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64})
set(ALL_FUZZER_SUPPORTED_ARCH x86_64)
set(ALL_FUZZER_SUPPORTED_ARCH ${X86_64} ${ARM64})
if(APPLE)
set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64})
@ -202,7 +207,7 @@ else()
set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64} ${ARM32} ${PPC64})
endif()
set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
set(ALL_HWASAN_SUPPORTED_ARCH ${ARM64})
set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64})
set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64}
${MIPS32} ${MIPS64} ${S390X})
set(ALL_TSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64})
@ -211,12 +216,13 @@ set(ALL_UBSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64}
set(ALL_SAFESTACK_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM64} ${MIPS32} ${MIPS64})
set(ALL_CFI_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS64})
set(ALL_ESAN_SUPPORTED_ARCH ${X86_64} ${MIPS64})
set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64})
set(ALL_SCUDO_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} ${PPC64})
if(APPLE)
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64})
else()
set(ALL_XRAY_SUPPORTED_ARCH ${X86_64} ${ARM32} ${ARM64} ${MIPS32} ${MIPS64} powerpc64le)
endif()
set(ALL_SHADOWCALLSTACK_SUPPORTED_ARCH ${X86_64} ${ARM64})
if(APPLE)
include(CompilerRTDarwinUtils)
@ -365,7 +371,11 @@ if(APPLE)
if(DARWIN_${platform}_ARCHS)
list(APPEND SANITIZER_COMMON_SUPPORTED_OS ${platform})
list(APPEND PROFILE_SUPPORTED_OS ${platform})
list(APPEND TSAN_SUPPORTED_OS ${platform})
list_intersect(DARWIN_${platform}_TSAN_ARCHS DARWIN_${platform}_ARCHS ALL_TSAN_SUPPORTED_ARCH)
if(DARWIN_${platform}_TSAN_ARCHS)
list(APPEND TSAN_SUPPORTED_OS ${platform})
endif()
endif()
foreach(arch ${DARWIN_${platform}_ARCHS})
list(APPEND COMPILER_RT_SUPPORTED_ARCH ${arch})
@ -378,7 +388,6 @@ if(APPLE)
# for list_intersect
include(CompilerRTUtils)
list_intersect(SANITIZER_COMMON_SUPPORTED_ARCH
ALL_SANITIZER_COMMON_SUPPORTED_ARCH
COMPILER_RT_SUPPORTED_ARCH
@ -423,10 +432,13 @@ if(APPLE)
SANITIZER_COMMON_SUPPORTED_ARCH)
list_intersect(FUZZER_SUPPORTED_ARCH
ALL_FUZZER_SUPPORTED_ARCH
ALL_SANITIZER_COMMON_SUPPORTED_ARCH)
SANITIZER_COMMON_SUPPORTED_ARCH)
list_intersect(XRAY_SUPPORTED_ARCH
ALL_XRAY_SUPPORTED_ARCH
SANITIZER_COMMON_SUPPORTED_ARCH)
list_intersect(SHADOWCALLSTACK_SUPPORTED_ARCH
ALL_SHADOWCALLSTACK_SUPPORTED_ARCH
SANITIZER_COMMON_SUPPORTED_ARCH)
else()
# Architectures supported by compiler-rt libraries.
@ -453,6 +465,8 @@ else()
filter_available_targets(ESAN_SUPPORTED_ARCH ${ALL_ESAN_SUPPORTED_ARCH})
filter_available_targets(SCUDO_SUPPORTED_ARCH ${ALL_SCUDO_SUPPORTED_ARCH})
filter_available_targets(XRAY_SUPPORTED_ARCH ${ALL_XRAY_SUPPORTED_ARCH})
filter_available_targets(SHADOWCALLSTACK_SUPPORTED_ARCH
${ALL_SHADOWCALLSTACK_SUPPORTED_ARCH})
endif()
if (MSVC)
@ -486,7 +500,7 @@ set(COMPILER_RT_SANITIZERS_TO_BUILD all CACHE STRING
list_replace(COMPILER_RT_SANITIZERS_TO_BUILD all "${ALL_SANITIZERS}")
if (SANITIZER_COMMON_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND
(OS_NAME MATCHES "Android|Darwin|Linux|FreeBSD|NetBSD|Fuchsia|SunOS" OR
(OS_NAME MATCHES "Android|Darwin|Linux|FreeBSD|NetBSD|OpenBSD|Fuchsia|SunOS" OR
(OS_NAME MATCHES "Windows" AND (NOT MINGW AND NOT CYGWIN))))
set(COMPILER_RT_HAS_SANITIZER_COMMON TRUE)
else()
@ -499,7 +513,8 @@ else()
set(COMPILER_RT_HAS_INTERCEPTION FALSE)
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND ASAN_SUPPORTED_ARCH)
if (COMPILER_RT_HAS_SANITIZER_COMMON AND ASAN_SUPPORTED_ARCH AND
NOT OS_NAME MATCHES "OpenBSD")
set(COMPILER_RT_HAS_ASAN TRUE)
else()
set(COMPILER_RT_HAS_ASAN FALSE)
@ -528,7 +543,7 @@ else()
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND MSAN_SUPPORTED_ARCH AND
OS_NAME MATCHES "Linux|NetBSD")
OS_NAME MATCHES "Linux|FreeBSD|NetBSD")
set(COMPILER_RT_HAS_MSAN TRUE)
else()
set(COMPILER_RT_HAS_MSAN FALSE)
@ -542,7 +557,7 @@ else()
endif()
if (PROFILE_SUPPORTED_ARCH AND NOT LLVM_USE_SANITIZER AND
OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android|SunOS")
OS_NAME MATCHES "Darwin|Linux|FreeBSD|Windows|Android|Fuchsia|SunOS")
set(COMPILER_RT_HAS_PROFILE TRUE)
else()
set(COMPILER_RT_HAS_PROFILE FALSE)
@ -556,14 +571,14 @@ else()
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND UBSAN_SUPPORTED_ARCH AND
OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD|Windows|Android|Fuchsia|SunOS")
OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD|OpenBSD|Windows|Android|Fuchsia|SunOS")
set(COMPILER_RT_HAS_UBSAN TRUE)
else()
set(COMPILER_RT_HAS_UBSAN FALSE)
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND UBSAN_SUPPORTED_ARCH AND
OS_NAME MATCHES "Linux|FreeBSD|NetBSD|Android|Darwin")
OS_NAME MATCHES "Linux|FreeBSD|NetBSD|OpenBSD|Android|Darwin")
set(COMPILER_RT_HAS_UBSAN_MINIMAL TRUE)
else()
set(COMPILER_RT_HAS_UBSAN_MINIMAL FALSE)
@ -590,22 +605,29 @@ else()
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND SCUDO_SUPPORTED_ARCH AND
OS_NAME MATCHES "Linux|Android")
OS_NAME MATCHES "Linux|Android|Fuchsia")
set(COMPILER_RT_HAS_SCUDO TRUE)
else()
set(COMPILER_RT_HAS_SCUDO FALSE)
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND XRAY_SUPPORTED_ARCH AND
OS_NAME MATCHES "Darwin|Linux")
OS_NAME MATCHES "Darwin|Linux|FreeBSD|NetBSD|OpenBSD")
set(COMPILER_RT_HAS_XRAY TRUE)
else()
set(COMPILER_RT_HAS_XRAY FALSE)
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND FUZZER_SUPPORTED_ARCH AND
OS_NAME MATCHES "Android|Darwin|Linux|NetBSD")
OS_NAME MATCHES "Android|Darwin|Linux|NetBSD|FreeBSD|OpenBSD|Fuchsia")
set(COMPILER_RT_HAS_FUZZER TRUE)
else()
set(COMPILER_RT_HAS_FUZZER FALSE)
endif()
if (COMPILER_RT_HAS_SANITIZER_COMMON AND SHADOWCALLSTACK_SUPPORTED_ARCH AND
OS_NAME MATCHES "Linux|Android")
set(COMPILER_RT_HAS_SHADOWCALLSTACK TRUE)
else()
set(COMPILER_RT_HAS_SHADOWCALLSTACK FALSE)
endif()


@ -10,6 +10,7 @@ if (COMPILER_RT_BUILD_SANITIZERS)
sanitizer/linux_syscall_hooks.h
sanitizer/lsan_interface.h
sanitizer/msan_interface.h
sanitizer/netbsd_syscall_hooks.h
sanitizer/scudo_interface.h
sanitizer/tsan_interface.h
sanitizer/tsan_interface_atomic.h)


@ -65,6 +65,11 @@ extern "C" {
void __sanitizer_unaligned_store32(void *p, uint32_t x);
void __sanitizer_unaligned_store64(void *p, uint64_t x);
// Returns 1 on the first call, then returns 0 thereafter. Called by the tool
// to ensure only one report is printed when multiple errors occur
// simultaneously.
int __sanitizer_acquire_crash_state();
// Annotate the current state of a contiguous container, such as
// std::vector, std::string or similar.
// A contiguous container is a container that keeps all of its elements
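A minimal usage sketch (illustrative, not part of this import) for the __sanitizer_acquire_crash_state() entry point added above: a reporting helper gates its output on the call so that, when several errors race, only the first one prints. The helper name and message are hypothetical, and the symbol is only resolved when linking against a sanitizer runtime.

#include <cstdio>
#include <cstdlib>

extern "C" int __sanitizer_acquire_crash_state();  // declared in sanitizer/common_interface_defs.h

// Hypothetical reporter: the first caller acquires the crash state and
// prints; later (possibly concurrent) callers return without a second report.
static void ReportFatalErrorOnce(const char *what) {
  if (!__sanitizer_acquire_crash_state())
    return;
  std::fprintf(stderr, "fatal error: %s\n", what);
  std::abort();
}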


@ -104,6 +104,14 @@ extern "C" {
copy. Source and destination regions can overlap. */
void __msan_copy_shadow(const volatile void *dst, const volatile void *src,
size_t size);
/* Disables uninitialized memory checks in interceptors. */
void __msan_scoped_disable_interceptor_checks(void);
/* Re-enables uninitialized memory checks in interceptors after a previous
call to __msan_scoped_disable_interceptor_checks. */
void __msan_scoped_enable_interceptor_checks(void);
#ifdef __cplusplus
} // extern "C"
#endif

File diff suppressed because it is too large.


@ -26,7 +26,12 @@ extern "C" {
// the hard limit (HardLimit=1) or the soft limit (HardLimit=0). The limit
// can be removed by setting LimitMb to 0. This function's parameters should
// be fully trusted to avoid security mishaps.
void __scudo_set_rss_limit(unsigned long LimitMb, int HardLimit);
void __scudo_set_rss_limit(size_t LimitMb, int HardLimit);
// This function outputs various allocator statistics for both the Primary
// and Secondary allocators, including memory usage, number of allocations
// and deallocations.
void __scudo_print_stats(void);
#ifdef __cplusplus
} // extern "C"
#endif


@ -27,6 +27,7 @@ enum XRayEntryType {
TAIL = 2,
LOG_ARGS_ENTRY = 3,
CUSTOM_EVENT = 4,
TYPED_EVENT = 5,
};
/// Provide a function to invoke for when instrumentation points are hit. This
@ -68,12 +69,23 @@ extern int __xray_set_handler_arg1(void (*entry)(int32_t, XRayEntryType,
extern int __xray_remove_handler_arg1();
/// Provide a function to invoke when XRay encounters a custom event.
extern int __xray_set_customevent_handler(void (*entry)(void*, std::size_t));
extern int __xray_set_customevent_handler(void (*entry)(void *, std::size_t));
/// This removes whatever the currently provided custom event handler is.
/// Returns 1 on success, 0 on error.
extern int __xray_remove_customevent_handler();
/// Set a handler for xray typed event logging. The first parameter is a type
/// identifier, the second is a payload, and the third is the payload size.
extern int __xray_set_typedevent_handler(void (*entry)(uint16_t, const void *,
std::size_t));
/// Removes the currently set typed event handler.
/// Returns 1 on success, 0 on error.
extern int __xray_remove_typedevent_handler();
extern uint16_t __xray_register_event_type(const char *event_type);
enum XRayPatchingStatus {
NOT_INITIALIZED = 0,
SUCCESS = 1,
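An illustrative sketch (not part of this import) of the new typed-event entry points declared above: register an event type, install a typed-event handler, and remove it again. The handler body and event-type name are hypothetical; it assumes the xray/xray_interface.h header from this import and an -fxray-instrument build.

#include <xray/xray_interface.h>

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Hypothetical handler: log the type id and payload size of each typed event.
static void OnTypedEvent(uint16_t type, const void *payload, std::size_t size) {
  std::fprintf(stderr, "typed event %u with %zu payload bytes\n",
               static_cast<unsigned>(type), size);
  (void)payload;
}

int main() {
  uint16_t my_event = __xray_register_event_type("my-app-event");
  if (!__xray_set_typedevent_handler(OnTypedEvent))
    return 1;  // treat a zero return as failure to install the handler
  // ... instrumented code emitting typed events would run here ...
  __xray_remove_typedevent_handler();
  (void)my_event;
  return 0;
}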


@ -21,27 +21,29 @@
///
/// The high-level usage pattern for these APIs look like the following:
///
/// // Before we try initializing the log implementation, we must set it as
/// // the log implementation. We provide the function pointers that define
/// // the various initialization, finalization, and other pluggable hooks
/// // that we need.
/// __xray_set_log_impl({...});
/// // We choose the mode which we'd like to install, and check whether this
/// // has succeeded. Each mode will have their own set of flags they will
/// // support, outside of the global XRay configuration options that are
/// // defined in the XRAY_OPTIONS environment variable.
/// auto select_status = __xray_log_select_mode("xray-fdr");
/// if (select_status != XRayLogRegisterStatus::XRAY_REGISTRATION_OK) {
/// // This failed, we should not proceed with attempting to initialise
/// // the currently selected mode.
/// return;
/// }
///
/// // Once that's done, we can now initialize the implementation. Each
/// // implementation has a chance to let users customize the implementation
/// // with a struct that their implementation supports. Roughly this might
/// // look like:
/// MyImplementationOptions opts;
/// opts.enable_feature = true;
/// ...
/// auto init_status = __xray_log_init(
/// BufferSize, MaxBuffers, &opts, sizeof opts);
/// if (init_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
/// // Once that's done, we can now attempt to configure the implementation.
/// // To do this, we provide the string flags configuration for the mode.
/// auto config_status = __xray_log_init_mode(
/// "xray-fdr", "verbosity=1 some_flag=1 another_flag=2");
/// if (config_status != XRayLogInitStatus::XRAY_LOG_INITIALIZED) {
/// // deal with the error here, if there is one.
/// }
///
/// // When the log implementation has had the chance to initialize, we can
/// // now patch the sleds.
/// // now patch the instrumentation points. Note that we could have patched
/// // the instrumentation points first, but there's no strict ordering to
/// // these operations.
/// auto patch_status = __xray_patch();
/// if (patch_status != XRayPatchingStatus::SUCCESS) {
/// // deal with the error here, if it is an error.
@ -56,12 +58,12 @@
///
/// // We can optionally wait before flushing the log to give other threads a
/// // chance to see that the implementation is already finalized. Also, at
/// // this point we can optionally unpatch the sleds to reduce overheads at
/// // runtime.
/// // this point we can optionally unpatch the instrumentation points to
/// // reduce overheads at runtime.
/// auto unpatch_status = __xray_unpatch();
/// if (unpatch_status != XRayPatchingStatus::SUCCESS) {
// // deal with the error here, if it is an error.
// }
/// // deal with the error here, if it is an error.
/// }
///
/// // If there are logs or data to be flushed somewhere, we can do so only
/// // after we've finalized the log. Some implementations may not actually
@ -72,6 +74,17 @@
/// // deal with the error here, if it is an error.
/// }
///
/// // Alternatively, we can go through the buffers ourselves without
/// // relying on the implementations' flushing semantics (if the
/// // implementation supports exporting this data directly).
/// auto MyBufferProcessor = +[](const char* mode, XRayBuffer buffer) {
/// // Check the "mode" to see if it's something we know how to handle...
/// // and/or do something with an XRayBuffer instance.
/// };
/// auto process_status = __xray_log_process_buffers(MyBufferProcessor);
/// if (process_status != XRayLogFlushStatus::XRAY_LOG_FLUSHED) {
/// // deal with the error here, if it is an error.
/// }
///
/// NOTE: Before calling __xray_patch() again, consider re-initializing the
/// implementation first. Some implementations might stay in an "off" state when
@ -182,9 +195,13 @@ struct XRayLogImpl {
XRayLogFlushStatus (*flush_log)();
};
/// DEPRECATED: Use the mode registration workflow instead with
/// __xray_log_register_mode(...) and __xray_log_select_mode(...). See the
/// documentation for those function.
///
/// This function installs a new logging implementation that XRay will use. In
/// case there are any nullptr members in Impl, XRay will *uninstall any
/// existing implementations*. It does NOT patch the instrumentation sleds.
/// existing implementations*. It does NOT patch the instrumentation points.
///
/// NOTE: This function does NOT attempt to finalize the currently installed
/// implementation. Use with caution.
@ -227,9 +244,14 @@ XRayLogRegisterStatus __xray_log_register_mode(const char *Mode,
/// does not update the currently installed implementation.
XRayLogRegisterStatus __xray_log_select_mode(const char *Mode);
/// Returns an identifier for the currently selected XRay mode chosen through
/// the __xray_log_select_mode(...) function call. Returns nullptr if there is
/// no currently installed mode.
const char *__xray_log_get_current_mode();
/// This function removes the currently installed implementation. It will also
/// uninstall any handlers that have been previously installed. It does NOT
/// unpatch the instrumentation sleds.
/// unpatch the instrumentation points.
///
/// NOTE: This function does NOT attempt to finalize the currently installed
/// implementation. Use with caution.
@ -244,11 +266,37 @@ XRayLogRegisterStatus __xray_log_select_mode(const char *Mode);
/// called while in any other states.
void __xray_remove_log_impl();
/// DEPRECATED: Use __xray_log_init_mode() instead, and provide all the options
/// in string form.
/// Invokes the installed implementation initialization routine. See
/// XRayLogInitStatus for what the return values mean.
XRayLogInitStatus __xray_log_init(size_t BufferSize, size_t MaxBuffers,
void *Args, size_t ArgsSize);
/// Invokes the installed initialization routine, which *must* support the
/// string based form.
///
/// NOTE: When this API is used, we still invoke the installed initialization
/// routine, but we will call it with the following convention to signal that we
/// are using the string form:
///
/// - BufferSize = 0
/// - MaxBuffers = 0
/// - ArgsSize = 0
/// - Args will be the pointer to the character buffer representing the
/// configuration.
///
/// FIXME: Updating the XRayLogImpl struct is an ABI breaking change. When we
/// are ready to make a breaking change, we should clean this up appropriately.
XRayLogInitStatus __xray_log_init_mode(const char *Mode, const char *Config);
/// Like __xray_log_init_mode(...) this version allows for providing
/// configurations that might have non-null-terminated strings. This will
/// operate similarly to __xray_log_init_mode, with the exception that
/// |ArgsSize| will be what |ConfigSize| is.
XRayLogInitStatus __xray_log_init_mode_bin(const char *Mode, const char *Config,
size_t ConfigSize);
/// Invokes the installed implementation finalization routine. See
/// XRayLogInitStatus for what the return values mean.
XRayLogInitStatus __xray_log_finalize();
@ -257,16 +305,68 @@ XRayLogInitStatus __xray_log_finalize();
/// XRayLogFlushStatus for what the return values mean.
XRayLogFlushStatus __xray_log_flushLog();
/// An XRayBuffer represents a section of memory which can be treated by log
/// processing functions as bytes stored in the logging implementation's
/// buffers.
struct XRayBuffer {
const void *Data;
size_t Size;
};
/// Registers an iterator function which takes an XRayBuffer argument, then
/// returns another XRayBuffer function representing the next buffer. When the
/// Iterator function returns an empty XRayBuffer (Data = nullptr, Size = 0),
/// this signifies the end of the buffers.
///
/// The first invocation of this Iterator function will always take an empty
/// XRayBuffer (Data = nullptr, Size = 0).
void __xray_log_set_buffer_iterator(XRayBuffer (*Iterator)(XRayBuffer));
/// Removes the currently registered buffer iterator function.
void __xray_log_remove_buffer_iterator();
/// Invokes the provided handler to process data maintained by the logging
/// handler. This API will be provided raw access to the data available in
/// memory from the logging implementation. The callback function must:
///
/// 1) Not modify the data, to avoid running into undefined behaviour.
///
/// 2) Either know the data layout, or treat the data as raw bytes for later
/// interpretation.
///
/// This API is best used in place of the `__xray_log_flushLog()` implementation
/// above to enable the caller to provide an alternative means of extracting the
/// data from the XRay implementation.
///
/// Implementations MUST then provide:
///
/// 1) A function that will return an XRayBuffer. Functions that return an
/// "empty" XRayBuffer signifies that there are no more buffers to be
/// processed. This function should be registered through the
/// `__xray_log_set_buffer_iterator(...)` function.
///
/// 2) Its own means of converting data it holds in memory into an XRayBuffer
/// structure.
///
/// See XRayLogFlushStatus for what the return values mean.
///
XRayLogFlushStatus __xray_log_process_buffers(void (*Processor)(const char *,
XRayBuffer));
} // extern "C"
namespace __xray {
/// DEPRECATED: Use __xray_log_init_mode(...) instead, and provide flag
/// configuration strings to set the options instead.
/// Options used by the LLVM XRay FDR logging implementation.
struct FDRLoggingOptions {
bool ReportErrors = false;
int Fd = -1;
};
/// DEPRECATED: Use __xray_log_init_mode(...) instead, and provide flag
/// configuration strings to set the options instead.
/// Options used by the LLVM XRay Basic (Naive) logging implementation.
struct BasicLoggingOptions {
int DurationFilterMicros = 0;
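An illustrative sketch (not part of this import) of the buffer-processing path described above, as an alternative to __xray_log_flushLog(): a processor callback receives each (mode, XRayBuffer) pair and treats the contents as opaque bytes. The output file name and helper are hypothetical; it assumes the xray/xray_log_interface.h header from this import.

#include <xray/xray_log_interface.h>

#include <cstdio>

// Hypothetical processor: append every buffer, as raw bytes, to a file.
// Per the contract above, the buffer contents must not be modified.
static void AppendBufferToFile(const char *mode, XRayBuffer buffer) {
  std::FILE *out = std::fopen("xray-buffers.bin", "ab");
  if (!out)
    return;
  std::fwrite(buffer.Data, 1, buffer.Size, out);
  std::fclose(out);
  std::fprintf(stderr, "wrote %zu bytes from mode '%s'\n", buffer.Size, mode);
}

void DumpXRayBuffers() {
  if (__xray_log_process_buffers(AppendBufferToFile) !=
      XRayLogFlushStatus::XRAY_LOG_FLUSHED)
    std::fprintf(stderr, "XRay buffer processing did not complete\n");
}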


@ -54,7 +54,7 @@ struct alignas(32) XRayFileHeader {
union {
char FreeForm[16];
// The current civiltime timestamp, as retrived from 'clock_gettime'. This
// The current civiltime timestamp, as retrieved from 'clock_gettime'. This
// allows readers of the file to determine when the file was created or
// written down.
struct timespec TS;
@ -95,8 +95,11 @@ struct alignas(32) XRayRecord {
// The thread ID for the currently running thread.
uint32_t TId = 0;
// The ID of process that is currently running
uint32_t PId = 0;
// Use some bytes in the end of the record for buffers.
char Buffer[4] = {};
char Buffer[8] = {};
} __attribute__((packed));
static_assert(sizeof(XRayRecord) == 32, "XRayRecord != 32 bytes");
@ -115,8 +118,8 @@ struct alignas(32) XRayArgPayload {
// The thread ID for the currently running thread.
uint32_t TId = 0;
// Add more padding.
uint8_t Padding2[4] = {};
// The ID of process that is currently running
uint32_t PId = 0;
// The argument payload.
uint64_t Arg = 0;


@ -1 +1,2 @@
BasedOnStyle: Google
AllowShortIfStatementsOnASingleLine: false


@ -23,6 +23,7 @@ set(ASAN_SOURCES
asan_posix.cc
asan_premap_shadow.cc
asan_report.cc
asan_rtems.cc
asan_rtl.cc
asan_shadow_setup.cc
asan_stack.cc
@ -37,6 +38,34 @@ set(ASAN_CXX_SOURCES
set(ASAN_PREINIT_SOURCES
asan_preinit.cc)
SET(ASAN_HEADERS
asan_activation.h
asan_activation_flags.inc
asan_allocator.h
asan_descriptions.h
asan_errors.h
asan_fake_stack.h
asan_flags.h
asan_flags.inc
asan_init_version.h
asan_interceptors.h
asan_interceptors_memintrinsics.h
asan_interface.inc
asan_interface_internal.h
asan_internal.h
asan_lock.h
asan_malloc_local.h
asan_mapping.h
asan_mapping_myriad.h
asan_poisoning.h
asan_premap_shadow.h
asan_report.h
asan_scariness_score.h
asan_stack.h
asan_stats.h
asan_suppressions.h
asan_thread.h)
include_directories(..)
set(ASAN_CFLAGS ${SANITIZER_COMMON_CFLAGS})
@ -46,20 +75,6 @@ append_rtti_flag(OFF ASAN_CFLAGS)
set(ASAN_DYNAMIC_LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS})
if(ANDROID)
# On Android, -z global does not do what it is documented to do.
# On Android, -z global moves the library ahead in the lookup order,
# placing it right after the LD_PRELOADs. This is used to compensate for the fact
# that Android linker does not look at the dependencies of the main executable
# that aren't dependencies of the current DSO when resolving symbols from said DSO.
# As a net result, this allows running ASan executables without LD_PRELOAD-ing the
# ASan runtime library.
# The above is applicable to L MR1 or newer.
if (COMPILER_RT_HAS_Z_GLOBAL)
list(APPEND ASAN_DYNAMIC_LINK_FLAGS -Wl,-z,global)
endif()
endif()
set(ASAN_DYNAMIC_DEFINITIONS
${ASAN_COMMON_DEFINITIONS} ASAN_DYNAMIC=1)
append_list_if(WIN32 INTERCEPTION_DYNAMIC_CRT ASAN_DYNAMIC_DEFINITIONS)
@ -83,21 +98,28 @@ add_compiler_rt_object_libraries(RTAsan_dynamic
OS ${SANITIZER_COMMON_SUPPORTED_OS}
ARCHS ${ASAN_SUPPORTED_ARCH}
SOURCES ${ASAN_SOURCES} ${ASAN_CXX_SOURCES}
ADDITIONAL_HEADERS ${ASAN_HEADERS}
CFLAGS ${ASAN_DYNAMIC_CFLAGS}
DEFS ${ASAN_DYNAMIC_DEFINITIONS})
if(NOT APPLE)
add_compiler_rt_object_libraries(RTAsan
ARCHS ${ASAN_SUPPORTED_ARCH}
SOURCES ${ASAN_SOURCES} CFLAGS ${ASAN_CFLAGS}
SOURCES ${ASAN_SOURCES}
ADDITIONAL_HEADERS ${ASAN_HEADERS}
CFLAGS ${ASAN_CFLAGS}
DEFS ${ASAN_COMMON_DEFINITIONS})
add_compiler_rt_object_libraries(RTAsan_cxx
ARCHS ${ASAN_SUPPORTED_ARCH}
SOURCES ${ASAN_CXX_SOURCES} CFLAGS ${ASAN_CFLAGS}
SOURCES ${ASAN_CXX_SOURCES}
ADDITIONAL_HEADERS ${ASAN_HEADERS}
CFLAGS ${ASAN_CFLAGS}
DEFS ${ASAN_COMMON_DEFINITIONS})
add_compiler_rt_object_libraries(RTAsan_preinit
ARCHS ${ASAN_SUPPORTED_ARCH}
SOURCES ${ASAN_PREINIT_SOURCES} CFLAGS ${ASAN_CFLAGS}
SOURCES ${ASAN_PREINIT_SOURCES}
ADDITIONAL_HEADERS ${ASAN_HEADERS}
CFLAGS ${ASAN_CFLAGS}
DEFS ${ASAN_COMMON_DEFINITIONS})
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cc "")
@ -125,6 +147,8 @@ if(APPLE)
RTInterception
RTSanitizerCommon
RTSanitizerCommonLibc
RTSanitizerCommonCoverage
RTSanitizerCommonSymbolizer
RTLSanCommon
RTUbsan
CFLAGS ${ASAN_DYNAMIC_CFLAGS}
@ -138,6 +162,8 @@ else()
RTInterception
RTSanitizerCommon
RTSanitizerCommonLibc
RTSanitizerCommonCoverage
RTSanitizerCommonSymbolizer
RTLSanCommon
RTUbsan)
@ -223,7 +249,7 @@ else()
DEFS ${ASAN_DYNAMIC_DEFINITIONS}
PARENT_TARGET asan)
if (UNIX AND NOT ${arch} STREQUAL "i386")
if (SANITIZER_USE_SYMBOLS AND NOT ${arch} STREQUAL "i386")
add_sanitizer_rt_symbols(clang_rt.asan_cxx
ARCHS ${arch})
add_dependencies(asan clang_rt.asan_cxx-${arch}-symbols)


@ -134,8 +134,9 @@ struct AsanChunk: ChunkBase {
};
struct QuarantineCallback {
explicit QuarantineCallback(AllocatorCache *cache)
: cache_(cache) {
QuarantineCallback(AllocatorCache *cache, BufferedStackTrace *stack)
: cache_(cache),
stack_(stack) {
}
void Recycle(AsanChunk *m) {
@ -168,7 +169,7 @@ struct QuarantineCallback {
void *res = get_allocator().Allocate(cache_, size, 1);
// TODO(alekseys): Consider making quarantine OOM-friendly.
if (UNLIKELY(!res))
return DieOnFailure::OnOOM();
ReportOutOfMemory(size, stack_);
return res;
}
@ -176,7 +177,9 @@ struct QuarantineCallback {
get_allocator().Deallocate(cache_, p);
}
AllocatorCache *cache_;
private:
AllocatorCache* const cache_;
BufferedStackTrace* const stack_;
};
typedef Quarantine<QuarantineCallback, AsanChunk> AsanQuarantine;
@ -397,8 +400,11 @@ struct Allocator {
AllocType alloc_type, bool can_fill) {
if (UNLIKELY(!asan_inited))
AsanInitFromRtl();
if (RssLimitExceeded())
return AsanAllocator::FailureHandler::OnOOM();
if (RssLimitExceeded()) {
if (AllocatorMayReturnNull())
return nullptr;
ReportRssLimitExceeded(stack);
}
Flags &fl = *flags();
CHECK(stack);
const uptr min_alignment = SHADOW_GRANULARITY;
@ -431,9 +437,13 @@ struct Allocator {
}
CHECK(IsAligned(needed_size, min_alignment));
if (size > kMaxAllowedMallocSize || needed_size > kMaxAllowedMallocSize) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
(void*)size);
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull()) {
Report("WARNING: AddressSanitizer failed to allocate 0x%zx bytes\n",
(void*)size);
return nullptr;
}
ReportAllocationSizeTooBig(size, needed_size, kMaxAllowedMallocSize,
stack);
}
AsanThread *t = GetCurrentThread();
@ -446,8 +456,12 @@ struct Allocator {
AllocatorCache *cache = &fallback_allocator_cache;
allocated = allocator.Allocate(cache, needed_size, 8);
}
if (!allocated)
return nullptr;
if (UNLIKELY(!allocated)) {
SetAllocatorOutOfMemory();
if (AllocatorMayReturnNull())
return nullptr;
ReportOutOfMemory(size, stack);
}
if (*(u8 *)MEM_TO_SHADOW((uptr)allocated) == 0 && CanPoisonMemory()) {
// Heap poisoning is enabled, but the allocator provides an unpoisoned
@ -583,13 +597,13 @@ struct Allocator {
if (t) {
AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
AllocatorCache *ac = GetAllocatorCache(ms);
quarantine.Put(GetQuarantineCache(ms), QuarantineCallback(ac), m,
m->UsedSize());
quarantine.Put(GetQuarantineCache(ms), QuarantineCallback(ac, stack), m,
m->UsedSize());
} else {
SpinMutexLock l(&fallback_mutex);
AllocatorCache *ac = &fallback_allocator_cache;
quarantine.Put(&fallback_quarantine_cache, QuarantineCallback(ac), m,
m->UsedSize());
quarantine.Put(&fallback_quarantine_cache, QuarantineCallback(ac, stack),
m, m->UsedSize());
}
}
@ -660,8 +674,11 @@ struct Allocator {
}
void *Calloc(uptr nmemb, uptr size, BufferedStackTrace *stack) {
if (CheckForCallocOverflow(size, nmemb))
return AsanAllocator::FailureHandler::OnBadRequest();
if (UNLIKELY(CheckForCallocOverflow(size, nmemb))) {
if (AllocatorMayReturnNull())
return nullptr;
ReportCallocOverflow(nmemb, size, stack);
}
void *ptr = Allocate(nmemb * size, 8, stack, FROM_MALLOC, false);
// If the memory comes from the secondary allocator no need to clear it
// as it comes directly from mmap.
@ -677,9 +694,9 @@ struct Allocator {
ReportFreeNotMalloced((uptr)ptr, stack);
}
void CommitBack(AsanThreadLocalMallocStorage *ms) {
void CommitBack(AsanThreadLocalMallocStorage *ms, BufferedStackTrace *stack) {
AllocatorCache *ac = GetAllocatorCache(ms);
quarantine.Drain(GetQuarantineCache(ms), QuarantineCallback(ac));
quarantine.Drain(GetQuarantineCache(ms), QuarantineCallback(ac, stack));
allocator.SwallowCache(ac);
}
@ -739,17 +756,19 @@ struct Allocator {
return AsanChunkView(m1);
}
void Purge() {
void Purge(BufferedStackTrace *stack) {
AsanThread *t = GetCurrentThread();
if (t) {
AsanThreadLocalMallocStorage *ms = &t->malloc_storage();
quarantine.DrainAndRecycle(GetQuarantineCache(ms),
QuarantineCallback(GetAllocatorCache(ms)));
QuarantineCallback(GetAllocatorCache(ms),
stack));
}
{
SpinMutexLock l(&fallback_mutex);
quarantine.DrainAndRecycle(&fallback_quarantine_cache,
QuarantineCallback(&fallback_allocator_cache));
QuarantineCallback(&fallback_allocator_cache,
stack));
}
allocator.ForceReleaseToOS();
@ -836,7 +855,8 @@ AsanChunkView FindHeapChunkByAllocBeg(uptr addr) {
}
void AsanThreadLocalMallocStorage::CommitBack() {
instance.CommitBack(this);
GET_STACK_TRACE_MALLOC;
instance.CommitBack(this, &stack);
}
void PrintInternalAllocatorStats() {
@ -883,7 +903,9 @@ void *asan_pvalloc(uptr size, BufferedStackTrace *stack) {
uptr PageSize = GetPageSizeCached();
if (UNLIKELY(CheckForPvallocOverflow(size, PageSize))) {
errno = errno_ENOMEM;
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportPvallocOverflow(size, stack);
}
// pvalloc(0) should allocate one page.
size = size ? RoundUpTo(size, PageSize) : PageSize;
@ -895,20 +917,35 @@ void *asan_memalign(uptr alignment, uptr size, BufferedStackTrace *stack,
AllocType alloc_type) {
if (UNLIKELY(!IsPowerOfTwo(alignment))) {
errno = errno_EINVAL;
return AsanAllocator::FailureHandler::OnBadRequest();
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAllocationAlignment(alignment, stack);
}
return SetErrnoOnNull(
instance.Allocate(size, alignment, stack, alloc_type, true));
}
void *asan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack) {
if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(alignment, size))) {
errno = errno_EINVAL;
if (AllocatorMayReturnNull())
return nullptr;
ReportInvalidAlignedAllocAlignment(size, alignment, stack);
}
return SetErrnoOnNull(
instance.Allocate(size, alignment, stack, FROM_MALLOC, true));
}
int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
BufferedStackTrace *stack) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment))) {
AsanAllocator::FailureHandler::OnBadRequest();
return errno_EINVAL;
if (AllocatorMayReturnNull())
return errno_EINVAL;
ReportInvalidPosixMemalignAlignment(alignment, stack);
}
void *ptr = instance.Allocate(size, alignment, stack, FROM_MALLOC, true);
if (UNLIKELY(!ptr))
// OOM error is already taken care of by Allocate.
return errno_ENOMEM;
CHECK(IsAligned((uptr)ptr, alignment));
*memptr = ptr;
@ -1054,7 +1091,8 @@ uptr __sanitizer_get_allocated_size(const void *p) {
}
void __sanitizer_purge_allocator() {
instance.Purge();
GET_STACK_TRACE_MALLOC;
instance.Purge(&stack);
}
#if !SANITIZER_SUPPORTS_WEAK_HOOKS


@ -125,11 +125,12 @@ const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x40000000000ULL; // 4T.
typedef DefaultSizeClassMap SizeClassMap;
# elif defined(__powerpc64__)
const uptr kAllocatorSpace = 0xa0000000000ULL;
const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x20000000000ULL; // 2T.
typedef DefaultSizeClassMap SizeClassMap;
# elif defined(__aarch64__) && SANITIZER_ANDROID
const uptr kAllocatorSpace = 0x3000000000ULL;
// Android needs to support 39, 42 and 48 bit VMA.
const uptr kAllocatorSpace = ~(uptr)0;
const uptr kAllocatorSize = 0x2000000000ULL; // 128G.
typedef VeryCompactSizeClassMap SizeClassMap;
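// Clarifying note (based on the documented convention of the sanitizer
// primary allocator, added here for readability): kAllocatorSpace == ~(uptr)0
// asks SizeClassAllocator64 to pick the region base dynamically via mmap
// instead of mapping it at a fixed address, which is what lets one build
// cover the 39-, 42- and 48-bit VMA layouts mentioned above.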
# elif defined(__aarch64__)
@ -207,6 +208,7 @@ void *asan_realloc(void *p, uptr size, BufferedStackTrace *stack);
void *asan_valloc(uptr size, BufferedStackTrace *stack);
void *asan_pvalloc(uptr size, BufferedStackTrace *stack);
void *asan_aligned_alloc(uptr alignment, uptr size, BufferedStackTrace *stack);
int asan_posix_memalign(void **memptr, uptr alignment, uptr size,
BufferedStackTrace *stack);
uptr asan_malloc_usable_size(const void *ptr, uptr pc, uptr bp);


@ -27,7 +27,8 @@ using namespace __asan;
static void FindInfoForStackVar(uptr addr, const char *frame_descr, uptr offset,
char *name, uptr name_size,
uptr &region_address, uptr &region_size) {
InternalMmapVector<StackVarDescr> vars(16);
InternalMmapVector<StackVarDescr> vars;
vars.reserve(16);
if (!ParseFrameDescription(frame_descr, &vars)) {
return;
}


@ -20,23 +20,25 @@
namespace __asan {
// Return " (thread_name) " or an empty string if the name is empty.
const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
uptr buff_len) {
const char *name = t->name;
if (name[0] == '\0') return "";
buff[0] = 0;
internal_strncat(buff, " (", 3);
internal_strncat(buff, name, buff_len - 4);
internal_strncat(buff, ")", 2);
return buff;
AsanThreadIdAndName::AsanThreadIdAndName(AsanThreadContext *t) {
Init(t->tid, t->name);
}
const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len) {
if (tid == kInvalidTid) return "";
asanThreadRegistry().CheckLocked();
AsanThreadContext *t = GetThreadContextByTidLocked(tid);
return ThreadNameWithParenthesis(t, buff, buff_len);
AsanThreadIdAndName::AsanThreadIdAndName(u32 tid) {
if (tid == kInvalidTid) {
Init(tid, "");
} else {
asanThreadRegistry().CheckLocked();
AsanThreadContext *t = GetThreadContextByTidLocked(tid);
Init(tid, t->name);
}
}
void AsanThreadIdAndName::Init(u32 tid, const char *tname) {
int len = internal_snprintf(name, sizeof(name), "T%d", tid);
CHECK(((unsigned int)len) < sizeof(name));
if (tname[0] != '\0')
internal_snprintf(&name[len], sizeof(name) - len, " (%s)", tname);
}
void DescribeThread(AsanThreadContext *context) {
@ -47,18 +49,15 @@ void DescribeThread(AsanThreadContext *context) {
return;
}
context->announced = true;
char tname[128];
InternalScopedString str(1024);
str.append("Thread T%d%s", context->tid,
ThreadNameWithParenthesis(context->tid, tname, sizeof(tname)));
str.append("Thread %s", AsanThreadIdAndName(context).c_str());
if (context->parent_tid == kInvalidTid) {
str.append(" created by unknown thread\n");
Printf("%s", str.data());
return;
}
str.append(
" created by T%d%s here:\n", context->parent_tid,
ThreadNameWithParenthesis(context->parent_tid, tname, sizeof(tname)));
str.append(" created by %s here:\n",
AsanThreadIdAndName(context->parent_tid).c_str());
Printf("%s", str.data());
StackDepotGet(context->stack_id).Print();
// Recursively described parent thread if needed.
@ -358,10 +357,9 @@ bool GlobalAddressDescription::PointsInsideTheSameVariable(
void StackAddressDescription::Print() const {
Decorator d;
char tname[128];
Printf("%s", d.Location());
Printf("Address %p is located in stack of thread T%d%s", addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
Printf("Address %p is located in stack of thread %s", addr,
AsanThreadIdAndName(tid).c_str());
if (!frame_descr) {
Printf("%s\n", d.Default());
@ -380,7 +378,8 @@ void StackAddressDescription::Print() const {
StackTrace alloca_stack(&frame_pc, 1);
alloca_stack.Print();
InternalMmapVector<StackVarDescr> vars(16);
InternalMmapVector<StackVarDescr> vars;
vars.reserve(16);
if (!ParseFrameDescription(frame_descr, &vars)) {
Printf(
"AddressSanitizer can't parse the stack frame "
@ -402,7 +401,7 @@ void StackAddressDescription::Print() const {
}
Printf(
"HINT: this may be a false positive if your program uses "
"some custom stack unwind mechanism or swapcontext\n");
"some custom stack unwind mechanism, swapcontext or vfork\n");
if (SANITIZER_WINDOWS)
Printf(" (longjmp, SEH and C++ exceptions *are* supported)\n");
else
@ -418,26 +417,19 @@ void HeapAddressDescription::Print() const {
AsanThreadContext *alloc_thread = GetThreadContextByTidLocked(alloc_tid);
StackTrace alloc_stack = GetStackTraceFromId(alloc_stack_id);
char tname[128];
Decorator d;
AsanThreadContext *free_thread = nullptr;
if (free_tid != kInvalidTid) {
free_thread = GetThreadContextByTidLocked(free_tid);
Printf("%sfreed by thread T%d%s here:%s\n", d.Allocation(),
free_thread->tid,
ThreadNameWithParenthesis(free_thread, tname, sizeof(tname)),
d.Default());
Printf("%sfreed by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(free_thread).c_str(), d.Default());
StackTrace free_stack = GetStackTraceFromId(free_stack_id);
free_stack.Print();
Printf("%spreviously allocated by thread T%d%s here:%s\n", d.Allocation(),
alloc_thread->tid,
ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
d.Default());
Printf("%spreviously allocated by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(alloc_thread).c_str(), d.Default());
} else {
Printf("%sallocated by thread T%d%s here:%s\n", d.Allocation(),
alloc_thread->tid,
ThreadNameWithParenthesis(alloc_thread, tname, sizeof(tname)),
d.Default());
Printf("%sallocated by thread %s here:%s\n", d.Allocation(),
AsanThreadIdAndName(alloc_thread).c_str(), d.Default());
}
alloc_stack.Print();
DescribeThread(GetCurrentThread());


@ -26,9 +26,20 @@ void DescribeThread(AsanThreadContext *context);
static inline void DescribeThread(AsanThread *t) {
if (t) DescribeThread(t->context());
}
const char *ThreadNameWithParenthesis(AsanThreadContext *t, char buff[],
uptr buff_len);
const char *ThreadNameWithParenthesis(u32 tid, char buff[], uptr buff_len);
class AsanThreadIdAndName {
public:
explicit AsanThreadIdAndName(AsanThreadContext *t);
explicit AsanThreadIdAndName(u32 tid);
// Contains "T%tid (%name)" or "T%tid" if the name is empty.
const char *c_str() const { return &name[0]; }
private:
void Init(u32 tid, const char *tname);
char name[128];
};
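// A minimal usage sketch for the helper above (illustrative only; the
// function name and message text are assumptions, not code from the import):
//
//   void ExampleDescribeThread(u32 tid) {
//     // Prints e.g. "allocated by thread T7 (worker)", or just "... T7" if
//     // the thread has no name, replacing the old ThreadNameWithParenthesis
//     // plus stack buffer pattern.
//     Printf("allocated by thread %s\n", AsanThreadIdAndName(tid).c_str());
//   }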
class Decorator : public __sanitizer::SanitizerCommonDecorator {
public:


@ -45,13 +45,11 @@ void ErrorDeadlySignal::Print() {
void ErrorDoubleFree::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting %s on %p in "
"thread T%d%s:\n",
scariness.GetDescription(), addr_description.addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"ERROR: AddressSanitizer: attempting %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(second_free_stack->trace[0],
@ -63,13 +61,11 @@ void ErrorDoubleFree::Print() {
void ErrorNewDeleteTypeMismatch::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: %s on %p in thread "
"T%d%s:\n",
scariness.GetDescription(), addr_description.addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"ERROR: AddressSanitizer: %s on %p in thread %s:\n",
scariness.GetDescription(), addr_description.addr,
AsanThreadIdAndName(tid).c_str());
Printf("%s object passed to delete has wrong type:\n", d.Default());
if (delete_size != 0) {
Printf(
@ -106,13 +102,11 @@ void ErrorNewDeleteTypeMismatch::Print() {
void ErrorFreeNotMalloced::Print() {
Decorator d;
Printf("%s", d.Warning());
char tname[128];
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting free on address "
"which was not malloc()-ed: %p in thread T%d%s\n",
addr_description.Address(), tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)));
"which was not malloc()-ed: %p in thread %s\n",
addr_description.Address(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
CHECK_GT(free_stack->size, 0);
scariness.Print();
@ -129,7 +123,7 @@ void ErrorAllocTypeMismatch::Print() {
"operator delete []"};
CHECK_NE(alloc_type, dealloc_type);
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%s vs %s) on %p\n",
scariness.GetDescription(),
alloc_names[alloc_type], dealloc_names[dealloc_type],
@ -148,7 +142,7 @@ void ErrorAllocTypeMismatch::Print() {
void ErrorMallocUsableSizeNotOwned::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting to call malloc_usable_size() for "
"pointer which is not owned: %p\n",
@ -161,7 +155,7 @@ void ErrorMallocUsableSizeNotOwned::Print() {
void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: attempting to call "
"__sanitizer_get_allocated_size() for pointer which is not owned: %p\n",
@ -172,11 +166,123 @@ void ErrorSanitizerGetAllocatedSizeNotOwned::Print() {
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorCallocOverflow::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: calloc parameters overflow: count * size "
"(%zd * %zd) cannot be represented in type size_t (thread %s)\n",
count, size, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
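// An illustrative trigger for the report above (a sketch assuming LP64
// size_t; the helper name is hypothetical, not code from the import):
//
//   void *ExampleCallocOverflow() {
//     const uptr kCount = (uptr)1 << (sizeof(uptr) * 8 - 1);
//     // kCount * 4 does not fit in size_t, so CheckForCallocOverflow() fires
//     // and, unless allocator_may_return_null=1, this report aborts.
//     return calloc(kCount, 4);
//   }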
void ErrorPvallocOverflow::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: pvalloc parameters overflow: size 0x%zx "
"rounded up to system page size 0x%zx cannot be represented in type "
"size_t (thread %s)\n",
size, GetPageSizeCached(), AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidAllocationAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: invalid allocation alignment: %zd, "
"alignment must be a power of two (thread %s)\n",
alignment, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidAlignedAllocAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
#if SANITIZER_POSIX
Report("ERROR: AddressSanitizer: invalid alignment requested in "
"aligned_alloc: %zd, alignment must be a power of two and the "
"requested size 0x%zx must be a multiple of alignment "
"(thread %s)\n", alignment, size, AsanThreadIdAndName(tid).c_str());
#else
Report("ERROR: AddressSanitizer: invalid alignment requested in "
"aligned_alloc: %zd, the requested size 0x%zx must be a multiple of "
"alignment (thread %s)\n", alignment, size,
AsanThreadIdAndName(tid).c_str());
#endif
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorInvalidPosixMemalignAlignment::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: invalid alignment requested in posix_memalign: "
"%zd, alignment must be a power of two and a multiple of sizeof(void*) "
"== %zd (thread %s)\n",
alignment, sizeof(void*), AsanThreadIdAndName(tid).c_str()); // NOLINT
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorAllocationSizeTooBig::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: requested allocation size 0x%zx (0x%zx after "
"adjustments for alignment, red zones etc.) exceeds maximum supported "
"size of 0x%zx (thread %s)\n",
user_size, total_size, max_size, AsanThreadIdAndName(tid).c_str());
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorRssLimitExceeded::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: specified RSS limit exceeded, currently set to "
"soft_rss_limit_mb=%zd\n", common_flags()->soft_rss_limit_mb);
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorOutOfMemory::Print() {
Decorator d;
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: allocator is out of memory trying to allocate "
"0x%zx bytes\n", requested_size);
Printf("%s", d.Default());
stack->Print();
PrintHintAllocatorCannotReturnNull();
ReportErrorSummary(scariness.GetDescription(), stack);
}
void ErrorStringFunctionMemoryRangesOverlap::Print() {
Decorator d;
char bug_type[100];
internal_snprintf(bug_type, sizeof(bug_type), "%s-param-overlap", function);
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report(
"ERROR: AddressSanitizer: %s: memory ranges [%p,%p) and [%p, %p) "
"overlap\n",
@ -193,7 +299,7 @@ void ErrorStringFunctionMemoryRangesOverlap::Print() {
void ErrorStringFunctionSizeOverflow::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: (size=%zd)\n",
scariness.GetDescription(), size);
Printf("%s", d.Default());
@ -221,7 +327,7 @@ void ErrorBadParamsToAnnotateContiguousContainer::Print() {
void ErrorODRViolation::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s (%p):\n", scariness.GetDescription(),
global1.beg);
Printf("%s", d.Default());
@ -250,7 +356,7 @@ void ErrorODRViolation::Print() {
void ErrorInvalidPointerPair::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
Report("ERROR: AddressSanitizer: %s: %p %p\n", scariness.GetDescription(),
addr1_description.Address(), addr2_description.Address());
Printf("%s", d.Default());
@ -414,6 +520,7 @@ static void PrintLegend(InternalScopedString *str) {
PrintShadowByte(str, " ASan internal: ", kAsanInternalHeapMagic);
PrintShadowByte(str, " Left alloca redzone: ", kAsanAllocaLeftMagic);
PrintShadowByte(str, " Right alloca redzone: ", kAsanAllocaRightMagic);
PrintShadowByte(str, " Shadow gap: ", kAsanShadowGap);
}
static void PrintShadowBytes(InternalScopedString *str, const char *before,
@ -453,17 +560,15 @@ static void PrintShadowMemoryForAddress(uptr addr) {
void ErrorGeneric::Print() {
Decorator d;
Printf("%s", d.Warning());
Printf("%s", d.Error());
uptr addr = addr_description.Address();
Report("ERROR: AddressSanitizer: %s on address %p at pc %p bp %p sp %p\n",
bug_descr, (void *)addr, pc, bp, sp);
Printf("%s", d.Default());
char tname[128];
Printf("%s%s of size %zu at %p thread T%d%s%s\n", d.Access(),
Printf("%s%s of size %zu at %p thread %s%s\n", d.Access(),
access_size ? (is_write ? "WRITE" : "READ") : "ACCESS", access_size,
(void *)addr, tid,
ThreadNameWithParenthesis(tid, tname, sizeof(tname)), d.Default());
(void *)addr, AsanThreadIdAndName(tid).c_str(), d.Default());
scariness.Print();
GET_STACK_TRACE_FATAL(pc, bp);


@ -20,20 +20,30 @@
namespace __asan {
// (*) VS2013 does not implement unrestricted unions, so we need a trivial
// default constructor explicitly defined for each particular error.
// None of the error classes own the stack traces mentioned in them.
struct ErrorBase {
ErrorBase() = default;
explicit ErrorBase(u32 tid_) : tid(tid_) {}
ScarinessScoreBase scariness;
u32 tid;
ErrorBase() = default; // (*)
explicit ErrorBase(u32 tid_) : tid(tid_) {}
ErrorBase(u32 tid_, int initial_score, const char *reason) : tid(tid_) {
scariness.Clear();
scariness.Scare(initial_score, reason);
}
};
struct ErrorDeadlySignal : ErrorBase {
SignalContext signal;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorDeadlySignal() = default;
ErrorDeadlySignal() = default; // (*)
ErrorDeadlySignal(u32 tid, const SignalContext &sig)
: ErrorBase(tid), signal(sig) {
: ErrorBase(tid),
signal(sig) {
scariness.Clear();
if (signal.IsStackOverflow()) {
scariness.Scare(10, "stack-overflow");
@ -55,125 +65,206 @@ struct ErrorDeadlySignal : ErrorBase {
};
struct ErrorDoubleFree : ErrorBase {
// ErrorDoubleFree doesn't own the stack trace.
const BufferedStackTrace *second_free_stack;
HeapAddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorDoubleFree() = default;
ErrorDoubleFree() = default; // (*)
ErrorDoubleFree(u32 tid, BufferedStackTrace *stack, uptr addr)
: ErrorBase(tid), second_free_stack(stack) {
: ErrorBase(tid, 42, "double-free"),
second_free_stack(stack) {
CHECK_GT(second_free_stack->size, 0);
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(42, "double-free");
}
void Print();
};
struct ErrorNewDeleteTypeMismatch : ErrorBase {
// ErrorNewDeleteTypeMismatch doesn't own the stack trace.
const BufferedStackTrace *free_stack;
HeapAddressDescription addr_description;
uptr delete_size;
uptr delete_alignment;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorNewDeleteTypeMismatch() = default;
ErrorNewDeleteTypeMismatch() = default; // (*)
ErrorNewDeleteTypeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
uptr delete_size_, uptr delete_alignment_)
: ErrorBase(tid), free_stack(stack), delete_size(delete_size_),
: ErrorBase(tid, 10, "new-delete-type-mismatch"),
free_stack(stack),
delete_size(delete_size_),
delete_alignment(delete_alignment_) {
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(10, "new-delete-type-mismatch");
}
void Print();
};
struct ErrorFreeNotMalloced : ErrorBase {
// ErrorFreeNotMalloced doesn't own the stack trace.
const BufferedStackTrace *free_stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorFreeNotMalloced() = default;
ErrorFreeNotMalloced() = default; // (*)
ErrorFreeNotMalloced(u32 tid, BufferedStackTrace *stack, uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 40, "bad-free"),
free_stack(stack),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(40, "bad-free");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorAllocTypeMismatch : ErrorBase {
// ErrorAllocTypeMismatch doesn't own the stack trace.
const BufferedStackTrace *dealloc_stack;
HeapAddressDescription addr_description;
AllocType alloc_type, dealloc_type;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorAllocTypeMismatch() = default;
ErrorAllocTypeMismatch() = default; // (*)
ErrorAllocTypeMismatch(u32 tid, BufferedStackTrace *stack, uptr addr,
AllocType alloc_type_, AllocType dealloc_type_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "alloc-dealloc-mismatch"),
dealloc_stack(stack),
alloc_type(alloc_type_),
dealloc_type(dealloc_type_) {
GetHeapAddressInformation(addr, 1, &addr_description);
scariness.Clear();
scariness.Scare(10, "alloc-dealloc-mismatch");
};
void Print();
};
struct ErrorMallocUsableSizeNotOwned : ErrorBase {
// ErrorMallocUsableSizeNotOwned doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorMallocUsableSizeNotOwned() = default;
ErrorMallocUsableSizeNotOwned() = default; // (*)
ErrorMallocUsableSizeNotOwned(u32 tid, BufferedStackTrace *stack_, uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-malloc_usable_size"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "bad-malloc_usable_size");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorSanitizerGetAllocatedSizeNotOwned : ErrorBase {
// ErrorSanitizerGetAllocatedSizeNotOwned doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorSanitizerGetAllocatedSizeNotOwned() = default;
ErrorSanitizerGetAllocatedSizeNotOwned() = default; // (*)
ErrorSanitizerGetAllocatedSizeNotOwned(u32 tid, BufferedStackTrace *stack_,
uptr addr)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-__sanitizer_get_allocated_size"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "bad-__sanitizer_get_allocated_size");
}
addr_description(addr, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
struct ErrorCallocOverflow : ErrorBase {
const BufferedStackTrace *stack;
uptr count;
uptr size;
ErrorCallocOverflow() = default; // (*)
ErrorCallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr count_,
uptr size_)
: ErrorBase(tid, 10, "calloc-overflow"),
stack(stack_),
count(count_),
size(size_) {}
void Print();
};
struct ErrorPvallocOverflow : ErrorBase {
const BufferedStackTrace *stack;
uptr size;
ErrorPvallocOverflow() = default; // (*)
ErrorPvallocOverflow(u32 tid, BufferedStackTrace *stack_, uptr size_)
: ErrorBase(tid, 10, "pvalloc-overflow"),
stack(stack_),
size(size_) {}
void Print();
};
struct ErrorInvalidAllocationAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr alignment;
ErrorInvalidAllocationAlignment() = default; // (*)
ErrorInvalidAllocationAlignment(u32 tid, BufferedStackTrace *stack_,
uptr alignment_)
: ErrorBase(tid, 10, "invalid-allocation-alignment"),
stack(stack_),
alignment(alignment_) {}
void Print();
};
struct ErrorInvalidAlignedAllocAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr size;
uptr alignment;
ErrorInvalidAlignedAllocAlignment() = default; // (*)
ErrorInvalidAlignedAllocAlignment(u32 tid, BufferedStackTrace *stack_,
uptr size_, uptr alignment_)
: ErrorBase(tid, 10, "invalid-aligned-alloc-alignment"),
stack(stack_),
size(size_),
alignment(alignment_) {}
void Print();
};
struct ErrorInvalidPosixMemalignAlignment : ErrorBase {
const BufferedStackTrace *stack;
uptr alignment;
ErrorInvalidPosixMemalignAlignment() = default; // (*)
ErrorInvalidPosixMemalignAlignment(u32 tid, BufferedStackTrace *stack_,
uptr alignment_)
: ErrorBase(tid, 10, "invalid-posix-memalign-alignment"),
stack(stack_),
alignment(alignment_) {}
void Print();
};
struct ErrorAllocationSizeTooBig : ErrorBase {
const BufferedStackTrace *stack;
uptr user_size;
uptr total_size;
uptr max_size;
ErrorAllocationSizeTooBig() = default; // (*)
ErrorAllocationSizeTooBig(u32 tid, BufferedStackTrace *stack_,
uptr user_size_, uptr total_size_, uptr max_size_)
: ErrorBase(tid, 10, "allocation-size-too-big"),
stack(stack_),
user_size(user_size_),
total_size(total_size_),
max_size(max_size_) {}
void Print();
};
struct ErrorRssLimitExceeded : ErrorBase {
const BufferedStackTrace *stack;
ErrorRssLimitExceeded() = default; // (*)
ErrorRssLimitExceeded(u32 tid, BufferedStackTrace *stack_)
: ErrorBase(tid, 10, "rss-limit-exceeded"),
stack(stack_) {}
void Print();
};
struct ErrorOutOfMemory : ErrorBase {
const BufferedStackTrace *stack;
uptr requested_size;
ErrorOutOfMemory() = default; // (*)
ErrorOutOfMemory(u32 tid, BufferedStackTrace *stack_, uptr requested_size_)
: ErrorBase(tid, 10, "out-of-memory"),
stack(stack_),
requested_size(requested_size_) {}
void Print();
};
struct ErrorStringFunctionMemoryRangesOverlap : ErrorBase {
// ErrorStringFunctionMemoryRangesOverlap doesn't own the stack trace.
const BufferedStackTrace *stack;
uptr length1, length2;
AddressDescription addr1_description;
AddressDescription addr2_description;
const char *function;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorStringFunctionMemoryRangesOverlap() = default;
ErrorStringFunctionMemoryRangesOverlap() = default; // (*)
ErrorStringFunctionMemoryRangesOverlap(u32 tid, BufferedStackTrace *stack_,
uptr addr1, uptr length1_, uptr addr2,
uptr length2_, const char *function_)
@ -193,65 +284,51 @@ struct ErrorStringFunctionMemoryRangesOverlap : ErrorBase {
};
struct ErrorStringFunctionSizeOverflow : ErrorBase {
// ErrorStringFunctionSizeOverflow doesn't own the stack trace.
const BufferedStackTrace *stack;
AddressDescription addr_description;
uptr size;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorStringFunctionSizeOverflow() = default;
ErrorStringFunctionSizeOverflow() = default; // (*)
ErrorStringFunctionSizeOverflow(u32 tid, BufferedStackTrace *stack_,
uptr addr, uptr size_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "negative-size-param"),
stack(stack_),
addr_description(addr, /*shouldLockThreadRegistry=*/false),
size(size_) {
scariness.Clear();
scariness.Scare(10, "negative-size-param");
}
size(size_) {}
void Print();
};
struct ErrorBadParamsToAnnotateContiguousContainer : ErrorBase {
// ErrorBadParamsToAnnotateContiguousContainer doesn't own the stack trace.
const BufferedStackTrace *stack;
uptr beg, end, old_mid, new_mid;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorBadParamsToAnnotateContiguousContainer() = default;
ErrorBadParamsToAnnotateContiguousContainer() = default; // (*)
// PS4: Do we want an AddressDescription for beg?
ErrorBadParamsToAnnotateContiguousContainer(u32 tid,
BufferedStackTrace *stack_,
uptr beg_, uptr end_,
uptr old_mid_, uptr new_mid_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "bad-__sanitizer_annotate_contiguous_container"),
stack(stack_),
beg(beg_),
end(end_),
old_mid(old_mid_),
new_mid(new_mid_) {
scariness.Clear();
scariness.Scare(10, "bad-__sanitizer_annotate_contiguous_container");
}
new_mid(new_mid_) {}
void Print();
};
struct ErrorODRViolation : ErrorBase {
__asan_global global1, global2;
u32 stack_id1, stack_id2;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorODRViolation() = default;
ErrorODRViolation() = default; // (*)
ErrorODRViolation(u32 tid, const __asan_global *g1, u32 stack_id1_,
const __asan_global *g2, u32 stack_id2_)
: ErrorBase(tid),
: ErrorBase(tid, 10, "odr-violation"),
global1(*g1),
global2(*g2),
stack_id1(stack_id1_),
stack_id2(stack_id2_) {
scariness.Clear();
scariness.Scare(10, "odr-violation");
}
stack_id2(stack_id2_) {}
void Print();
};
@ -259,20 +336,16 @@ struct ErrorInvalidPointerPair : ErrorBase {
uptr pc, bp, sp;
AddressDescription addr1_description;
AddressDescription addr2_description;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorInvalidPointerPair() = default;
ErrorInvalidPointerPair() = default; // (*)
ErrorInvalidPointerPair(u32 tid, uptr pc_, uptr bp_, uptr sp_, uptr p1,
uptr p2)
: ErrorBase(tid),
: ErrorBase(tid, 10, "invalid-pointer-pair"),
pc(pc_),
bp(bp_),
sp(sp_),
addr1_description(p1, 1, /*shouldLockThreadRegistry=*/false),
addr2_description(p2, 1, /*shouldLockThreadRegistry=*/false) {
scariness.Clear();
scariness.Scare(10, "invalid-pointer-pair");
}
addr2_description(p2, 1, /*shouldLockThreadRegistry=*/false) {}
void Print();
};
@ -283,9 +356,8 @@ struct ErrorGeneric : ErrorBase {
const char *bug_descr;
bool is_write;
u8 shadow_val;
// VS2013 doesn't implement unrestricted unions, so we need a trivial default
// constructor
ErrorGeneric() = default;
ErrorGeneric() = default; // (*)
ErrorGeneric(u32 tid, uptr addr, uptr pc_, uptr bp_, uptr sp_, bool is_write_,
uptr access_size_);
void Print();
@ -300,6 +372,14 @@ struct ErrorGeneric : ErrorBase {
macro(AllocTypeMismatch) \
macro(MallocUsableSizeNotOwned) \
macro(SanitizerGetAllocatedSizeNotOwned) \
macro(CallocOverflow) \
macro(PvallocOverflow) \
macro(InvalidAllocationAlignment) \
macro(InvalidAlignedAllocAlignment) \
macro(InvalidPosixMemalignAlignment) \
macro(AllocationSizeTooBig) \
macro(RssLimitExceeded) \
macro(OutOfMemory) \
macro(StringFunctionMemoryRangesOverlap) \
macro(StringFunctionSizeOverflow) \
macro(BadParamsToAnnotateContiguousContainer) \
@ -334,6 +414,7 @@ struct ErrorDescription {
};
ErrorDescription() { internal_memset(this, 0, sizeof(*this)); }
explicit ErrorDescription(LinkerInitialized) {}
ASAN_FOR_EACH_ERROR_KIND(ASAN_ERROR_DESCRIPTION_CONSTRUCTOR)
bool IsValid() { return kind != kErrorKindInvalid; }


@ -33,10 +33,7 @@ static const char *MaybeCallAsanDefaultOptions() {
static const char *MaybeUseAsanDefaultOptionsCompileDefinition() {
#ifdef ASAN_DEFAULT_OPTIONS
// Stringize the macro value.
# define ASAN_STRINGIZE(x) #x
# define ASAN_STRINGIZE_OPTIONS(options) ASAN_STRINGIZE(options)
return ASAN_STRINGIZE_OPTIONS(ASAN_DEFAULT_OPTIONS);
return SANITIZER_STRINGIFY(ASAN_DEFAULT_OPTIONS);
#else
return "";
#endif
@ -163,6 +160,10 @@ void InitializeFlags() {
CHECK_LE(f->max_redzone, 2048);
CHECK(IsPowerOfTwo(f->redzone));
CHECK(IsPowerOfTwo(f->max_redzone));
if (SANITIZER_RTEMS) {
CHECK(!f->unmap_shadow_on_exit);
CHECK(!f->protect_shadow_gap);
}
// quarantine_size is deprecated but we still honor it.
// quarantine_size can not be used together with quarantine_size_mb.


@ -88,7 +88,8 @@ ASAN_FLAG(bool, check_malloc_usable_size, true,
"295.*.")
ASAN_FLAG(bool, unmap_shadow_on_exit, false,
"If set, explicitly unmaps the (huge) shadow at exit.")
ASAN_FLAG(bool, protect_shadow_gap, true, "If set, mprotect the shadow gap")
ASAN_FLAG(bool, protect_shadow_gap, !SANITIZER_RTEMS,
"If set, mprotect the shadow gap")
ASAN_FLAG(bool, print_stats, false,
"Print various statistics after printing an error message or if "
"atexit=1.")
@ -136,9 +137,9 @@ ASAN_FLAG(
"Android. ")
ASAN_FLAG(
int, detect_invalid_pointer_pairs, 0,
"If non-zero, try to detect operations like <, <=, >, >= and - on "
"invalid pointer pairs (e.g. when pointers belong to different objects). "
"The bigger the value the harder we try.")
"If >= 2, detect operations like <, <=, >, >= and - on invalid pointer "
"pairs (e.g. when pointers belong to different objects); "
"If == 1, detect invalid operations only when both pointers are non-null.")
ASAN_FLAG(
bool, detect_container_overflow, true,
"If true, honor the container overflow annotations. See "


@ -224,8 +224,9 @@ static void RegisterGlobal(const Global *g) {
list_of_all_globals = l;
if (g->has_dynamic_init) {
if (!dynamic_init_globals) {
dynamic_init_globals = new(allocator_for_globals)
VectorOfGlobals(kDynamicInitGlobalsInitialCapacity);
dynamic_init_globals =
new (allocator_for_globals) VectorOfGlobals; // NOLINT
dynamic_init_globals->reserve(kDynamicInitGlobalsInitialCapacity);
}
DynInitGlobal dyn_global = { *g, false };
dynamic_init_globals->push_back(dyn_global);
@ -358,9 +359,11 @@ void __asan_register_globals(__asan_global *globals, uptr n) {
GET_STACK_TRACE_MALLOC;
u32 stack_id = StackDepotPut(stack);
BlockingMutexLock lock(&mu_for_globals);
if (!global_registration_site_vector)
if (!global_registration_site_vector) {
global_registration_site_vector =
new(allocator_for_globals) GlobalRegistrationSiteVector(128);
new (allocator_for_globals) GlobalRegistrationSiteVector; // NOLINT
global_registration_site_vector->reserve(128);
}
GlobalRegistrationSite site = {stack_id, &globals[0], &globals[n - 1]};
global_registration_site_vector->push_back(site);
if (flags()->report_globals >= 2) {


@ -19,9 +19,9 @@ namespace __asan {
#pragma section(".ASAN$GA", read, write) // NOLINT
#pragma section(".ASAN$GZ", read, write) // NOLINT
extern "C" __declspec(allocate(".ASAN$GA"))
__asan_global __asan_globals_start = {};
ALIGNED(sizeof(__asan_global)) __asan_global __asan_globals_start = {};
extern "C" __declspec(allocate(".ASAN$GZ"))
__asan_global __asan_globals_end = {};
ALIGNED(sizeof(__asan_global)) __asan_global __asan_globals_end = {};
#pragma comment(linker, "/merge:.ASAN=.data")
static void call_on_globals(void (*hook)(__asan_global *, uptr)) {


@ -24,15 +24,20 @@
#include "lsan/lsan_common.h"
#include "sanitizer_common/sanitizer_libc.h"
// There is no general interception at all on Fuchsia.
// There is no general interception at all on Fuchsia and RTEMS.
// Only the functions in asan_interceptors_memintrinsics.cc are
// really defined to replace libc functions.
#if !SANITIZER_FUCHSIA
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
#if SANITIZER_POSIX
#include "sanitizer_common/sanitizer_posix.h"
#endif
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION || \
ASAN_INTERCEPT__SJLJ_UNWIND_RAISEEXCEPTION
#include <unwind.h>
#endif
#if defined(__i386) && SANITIZER_LINUX
#define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1"
#elif defined(__mips__) && SANITIZER_LINUX
@ -178,6 +183,7 @@ DECLARE_REAL_AND_INTERCEPTOR(void, free, void *)
(void)(s); \
} while (false)
#include "sanitizer_common/sanitizer_common_syscalls.inc"
#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
struct ThreadStartParam {
atomic_uintptr_t t;
@ -269,7 +275,15 @@ INTERCEPTOR(int, swapcontext, struct ucontext_t *oucp,
uptr stack, ssize;
ReadContextStack(ucp, &stack, &ssize);
ClearShadowMemoryForContextStack(stack, ssize);
#if __has_attribute(__indirect_return__) && \
(defined(__x86_64__) || defined(__i386__))
int (*real_swapcontext)(struct ucontext_t *, struct ucontext_t *)
__attribute__((__indirect_return__))
= REAL(swapcontext);
int res = real_swapcontext(oucp, ucp);
#else
int res = REAL(swapcontext)(oucp, ucp);
#endif
// swapcontext technically does not return, but program may swap context to
// "oucp" later, that would look as if swapcontext() returned 0.
// We need to clear shadow for ucp once again, as it may be in arbitrary
@ -318,6 +332,32 @@ INTERCEPTOR(void, __cxa_throw, void *a, void *b, void *c) {
}
#endif
#if ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION
INTERCEPTOR(void, __cxa_rethrow_primary_exception, void *a) {
CHECK(REAL(__cxa_rethrow_primary_exception));
__asan_handle_no_return();
REAL(__cxa_rethrow_primary_exception)(a);
}
#endif
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION
INTERCEPTOR(_Unwind_Reason_Code, _Unwind_RaiseException,
_Unwind_Exception *object) {
CHECK(REAL(_Unwind_RaiseException));
__asan_handle_no_return();
return REAL(_Unwind_RaiseException)(object);
}
#endif
#if ASAN_INTERCEPT__SJLJ_UNWIND_RAISEEXCEPTION
INTERCEPTOR(_Unwind_Reason_Code, _Unwind_SjLj_RaiseException,
_Unwind_Exception *object) {
CHECK(REAL(_Unwind_SjLj_RaiseException));
__asan_handle_no_return();
return REAL(_Unwind_SjLj_RaiseException)(object);
}
#endif
#if ASAN_INTERCEPT_INDEX
# if ASAN_USE_ALIAS_ATTRIBUTE_FOR_INDEX
INTERCEPTOR(char*, index, const char *string, int c)
@ -540,14 +580,6 @@ INTERCEPTOR(int, __cxa_atexit, void (*func)(void *), void *arg,
}
#endif // ASAN_INTERCEPT___CXA_ATEXIT
#if ASAN_INTERCEPT_FORK
INTERCEPTOR(int, fork, void) {
ENSURE_ASAN_INITED();
int pid = REAL(fork)();
return pid;
}
#endif // ASAN_INTERCEPT_FORK
// ---------------------- InitializeAsanInterceptors ---------------- {{{1
namespace __asan {
void InitializeAsanInterceptors() {
@ -598,6 +630,17 @@ void InitializeAsanInterceptors() {
#if ASAN_INTERCEPT___CXA_THROW
ASAN_INTERCEPT_FUNC(__cxa_throw);
#endif
#if ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION
ASAN_INTERCEPT_FUNC(__cxa_rethrow_primary_exception);
#endif
// Indirectly intercept std::rethrow_exception.
#if ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION
INTERCEPT_FUNCTION(_Unwind_RaiseException);
#endif
// Indirectly intercept std::rethrow_exception.
#if ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION
INTERCEPT_FUNCTION(_Unwind_SjLj_RaiseException);
#endif
// Intercept threading-related functions
#if ASAN_INTERCEPT_PTHREAD_CREATE
@ -614,10 +657,6 @@ void InitializeAsanInterceptors() {
ASAN_INTERCEPT_FUNC(__cxa_atexit);
#endif
#if ASAN_INTERCEPT_FORK
ASAN_INTERCEPT_FUNC(fork);
#endif
InitializePlatformInterceptors();
VReport(1, "AddressSanitizer: libc interceptors initialized\n");


@ -34,10 +34,10 @@ void InitializePlatformInterceptors();
} // namespace __asan
// There is no general interception at all on Fuchsia.
// There is no general interception at all on Fuchsia and RTEMS.
// Only the functions in asan_interceptors_memintrinsics.h are
// really defined to replace libc functions.
#if !SANITIZER_FUCHSIA
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
// Use macro to describe if specific function should be
// intercepted on a given platform.
@ -46,13 +46,11 @@ void InitializePlatformInterceptors();
# define ASAN_INTERCEPT__LONGJMP 1
# define ASAN_INTERCEPT_INDEX 1
# define ASAN_INTERCEPT_PTHREAD_CREATE 1
# define ASAN_INTERCEPT_FORK 1
#else
# define ASAN_INTERCEPT_ATOLL_AND_STRTOLL 0
# define ASAN_INTERCEPT__LONGJMP 0
# define ASAN_INTERCEPT_INDEX 0
# define ASAN_INTERCEPT_PTHREAD_CREATE 0
# define ASAN_INTERCEPT_FORK 0
#endif
#if SANITIZER_FREEBSD || SANITIZER_LINUX || SANITIZER_NETBSD || \
@ -80,13 +78,20 @@ void InitializePlatformInterceptors();
# define ASAN_INTERCEPT___LONGJMP_CHK 0
#endif
// Android bug: https://code.google.com/p/android/issues/detail?id=61799
#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && \
!(SANITIZER_ANDROID && defined(__i386)) && \
!SANITIZER_SOLARIS
#if ASAN_HAS_EXCEPTIONS && !SANITIZER_WINDOWS && !SANITIZER_SOLARIS && \
!SANITIZER_NETBSD
# define ASAN_INTERCEPT___CXA_THROW 1
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 1
# if defined(_GLIBCXX_SJLJ_EXCEPTIONS) || (SANITIZER_IOS && defined(__arm__))
# define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 1
# else
# define ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION 1
# endif
#else
# define ASAN_INTERCEPT___CXA_THROW 0
# define ASAN_INTERCEPT___CXA_RETHROW_PRIMARY_EXCEPTION 0
# define ASAN_INTERCEPT__UNWIND_RAISEEXCEPTION 0
# define ASAN_INTERCEPT__UNWIND_SJLJ_RAISEEXCEPTION 0
#endif
#if !SANITIZER_WINDOWS


@ -31,14 +31,14 @@ void *__asan_memmove(void *to, const void *from, uptr size) {
ASAN_MEMMOVE_IMPL(nullptr, to, from, size);
}
#if SANITIZER_FUCHSIA
#if SANITIZER_FUCHSIA || SANITIZER_RTEMS
// Fuchsia doesn't use sanitizer_common_interceptors.inc, but the only
// things there it wants are these three. Just define them as aliases
// here rather than repeating the contents.
// Fuchsia and RTEMS don't use sanitizer_common_interceptors.inc, but
// the only things there it wants are these three. Just define them
// as aliases here rather than repeating the contents.
decltype(memcpy) memcpy[[gnu::alias("__asan_memcpy")]];
decltype(memmove) memmove[[gnu::alias("__asan_memmove")]];
decltype(memset) memset[[gnu::alias("__asan_memset")]];
extern "C" decltype(__asan_memcpy) memcpy[[gnu::alias("__asan_memcpy")]];
extern "C" decltype(__asan_memmove) memmove[[gnu::alias("__asan_memmove")]];
extern "C" decltype(__asan_memset) memset[[gnu::alias("__asan_memset")]];
#endif // SANITIZER_FUCHSIA
#endif // SANITIZER_FUCHSIA || SANITIZER_RTEMS


@ -133,15 +133,22 @@ static inline bool RangesOverlap(const char *offset1, uptr length1,
const char *offset2, uptr length2) {
return !((offset1 + length1 <= offset2) || (offset2 + length2 <= offset1));
}
#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) do { \
const char *offset1 = (const char*)_offset1; \
const char *offset2 = (const char*)_offset2; \
if (RangesOverlap(offset1, length1, offset2, length2)) { \
GET_STACK_TRACE_FATAL_HERE; \
ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
offset2, length2, &stack); \
} \
} while (0)
#define CHECK_RANGES_OVERLAP(name, _offset1, length1, _offset2, length2) \
do { \
const char *offset1 = (const char *)_offset1; \
const char *offset2 = (const char *)_offset2; \
if (RangesOverlap(offset1, length1, offset2, length2)) { \
GET_STACK_TRACE_FATAL_HERE; \
bool suppressed = IsInterceptorSuppressed(name); \
if (!suppressed && HaveStackTraceBasedSuppressions()) { \
suppressed = IsStackTraceSuppressed(&stack); \
} \
if (!suppressed) { \
ReportStringFunctionMemoryRangesOverlap(name, offset1, length1, \
offset2, length2, &stack); \
} \
} \
} while (0)
} // namespace __asan
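// Roughly how the string-function interceptors are expected to invoke the
// macro above (an abridged sketch, not a verbatim quote of the imported
// interceptors):
//
//   INTERCEPTOR(char *, strcpy, char *to, const char *from) {
//     ...
//     uptr from_size = REAL(strlen)(from) + 1;
//     CHECK_RANGES_OVERLAP("strcpy", to, from_size, from, from_size);
//     ...
//     return REAL(strcpy)(to, from);
//   }
//
// The newly added IsInterceptorSuppressed / IsStackTraceSuppressed checks let
// an interceptor_name or interceptor_via_fun suppression silence the overlap
// report instead of unconditionally reporting a fatal error.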


@ -36,7 +36,7 @@
// If set, values like allocator chunk size, as well as defaults for some flags
// will be changed towards less memory overhead.
#ifndef ASAN_LOW_MEMORY
# if SANITIZER_IOS || SANITIZER_ANDROID
# if SANITIZER_IOS || SANITIZER_ANDROID || SANITIZER_RTEMS
# define ASAN_LOW_MEMORY 1
# else
# define ASAN_LOW_MEMORY 0
@ -78,7 +78,7 @@ void InitializeShadowMemory();
// asan_malloc_linux.cc / asan_malloc_mac.cc
void ReplaceSystemMalloc();
// asan_linux.cc / asan_mac.cc / asan_win.cc
// asan_linux.cc / asan_mac.cc / asan_rtems.cc / asan_win.cc
uptr FindDynamicShadowStart();
void *AsanDoesNotSupportStaticLinkage();
void AsanCheckDynamicRTPrereqs();
@ -147,6 +147,9 @@ const int kAsanArrayCookieMagic = 0xac;
const int kAsanIntraObjectRedzone = 0xbb;
const int kAsanAllocaLeftMagic = 0xca;
const int kAsanAllocaRightMagic = 0xcb;
// Used to populate the shadow gap for systems without memory
// protection there (i.e. Myriad).
const int kAsanShadowGap = 0xcc;
static const uptr kCurrentStackFrameMagic = 0x41B58AB3;
static const uptr kRetiredStackFrameMagic = 0x45E0360E;


@ -32,6 +32,7 @@
#include <sys/types.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
@ -214,7 +215,7 @@ void AsanCheckIncompatibleRT() {
// the functions in dynamic ASan runtime instead of the functions in
// system libraries, causing crashes later in ASan initialization.
MemoryMappingLayout proc_maps(/*cache_enabled*/true);
char filename[128];
char filename[PATH_MAX];
MemoryMappedSegment segment(filename, sizeof(filename));
while (proc_maps.Next(&segment)) {
if (IsDynamicRTName(segment.filename)) {


@ -62,16 +62,36 @@ uptr FindDynamicShadowStart() {
uptr space_size = kHighShadowEnd + left_padding;
uptr largest_gap_found = 0;
uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
granularity, &largest_gap_found);
uptr max_occupied_addr = 0;
VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
uptr shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity,
&largest_gap_found, &max_occupied_addr);
// If the shadow doesn't fit, restrict the address space to make it fit.
if (shadow_start == 0) {
VReport(
2,
"Shadow doesn't fit, largest_gap_found = %p, max_occupied_addr = %p\n",
largest_gap_found, max_occupied_addr);
uptr new_max_vm = RoundDownTo(largest_gap_found << SHADOW_SCALE, alignment);
if (new_max_vm < max_occupied_addr) {
Report("Unable to find a memory range for dynamic shadow.\n");
Report(
"space_size = %p, largest_gap_found = %p, max_occupied_addr = %p, "
"new_max_vm = %p\n",
space_size, largest_gap_found, max_occupied_addr, new_max_vm);
CHECK(0 && "cannot place shadow");
}
RestrictMemoryToMaxAddress(new_max_vm);
kHighMemEnd = new_max_vm - 1;
space_size = kHighShadowEnd + left_padding;
shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
VReport(2, "FindDynamicShadowStart, space_size = %p\n", space_size);
shadow_start = FindAvailableMemoryRange(space_size, alignment, granularity,
nullptr, nullptr);
if (shadow_start == 0) {
Report("Unable to find a memory range after restricting VM.\n");
CHECK(0 && "cannot place shadow after restricting vm");
}
}
CHECK_NE((uptr)0, shadow_start);
CHECK(IsAligned(shadow_start, alignment));


@ -16,19 +16,23 @@
#include "sanitizer_common/sanitizer_platform.h"
#if SANITIZER_FREEBSD || SANITIZER_FUCHSIA || SANITIZER_LINUX || \
SANITIZER_NETBSD || SANITIZER_SOLARIS
SANITIZER_NETBSD || SANITIZER_RTEMS || SANITIZER_SOLARIS
#include "sanitizer_common/sanitizer_allocator_checks.h"
#include "sanitizer_common/sanitizer_errno.h"
#include "sanitizer_common/sanitizer_tls_get_addr.h"
#include "asan_allocator.h"
#include "asan_interceptors.h"
#include "asan_internal.h"
#include "asan_malloc_local.h"
#include "asan_stack.h"
// ---------------------- Replacement functions ---------------- {{{1
using namespace __asan; // NOLINT
static uptr allocated_for_dlsym;
static const uptr kDlsymAllocPoolSize = 1024;
static uptr last_dlsym_alloc_size_in_words;
static const uptr kDlsymAllocPoolSize = SANITIZER_RTEMS ? 4096 : 1024;
static uptr alloc_memory_for_dlsym[kDlsymAllocPoolSize];
static INLINE bool IsInDlsymAllocPool(const void *ptr) {
@ -39,21 +43,73 @@ static INLINE bool IsInDlsymAllocPool(const void *ptr) {
static void *AllocateFromLocalPool(uptr size_in_bytes) {
uptr size_in_words = RoundUpTo(size_in_bytes, kWordSize) / kWordSize;
void *mem = (void*)&alloc_memory_for_dlsym[allocated_for_dlsym];
last_dlsym_alloc_size_in_words = size_in_words;
allocated_for_dlsym += size_in_words;
CHECK_LT(allocated_for_dlsym, kDlsymAllocPoolSize);
return mem;
}
static void DeallocateFromLocalPool(const void *ptr) {
// Hack: since glibc 2.27 dlsym no longer uses stack-allocated memory to store
// error messages and instead uses malloc followed by free. To avoid pool
// exhaustion due to long object filenames, handle that special case here.
uptr prev_offset = allocated_for_dlsym - last_dlsym_alloc_size_in_words;
void *prev_mem = (void*)&alloc_memory_for_dlsym[prev_offset];
if (prev_mem == ptr) {
REAL(memset)(prev_mem, 0, last_dlsym_alloc_size_in_words * kWordSize);
allocated_for_dlsym = prev_offset;
last_dlsym_alloc_size_in_words = 0;
}
}
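// Sketch of the sequence the rollback above is meant to handle (an assumption
// about the glibc >= 2.27 behavior described in the comment):
//
//   p = malloc(n);   // during dlsym(): served by AllocateFromLocalPool()
//   ...              // dlsym() formats its error message into p
//   free(p);         // DeallocateFromLocalPool() rewinds allocated_for_dlsym
//
// Only the most recent pool allocation can be rolled back this way; older
// blocks simply stay in the static pool, which is acceptable during startup.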
static int PosixMemalignFromLocalPool(void **memptr, uptr alignment,
uptr size_in_bytes) {
if (UNLIKELY(!CheckPosixMemalignAlignment(alignment)))
return errno_EINVAL;
CHECK(alignment >= kWordSize);
uptr addr = (uptr)&alloc_memory_for_dlsym[allocated_for_dlsym];
uptr aligned_addr = RoundUpTo(addr, alignment);
uptr aligned_size = RoundUpTo(size_in_bytes, kWordSize);
uptr *end_mem = (uptr*)(aligned_addr + aligned_size);
uptr allocated = end_mem - alloc_memory_for_dlsym;
if (allocated >= kDlsymAllocPoolSize)
return errno_ENOMEM;
allocated_for_dlsym = allocated;
*memptr = (void*)aligned_addr;
return 0;
}
#if SANITIZER_RTEMS
void* MemalignFromLocalPool(uptr alignment, uptr size) {
void *ptr = nullptr;
alignment = Max(alignment, kWordSize);
PosixMemalignFromLocalPool(&ptr, alignment, size);
return ptr;
}
bool IsFromLocalPool(const void *ptr) {
return IsInDlsymAllocPool(ptr);
}
#endif
static INLINE bool MaybeInDlsym() {
// Fuchsia doesn't use dlsym-based interceptors.
return !SANITIZER_FUCHSIA && asan_init_is_running;
}
static INLINE bool UseLocalPool() {
return EarlyMalloc() || MaybeInDlsym();
}
static void *ReallocFromLocalPool(void *ptr, uptr size) {
const uptr offset = (uptr)ptr - (uptr)alloc_memory_for_dlsym;
const uptr copy_size = Min(size, kDlsymAllocPoolSize - offset);
void *new_ptr;
if (UNLIKELY(MaybeInDlsym())) {
if (UNLIKELY(UseLocalPool())) {
new_ptr = AllocateFromLocalPool(size);
} else {
ENSURE_ASAN_INITED();
@ -66,8 +122,10 @@ static void *ReallocFromLocalPool(void *ptr, uptr size) {
INTERCEPTOR(void, free, void *ptr) {
GET_STACK_TRACE_FREE;
if (UNLIKELY(IsInDlsymAllocPool(ptr)))
if (UNLIKELY(IsInDlsymAllocPool(ptr))) {
DeallocateFromLocalPool(ptr);
return;
}
asan_free(ptr, &stack, FROM_MALLOC);
}
@ -81,7 +139,7 @@ INTERCEPTOR(void, cfree, void *ptr) {
#endif // SANITIZER_INTERCEPT_CFREE
INTERCEPTOR(void*, malloc, uptr size) {
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
// Hack: dlsym calls malloc before REAL(malloc) is retrieved from dlsym.
return AllocateFromLocalPool(size);
ENSURE_ASAN_INITED();
@ -90,7 +148,7 @@ INTERCEPTOR(void*, malloc, uptr size) {
}
INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
// Hack: dlsym calls calloc before REAL(calloc) is retrieved from dlsym.
return AllocateFromLocalPool(nmemb * size);
ENSURE_ASAN_INITED();
@ -101,7 +159,7 @@ INTERCEPTOR(void*, calloc, uptr nmemb, uptr size) {
INTERCEPTOR(void*, realloc, void *ptr, uptr size) {
if (UNLIKELY(IsInDlsymAllocPool(ptr)))
return ReallocFromLocalPool(ptr, size);
if (UNLIKELY(MaybeInDlsym()))
if (UNLIKELY(UseLocalPool()))
return AllocateFromLocalPool(size);
ENSURE_ASAN_INITED();
GET_STACK_TRACE_MALLOC;
@ -122,10 +180,12 @@ INTERCEPTOR(void*, __libc_memalign, uptr boundary, uptr size) {
}
#endif // SANITIZER_INTERCEPT_MEMALIGN
#if SANITIZER_INTERCEPT_ALIGNED_ALLOC
INTERCEPTOR(void*, aligned_alloc, uptr boundary, uptr size) {
GET_STACK_TRACE_MALLOC;
return asan_memalign(boundary, size, &stack, FROM_MALLOC);
return asan_aligned_alloc(boundary, size, &stack);
}
#endif // SANITIZER_INTERCEPT_ALIGNED_ALLOC
INTERCEPTOR(uptr, malloc_usable_size, void *ptr) {
GET_CURRENT_PC_BP_SP;
@ -154,8 +214,9 @@ INTERCEPTOR(int, mallopt, int cmd, int value) {
#endif // SANITIZER_INTERCEPT_MALLOPT_AND_MALLINFO
INTERCEPTOR(int, posix_memalign, void **memptr, uptr alignment, uptr size) {
if (UNLIKELY(UseLocalPool()))
return PosixMemalignFromLocalPool(memptr, alignment, size);
GET_STACK_TRACE_MALLOC;
// Printf("posix_memalign: %zx %zu\n", alignment, size);
return asan_posix_memalign(memptr, alignment, size, &stack);
}


@ -0,0 +1,44 @@
//===-- asan_malloc_local.h -------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// Provide interfaces to check for and handle local pool memory allocation.
//===----------------------------------------------------------------------===//
#ifndef ASAN_MALLOC_LOCAL_H
#define ASAN_MALLOC_LOCAL_H
#include "sanitizer_common/sanitizer_platform.h"
#include "asan_internal.h"
// On RTEMS, we use the local pool to handle memory allocation when the ASan
// run-time is not up.
static INLINE bool EarlyMalloc() {
return SANITIZER_RTEMS && (!__asan::asan_inited ||
__asan::asan_init_is_running);
}
void* MemalignFromLocalPool(uptr alignment, uptr size);
#if SANITIZER_RTEMS
bool IsFromLocalPool(const void *ptr);
#define ALLOCATE_FROM_LOCAL_POOL UNLIKELY(EarlyMalloc())
#define IS_FROM_LOCAL_POOL(ptr) UNLIKELY(IsFromLocalPool(ptr))
#else // SANITIZER_RTEMS
#define ALLOCATE_FROM_LOCAL_POOL 0
#define IS_FROM_LOCAL_POOL(ptr) 0
#endif // SANITIZER_RTEMS
#endif // ASAN_MALLOC_LOCAL_H
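// A hedged sketch of how a malloc-style entry point is expected to consult
// these helpers (ExampleMalloc and its body are assumptions, not code from
// the import):
//
//   void *ExampleMalloc(uptr size, BufferedStackTrace *stack) {
//     if (ALLOCATE_FROM_LOCAL_POOL)      // RTEMS, before the runtime is up
//       return MemalignFromLocalPool(kWordSize, size);
//     return asan_malloc(size, stack);   // normal allocator path
//   }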


@ -38,6 +38,9 @@ using namespace __asan;
#define COMMON_MALLOC_CALLOC(count, size) \
GET_STACK_TRACE_MALLOC; \
void *p = asan_calloc(count, size, &stack);
#define COMMON_MALLOC_POSIX_MEMALIGN(memptr, alignment, size) \
GET_STACK_TRACE_MALLOC; \
int res = asan_posix_memalign(memptr, alignment, size, &stack);
#define COMMON_MALLOC_VALLOC(size) \
GET_STACK_TRACE_MALLOC; \
void *p = asan_memalign(GetPageSizeCached(), size, &stack, FROM_MALLOC);


@ -122,6 +122,13 @@
// || `[0x400000000000, 0x47ffffffffff]` || LowShadow ||
// || `[0x000000000000, 0x3fffffffffff]` || LowMem ||
//
// Shadow mapping on NetBSD/i386 with SHADOW_OFFSET == 0x40000000:
// || `[0x60000000, 0xfffff000]` || HighMem ||
// || `[0x4c000000, 0x5fffffff]` || HighShadow ||
// || `[0x48000000, 0x4bffffff]` || ShadowGap ||
// || `[0x40000000, 0x47ffffff]` || LowShadow ||
// || `[0x00000000, 0x3fffffff]` || LowMem ||
//
// Default Windows/i386 mapping:
// (the exact location of HighShadow/HighMem may vary depending
// on WoW64, /LARGEADDRESSAWARE, etc).
@ -130,11 +137,17 @@
// || `[0x36000000, 0x39ffffff]` || ShadowGap ||
// || `[0x30000000, 0x35ffffff]` || LowShadow ||
// || `[0x00000000, 0x2fffffff]` || LowMem ||
//
// Shadow mapping on Myriad2 (for shadow scale 5):
// || `[0x9ff80000, 0x9fffffff]` || ShadowGap ||
// || `[0x9f000000, 0x9ff7ffff]` || LowShadow ||
// || `[0x80000000, 0x9effffff]` || LowMem ||
// || `[0x00000000, 0x7fffffff]` || Ignored ||
#if defined(ASAN_SHADOW_SCALE)
static const u64 kDefaultShadowScale = ASAN_SHADOW_SCALE;
#else
static const u64 kDefaultShadowScale = 3;
static const u64 kDefaultShadowScale = SANITIZER_MYRIAD2 ? 5 : 3;
#endif
static const u64 kDefaultShadowSentinel = ~(uptr)0;
static const u64 kDefaultShadowOffset32 = 1ULL << 29; // 0x20000000
@ -152,9 +165,19 @@ static const u64 kPPC64_ShadowOffset64 = 1ULL << 44;
static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52;
static const u64 kFreeBSD_ShadowOffset32 = 1ULL << 30; // 0x40000000
static const u64 kFreeBSD_ShadowOffset64 = 1ULL << 46; // 0x400000000000
static const u64 kNetBSD_ShadowOffset32 = 1ULL << 30; // 0x40000000
static const u64 kNetBSD_ShadowOffset64 = 1ULL << 46; // 0x400000000000
static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
static const u64 kMyriadMemoryOffset32 = 0x80000000ULL;
static const u64 kMyriadMemorySize32 = 0x20000000ULL;
static const u64 kMyriadMemoryEnd32 =
kMyriadMemoryOffset32 + kMyriadMemorySize32 - 1;
static const u64 kMyriadShadowOffset32 =
(kMyriadMemoryOffset32 + kMyriadMemorySize32 -
(kMyriadMemorySize32 >> kDefaultShadowScale));
static const u64 kMyriadCacheBitMask32 = 0x40000000ULL;
#define SHADOW_SCALE kDefaultShadowScale
#if SANITIZER_FUCHSIA
@ -166,6 +189,8 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
# define SHADOW_OFFSET kMIPS32_ShadowOffset32
# elif SANITIZER_FREEBSD
# define SHADOW_OFFSET kFreeBSD_ShadowOffset32
# elif SANITIZER_NETBSD
# define SHADOW_OFFSET kNetBSD_ShadowOffset32
# elif SANITIZER_WINDOWS
# define SHADOW_OFFSET kWindowsShadowOffset32
# elif SANITIZER_IOS
@ -174,6 +199,8 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
# else
# define SHADOW_OFFSET kIosShadowOffset32
# endif
# elif SANITIZER_MYRIAD2
# define SHADOW_OFFSET kMyriadShadowOffset32
# else
# define SHADOW_OFFSET kDefaultShadowOffset32
# endif
@ -212,6 +239,39 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
#endif
#define SHADOW_GRANULARITY (1ULL << SHADOW_SCALE)
#define DO_ASAN_MAPPING_PROFILE 0 // Set to 1 to profile the functions below.
#if DO_ASAN_MAPPING_PROFILE
# define PROFILE_ASAN_MAPPING() AsanMappingProfile[__LINE__]++;
#else
# define PROFILE_ASAN_MAPPING()
#endif
// If 1, all shadow boundaries are constants.
// Don't set to 1 other than for testing.
#define ASAN_FIXED_MAPPING 0
namespace __asan {
extern uptr AsanMappingProfile[];
#if ASAN_FIXED_MAPPING
// Fixed mapping for 64-bit Linux. Mostly used for performance comparison
// with non-fixed mapping. As of r175253 (Feb 2013) the performance
// difference between fixed and non-fixed mapping is below the noise level.
static uptr kHighMemEnd = 0x7fffffffffffULL;
static uptr kMidMemBeg = 0x3000000000ULL;
static uptr kMidMemEnd = 0x4fffffffffULL;
#else
extern uptr kHighMemEnd, kMidMemBeg, kMidMemEnd; // Initialized in __asan_init.
#endif
} // namespace __asan
#if SANITIZER_MYRIAD2
#include "asan_mapping_myriad.h"
#else
#define MEM_TO_SHADOW(mem) (((mem) >> SHADOW_SCALE) + (SHADOW_OFFSET))
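// For example, with SHADOW_SCALE == 3 and the default 32-bit offset
// 0x20000000, MEM_TO_SHADOW(0x40000000) == (0x40000000 >> 3) + 0x20000000
// == 0x28000000.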
#define kLowMemBeg 0
@ -243,36 +303,11 @@ static const u64 kWindowsShadowOffset32 = 3ULL << 28; // 0x30000000
#define kShadowGap3Beg (kMidMemBeg ? kMidMemEnd + 1 : 0)
#define kShadowGap3End (kMidMemBeg ? kHighShadowBeg - 1 : 0)
#define DO_ASAN_MAPPING_PROFILE 0 // Set to 1 to profile the functions below.
#if DO_ASAN_MAPPING_PROFILE
# define PROFILE_ASAN_MAPPING() AsanMappingProfile[__LINE__]++;
#else
# define PROFILE_ASAN_MAPPING()
#endif
// If 1, all shadow boundaries are constants.
// Don't set to 1 other than for testing.
#define ASAN_FIXED_MAPPING 0
namespace __asan {
extern uptr AsanMappingProfile[];
#if ASAN_FIXED_MAPPING
// Fixed mapping for 64-bit Linux. Mostly used for performance comparison
// with non-fixed mapping. As of r175253 (Feb 2013) the performance
// difference between fixed and non-fixed mapping is below the noise level.
static uptr kHighMemEnd = 0x7fffffffffffULL;
static uptr kMidMemBeg = 0x3000000000ULL;
static uptr kMidMemEnd = 0x4fffffffffULL;
#else
extern uptr kHighMemEnd, kMidMemBeg, kMidMemEnd; // Initialized in __asan_init.
#endif
static inline bool AddrIsInLowMem(uptr a) {
PROFILE_ASAN_MAPPING();
return a < kLowMemEnd;
return a <= kLowMemEnd;
}
static inline bool AddrIsInLowShadow(uptr a) {
@ -280,16 +315,26 @@ static inline bool AddrIsInLowShadow(uptr a) {
return a >= kLowShadowBeg && a <= kLowShadowEnd;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return a >= kHighMemBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInMidMem(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidMemBeg && a <= kMidMemEnd;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidShadowBeg && a <= kMidShadowEnd;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return kHighMemBeg && a >= kHighMemBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kHighMemBeg && a >= kHighShadowBeg && a <= kHighShadowEnd;
}
static inline bool AddrIsInShadowGap(uptr a) {
PROFILE_ASAN_MAPPING();
if (kMidMemBeg) {
@ -305,6 +350,12 @@ static inline bool AddrIsInShadowGap(uptr a) {
return a >= kShadowGapBeg && a <= kShadowGapEnd;
}
} // namespace __asan
#endif // SANITIZER_MYRIAD2
namespace __asan {
static inline bool AddrIsInMem(uptr a) {
PROFILE_ASAN_MAPPING();
return AddrIsInLowMem(a) || AddrIsInMidMem(a) || AddrIsInHighMem(a) ||
@ -317,16 +368,6 @@ static inline uptr MemToShadow(uptr p) {
return MEM_TO_SHADOW(p);
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return a >= kHighShadowBeg && a <= kHighMemEnd;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return kMidMemBeg && a >= kMidShadowBeg && a <= kMidMemEnd;
}
static inline bool AddrIsInShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return AddrIsInLowShadow(a) || AddrIsInMidShadow(a) || AddrIsInHighShadow(a);
@ -339,6 +380,8 @@ static inline bool AddrIsAlignedByGranularity(uptr a) {
static inline bool AddressIsPoisoned(uptr a) {
PROFILE_ASAN_MAPPING();
if (SANITIZER_MYRIAD2 && !AddrIsInMem(a) && !AddrIsInShadow(a))
return false;
const uptr kAccessSize = 1;
u8 *shadow_address = (u8*)MEM_TO_SHADOW(a);
s8 shadow_value = *shadow_address;

View File

@ -0,0 +1,86 @@
//===-- asan_mapping_myriad.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// Myriad-specific definitions for ASan memory mapping.
//===----------------------------------------------------------------------===//
#ifndef ASAN_MAPPING_MYRIAD_H
#define ASAN_MAPPING_MYRIAD_H
#define RAW_ADDR(mem) ((mem) & ~kMyriadCacheBitMask32)
#define MEM_TO_SHADOW(mem) \
(((RAW_ADDR(mem) - kLowMemBeg) >> SHADOW_SCALE) + (SHADOW_OFFSET))
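// For example, two addresses that differ only in the cache bit, such as
// 0x80001000 and 0xc0001000, map to the same shadow byte:
// ((0x80001000 - 0x80000000) >> 5) + 0x9f000000 == 0x9f000080.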
#define kLowMemBeg kMyriadMemoryOffset32
#define kLowMemEnd (SHADOW_OFFSET - 1)
#define kLowShadowBeg SHADOW_OFFSET
#define kLowShadowEnd MEM_TO_SHADOW(kLowMemEnd)
#define kHighMemBeg 0
#define kHighShadowBeg 0
#define kHighShadowEnd 0
#define kMidShadowBeg 0
#define kMidShadowEnd 0
#define kShadowGapBeg (kLowShadowEnd + 1)
#define kShadowGapEnd kMyriadMemoryEnd32
#define kShadowGap2Beg 0
#define kShadowGap2End 0
#define kShadowGap3Beg 0
#define kShadowGap3End 0
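// With the constants above this reproduces the Myriad2 layout documented in
// asan_mapping.h: LowMem [0x80000000, 0x9effffff], LowShadow
// [0x9f000000, 0x9ff7ffff], ShadowGap [0x9ff80000, 0x9fffffff].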
namespace __asan {
static inline bool AddrIsInLowMem(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kLowMemBeg && a <= kLowMemEnd;
}
static inline bool AddrIsInLowShadow(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kLowShadowBeg && a <= kLowShadowEnd;
}
static inline bool AddrIsInMidMem(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInMidShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInHighMem(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInHighShadow(uptr a) {
PROFILE_ASAN_MAPPING();
return false;
}
static inline bool AddrIsInShadowGap(uptr a) {
PROFILE_ASAN_MAPPING();
a = RAW_ADDR(a);
return a >= kShadowGapBeg && a <= kShadowGapEnd;
}
} // namespace __asan
#endif // ASAN_MAPPING_MYRIAD_H

View File

@ -31,9 +31,9 @@ struct AllocationSite {
class HeapProfile {
public:
HeapProfile() : allocations_(1024) {}
HeapProfile() { allocations_.reserve(1024); }
void ProcessChunk(const AsanChunkView& cv) {
void ProcessChunk(const AsanChunkView &cv) {
if (cv.IsAllocated()) {
total_allocated_user_size_ += cv.UsedSize();
total_allocated_count_++;
@ -49,10 +49,10 @@ class HeapProfile {
}
void Print(uptr top_percent, uptr max_number_of_contexts) {
InternalSort(&allocations_, allocations_.size(),
[](const AllocationSite &a, const AllocationSite &b) {
return a.total_size > b.total_size;
});
Sort(allocations_.data(), allocations_.size(),
[](const AllocationSite &a, const AllocationSite &b) {
return a.total_size > b.total_size;
});
CHECK(total_allocated_user_size_);
uptr total_shown = 0;
Printf("Live Heap Allocations: %zd bytes in %zd chunks; quarantined: "

View File

@ -14,6 +14,8 @@
#include "asan_allocator.h"
#include "asan_internal.h"
#include "asan_malloc_local.h"
#include "asan_report.h"
#include "asan_stack.h"
#include "interception/interception.h"
@ -67,16 +69,28 @@ struct nothrow_t {};
enum class align_val_t: size_t {};
} // namespace std
// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
// TODO(alekseyshl): throw std::bad_alloc instead of dying on OOM.
// For local pool allocation, align to SHADOW_GRANULARITY to match asan
// allocator behavior.
#define OPERATOR_NEW_BODY(type, nothrow) \
if (ALLOCATE_FROM_LOCAL_POOL) {\
void *res = MemalignFromLocalPool(SHADOW_GRANULARITY, size);\
if (!nothrow) CHECK(res);\
return res;\
}\
GET_STACK_TRACE_MALLOC;\
void *res = asan_memalign(0, size, &stack, type);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res;
#define OPERATOR_NEW_BODY_ALIGN(type, nothrow) \
if (ALLOCATE_FROM_LOCAL_POOL) {\
void *res = MemalignFromLocalPool((uptr)align, size);\
if (!nothrow) CHECK(res);\
return res;\
}\
GET_STACK_TRACE_MALLOC;\
void *res = asan_memalign((uptr)align, size, &stack, type);\
if (!nothrow && UNLIKELY(!res)) DieOnFailure::OnOOM();\
if (!nothrow && UNLIKELY(!res)) ReportOutOfMemory(size, &stack);\
return res;
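// In both forms the nothrow variants return nullptr on allocation failure
// instead of reporting an out-of-memory error.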
// On OS X it's not enough to just provide our own 'operator new' and
@ -128,18 +142,22 @@ INTERCEPTOR(void *, _ZnamRKSt9nothrow_t, size_t size, std::nothrow_t const&) {
#endif // !SANITIZER_MAC
#define OPERATOR_DELETE_BODY(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, 0, 0, &stack, type);
#define OPERATOR_DELETE_BODY_SIZE(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, size, 0, &stack, type);
#define OPERATOR_DELETE_BODY_ALIGN(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, 0, static_cast<uptr>(align), &stack, type);
#define OPERATOR_DELETE_BODY_SIZE_ALIGN(type) \
if (IS_FROM_LOCAL_POOL(ptr)) return;\
GET_STACK_TRACE_FREE;\
asan_delete(ptr, size, static_cast<uptr>(align), &stack, type);

View File

@ -32,7 +32,7 @@ bool CanPoisonMemory() {
}
void PoisonShadow(uptr addr, uptr size, u8 value) {
if (!CanPoisonMemory()) return;
if (value && !CanPoisonMemory()) return;
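// Clearing shadow (value == 0) is always allowed, even when poisoning is
// otherwise disabled.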
CHECK(AddrIsAlignedByGranularity(addr));
CHECK(AddrIsInMem(addr));
CHECK(AddrIsAlignedByGranularity(addr + size));
@ -182,8 +182,15 @@ int __asan_address_is_poisoned(void const volatile *addr) {
uptr __asan_region_is_poisoned(uptr beg, uptr size) {
if (!size) return 0;
uptr end = beg + size;
if (!AddrIsInMem(beg)) return beg;
if (!AddrIsInMem(end)) return end;
if (SANITIZER_MYRIAD2) {
// On Myriad, addresses outside the DRAM range need to be treated as
// unpoisoned.
if (!AddrIsInMem(beg) && !AddrIsInShadow(beg)) return 0;
if (!AddrIsInMem(end) && !AddrIsInShadow(end)) return 0;
} else {
if (!AddrIsInMem(beg)) return beg;
if (!AddrIsInMem(end)) return end;
}
CHECK_LT(beg, end);
uptr aligned_b = RoundUpTo(beg, SHADOW_GRANULARITY);
uptr aligned_e = RoundDownTo(end, SHADOW_GRANULARITY);
@ -452,4 +459,3 @@ bool WordIsPoisoned(uptr addr) {
return (__asan_region_is_poisoned(addr, sizeof(uptr)) != 0);
}
}

View File

@ -38,7 +38,7 @@ void PoisonShadowPartialRightRedzone(uptr addr,
// performance-critical code with care.
ALWAYS_INLINE void FastPoisonShadow(uptr aligned_beg, uptr aligned_size,
u8 value) {
DCHECK(CanPoisonMemory());
DCHECK(!value || CanPoisonMemory());
uptr shadow_beg = MEM_TO_SHADOW(aligned_beg);
uptr shadow_end = MEM_TO_SHADOW(
aligned_beg + aligned_size - SHADOW_GRANULARITY) + 1;
@ -51,6 +51,9 @@ ALWAYS_INLINE void FastPoisonShadow(uptr aligned_beg, uptr aligned_size,
// changed at all. It doesn't currently have an efficient means
// to zero a bunch of pages, but maybe we should add one.
SANITIZER_FUCHSIA == 1 ||
// RTEMS doesn't have pages, let alone a fast way to zero
// them, so default to memset.
SANITIZER_RTEMS == 1 ||
shadow_end - shadow_beg < common_flags()->clear_shadow_mmap_threshold) {
REAL(memset)((void*)shadow_beg, value, shadow_end - shadow_beg);
} else {

View File

@ -84,7 +84,7 @@ static void PrintZoneForPointer(uptr ptr, uptr zone_ptr,
bool ParseFrameDescription(const char *frame_descr,
InternalMmapVector<StackVarDescr> *vars) {
CHECK(frame_descr);
char *p;
const char *p;
// This string is created by the compiler and has the following form:
// "n alloc_1 alloc_2 ... alloc_n"
// where alloc_i looks like "offset size len ObjectName"
@ -134,6 +134,10 @@ class ScopedInErrorReport {
}
~ScopedInErrorReport() {
if (halt_on_error_ && !__sanitizer_acquire_crash_state()) {
asanThreadRegistry().Unlock();
return;
}
ASAN_ON_ERROR();
if (current_error_.IsValid()) current_error_.Print();
@ -152,7 +156,7 @@ class ScopedInErrorReport {
// Copy the message buffer so that we could start logging without holding a
// lock that gets acquired during printing.
InternalScopedBuffer<char> buffer_copy(kErrorMessageBufferSize);
InternalMmapVector<char> buffer_copy(kErrorMessageBufferSize);
{
BlockingMutexLock l(&error_message_buf_mutex);
internal_memcpy(buffer_copy.data(),
@ -202,7 +206,7 @@ class ScopedInErrorReport {
bool halt_on_error_;
};
ErrorDescription ScopedInErrorReport::current_error_;
ErrorDescription ScopedInErrorReport::current_error_(LINKER_INITIALIZED);
void ReportDeadlySignal(const SignalContext &sig) {
ScopedInErrorReport in_report(/*fatal*/ true);
@ -254,6 +258,62 @@ void ReportSanitizerGetAllocatedSizeNotOwned(uptr addr,
in_report.ReportError(error);
}
void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorCallocOverflow error(GetCurrentTidOrInvalid(), stack, count, size);
in_report.ReportError(error);
}
void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorPvallocOverflow error(GetCurrentTidOrInvalid(), stack, size);
in_report.ReportError(error);
}
void ReportInvalidAllocationAlignment(uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidAllocationAlignment error(GetCurrentTidOrInvalid(), stack,
alignment);
in_report.ReportError(error);
}
void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidAlignedAllocAlignment error(GetCurrentTidOrInvalid(), stack,
size, alignment);
in_report.ReportError(error);
}
void ReportInvalidPosixMemalignAlignment(uptr alignment,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorInvalidPosixMemalignAlignment error(GetCurrentTidOrInvalid(), stack,
alignment);
in_report.ReportError(error);
}
void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size,
BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorAllocationSizeTooBig error(GetCurrentTidOrInvalid(), stack, user_size,
total_size, max_size);
in_report.ReportError(error);
}
void ReportRssLimitExceeded(BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorRssLimitExceeded error(GetCurrentTidOrInvalid(), stack);
in_report.ReportError(error);
}
void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack) {
ScopedInErrorReport in_report(/*fatal*/ true);
ErrorOutOfMemory error(GetCurrentTidOrInvalid(), stack, requested_size);
in_report.ReportError(error);
}
void ReportStringFunctionMemoryRangesOverlap(const char *function,
const char *offset1, uptr length1,
const char *offset2, uptr length2,
@ -343,7 +403,11 @@ static bool IsInvalidPointerPair(uptr a1, uptr a2) {
}
static INLINE void CheckForInvalidPointerPair(void *p1, void *p2) {
if (!flags()->detect_invalid_pointer_pairs) return;
switch (flags()->detect_invalid_pointer_pairs) {
case 0 : return;
case 1 : if (p1 == nullptr || p2 == nullptr) return; break;
}
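// 0 disables the check entirely, 1 skips pairs where either pointer is null,
// and any other value checks every pair.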
uptr a1 = reinterpret_cast<uptr>(p1);
uptr a2 = reinterpret_cast<uptr>(p2);

View File

@ -58,6 +58,18 @@ void ReportAllocTypeMismatch(uptr addr, BufferedStackTrace *free_stack,
void ReportMallocUsableSizeNotOwned(uptr addr, BufferedStackTrace *stack);
void ReportSanitizerGetAllocatedSizeNotOwned(uptr addr,
BufferedStackTrace *stack);
void ReportCallocOverflow(uptr count, uptr size, BufferedStackTrace *stack);
void ReportPvallocOverflow(uptr size, BufferedStackTrace *stack);
void ReportInvalidAllocationAlignment(uptr alignment,
BufferedStackTrace *stack);
void ReportInvalidAlignedAllocAlignment(uptr size, uptr alignment,
BufferedStackTrace *stack);
void ReportInvalidPosixMemalignAlignment(uptr alignment,
BufferedStackTrace *stack);
void ReportAllocationSizeTooBig(uptr user_size, uptr total_size, uptr max_size,
BufferedStackTrace *stack);
void ReportRssLimitExceeded(BufferedStackTrace *stack);
void ReportOutOfMemory(uptr requested_size, BufferedStackTrace *stack);
void ReportStringFunctionMemoryRangesOverlap(const char *function,
const char *offset1, uptr length1,
const char *offset2, uptr length2,

lib/asan/asan_rtems.cc Normal file
View File

@ -0,0 +1,253 @@
//===-- asan_rtems.cc -----------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file is a part of AddressSanitizer, an address sanity checker.
//
// RTEMS-specific details.
//===----------------------------------------------------------------------===//
#include "sanitizer_common/sanitizer_rtems.h"
#if SANITIZER_RTEMS
#include "asan_internal.h"
#include "asan_interceptors.h"
#include "asan_mapping.h"
#include "asan_poisoning.h"
#include "asan_report.h"
#include "asan_stack.h"
#include "sanitizer_common/sanitizer_common.h"
#include "sanitizer_common/sanitizer_libc.h"
#include <pthread.h>
#include <stdlib.h>
namespace __asan {
static void ResetShadowMemory() {
uptr shadow_start = SHADOW_OFFSET;
uptr shadow_end = MEM_TO_SHADOW(kMyriadMemoryEnd32);
uptr gap_start = MEM_TO_SHADOW(shadow_start);
uptr gap_end = MEM_TO_SHADOW(shadow_end);
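// Unpoison the shadow for all of DRAM, then mark the shadow of the shadow
// region itself (the gap) with kAsanShadowGap.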
REAL(memset)((void *)shadow_start, 0, shadow_end - shadow_start);
REAL(memset)((void *)gap_start, kAsanShadowGap, gap_end - gap_start);
}
void InitializeShadowMemory() {
kHighMemEnd = 0;
kMidMemBeg = 0;
kMidMemEnd = 0;
ResetShadowMemory();
}
void AsanApplyToGlobals(globals_op_fptr op, const void *needle) {
UNIMPLEMENTED();
}
void AsanCheckDynamicRTPrereqs() {}
void AsanCheckIncompatibleRT() {}
void InitializeAsanInterceptors() {}
void InitializePlatformInterceptors() {}
void InitializePlatformExceptionHandlers() {}
// RTEMS only supports static linking; it suffices to return with no
// error.
void *AsanDoesNotSupportStaticLinkage() { return nullptr; }
void AsanOnDeadlySignal(int signo, void *siginfo, void *context) {
UNIMPLEMENTED();
}
void EarlyInit() {
// Provide early initialization of shadow memory so that
// instrumented code running before full initialization will not
// report spurious errors.
ResetShadowMemory();
}
// We can use a plain thread_local variable for TSD.
static thread_local void *per_thread;
void *AsanTSDGet() { return per_thread; }
void AsanTSDSet(void *tsd) { per_thread = tsd; }
// There's no initialization needed, and the passed-in destructor
// will never be called. Instead, our own thread destruction hook
// (below) will call AsanThread::TSDDtor directly.
void AsanTSDInit(void (*destructor)(void *tsd)) {
DCHECK(destructor == &PlatformTSDDtor);
}
void PlatformTSDDtor(void *tsd) { UNREACHABLE(__func__); }
//
// Thread registration. We provide an API similar to the Fuchsia port.
//
struct AsanThread::InitOptions {
uptr stack_bottom, stack_size, tls_bottom, tls_size;
};
// Shared setup between thread creation and startup for the initial thread.
static AsanThread *CreateAsanThread(StackTrace *stack, u32 parent_tid,
uptr user_id, bool detached,
uptr stack_bottom, uptr stack_size,
uptr tls_bottom, uptr tls_size) {
// In lieu of AsanThread::Create.
AsanThread *thread = (AsanThread *)MmapOrDie(sizeof(AsanThread), __func__);
AsanThreadContext::CreateThreadContextArgs args = {thread, stack};
asanThreadRegistry().CreateThread(user_id, detached, parent_tid, &args);
// On other systems, AsanThread::Init() is called from the new
// thread itself. But on RTEMS we already know the stack address
// range beforehand, so we can do most of the setup right now.
const AsanThread::InitOptions options = {stack_bottom, stack_size,
tls_bottom, tls_size};
thread->Init(&options);
return thread;
}
// This gets the same arguments passed to Init by CreateAsanThread, above.
// We're in the creator thread before the new thread is actually started, but
// its stack and TLS address range are already known.
void AsanThread::SetThreadStackAndTls(const AsanThread::InitOptions *options) {
DCHECK_NE(GetCurrentThread(), this);
DCHECK_NE(GetCurrentThread(), nullptr);
CHECK_NE(options->stack_bottom, 0);
CHECK_NE(options->stack_size, 0);
stack_bottom_ = options->stack_bottom;
stack_top_ = options->stack_bottom + options->stack_size;
tls_begin_ = options->tls_bottom;
tls_end_ = options->tls_bottom + options->tls_size;
}
// Called by __asan::AsanInitInternal (asan_rtl.c). Unlike other ports, the
// main thread on RTEMS does not require special treatment; its AsanThread is
// already created by the provided hooks. This function simply looks up and
// returns the created thread.
AsanThread *CreateMainThread() {
return GetThreadContextByTidLocked(0)->thread;
}
// This is called before each thread creation is attempted. So, in
// its first call, the calling thread is the initial and sole thread.
static void *BeforeThreadCreateHook(uptr user_id, bool detached,
uptr stack_bottom, uptr stack_size,
uptr tls_bottom, uptr tls_size) {
EnsureMainThreadIDIsCorrect();
// Strict init-order checking is thread-hostile.
if (flags()->strict_init_order) StopInitOrderChecking();
GET_STACK_TRACE_THREAD;
u32 parent_tid = GetCurrentTidOrInvalid();
return CreateAsanThread(&stack, parent_tid, user_id, detached,
stack_bottom, stack_size, tls_bottom, tls_size);
}
// This is called after creating a new thread (in the creating thread),
// with the pointer returned by BeforeThreadCreateHook (above).
static void ThreadCreateHook(void *hook, bool aborted) {
AsanThread *thread = static_cast<AsanThread *>(hook);
if (!aborted) {
// The thread was created successfully.
// ThreadStartHook is already running in the new thread.
} else {
// The thread wasn't created after all.
// Clean up everything we set up in BeforeThreadCreateHook.
asanThreadRegistry().FinishThread(thread->tid());
UnmapOrDie(thread, sizeof(AsanThread));
}
}
// This is called (1) in the newly-created thread before it runs anything else,
// with the pointer returned by BeforeThreadCreateHook (above). (2) before a
// thread restart.
static void ThreadStartHook(void *hook, uptr os_id) {
if (!hook)
return;
AsanThread *thread = static_cast<AsanThread *>(hook);
SetCurrentThread(thread);
ThreadStatus status =
asanThreadRegistry().GetThreadLocked(thread->tid())->status;
DCHECK(status == ThreadStatusCreated || status == ThreadStatusRunning);
// Determine whether we are starting or restarting the thread.
if (status == ThreadStatusCreated)
// In lieu of AsanThread::ThreadStart.
asanThreadRegistry().StartThread(thread->tid(), os_id,
/*workerthread*/ false, nullptr);
else {
// In a thread restart, a thread may resume execution at an
// arbitrary function entry point, with its stack and TLS state
// reset. We unpoison the stack in that case.
PoisonShadow(thread->stack_bottom(), thread->stack_size(), 0);
}
}
// Each thread runs this just before it exits,
// with the pointer returned by BeforeThreadCreateHook (above).
// All per-thread destructors have already been called.
static void ThreadExitHook(void *hook, uptr os_id) {
AsanThread *thread = static_cast<AsanThread *>(hook);
if (thread)
AsanThread::TSDDtor(thread->context());
}
static void HandleExit() {
// Disable ASan by setting it to uninitialized. Also reset the
// shadow memory to avoid reporting errors after the run-time has
// been destroyed.
if (asan_inited) {
asan_inited = false;
ResetShadowMemory();
}
}
} // namespace __asan
// These are declared (in extern "C") by <some_path/sanitizer.h>.
// The system runtime will call our definitions directly.
extern "C" {
void __sanitizer_early_init() {
__asan::EarlyInit();
}
void *__sanitizer_before_thread_create_hook(uptr thread, bool detached,
const char *name,
void *stack_base, size_t stack_size,
void *tls_base, size_t tls_size) {
return __asan::BeforeThreadCreateHook(
thread, detached,
reinterpret_cast<uptr>(stack_base), stack_size,
reinterpret_cast<uptr>(tls_base), tls_size);
}
void __sanitizer_thread_create_hook(void *handle, uptr thread, int status) {
__asan::ThreadCreateHook(handle, status != 0);
}
void __sanitizer_thread_start_hook(void *handle, uptr self) {
__asan::ThreadStartHook(handle, self);
}
void __sanitizer_thread_exit_hook(void *handle, uptr self) {
__asan::ThreadExitHook(handle, self);
}
void __sanitizer_exit() {
__asan::HandleExit();
}
} // "C"
#endif // SANITIZER_RTEMS

View File

@ -56,7 +56,8 @@ static void AsanDie() {
UnmapOrDie((void*)kLowShadowBeg, kMidMemBeg - kLowShadowBeg);
UnmapOrDie((void*)kMidMemEnd, kHighShadowEnd - kMidMemEnd);
} else {
UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
if (kHighShadowEnd)
UnmapOrDie((void*)kLowShadowBeg, kHighShadowEnd - kLowShadowBeg);
}
}
}
@ -65,8 +66,14 @@ static void AsanCheckFailed(const char *file, int line, const char *cond,
u64 v1, u64 v2) {
Report("AddressSanitizer CHECK failed: %s:%d \"%s\" (0x%zx, 0x%zx)\n", file,
line, cond, (uptr)v1, (uptr)v2);
// FIXME: check for infinite recursion without a thread-local counter here.
PRINT_CURRENT_STACK_CHECK();
// Print a stack trace the first time we come here. Otherwise, we probably
// failed a CHECK during symbolization.
static atomic_uint32_t num_calls;
if (atomic_fetch_add(&num_calls, 1, memory_order_relaxed) == 0) {
PRINT_CURRENT_STACK_CHECK();
}
Die();
}
@ -140,6 +147,8 @@ ASAN_REPORT_ERROR_N(load, false)
ASAN_REPORT_ERROR_N(store, true)
#define ASAN_MEMORY_ACCESS_CALLBACK_BODY(type, is_write, size, exp_arg, fatal) \
if (SANITIZER_MYRIAD2 && !AddrIsInMem(addr) && !AddrIsInShadow(addr)) \
return; \
uptr sp = MEM_TO_SHADOW(addr); \
uptr s = size <= SHADOW_GRANULARITY ? *reinterpret_cast<u8 *>(sp) \
: *reinterpret_cast<u16 *>(sp); \
@ -306,6 +315,7 @@ static void asan_atexit() {
}
static void InitializeHighMemEnd() {
#if !SANITIZER_MYRIAD2
#if !ASAN_FIXED_MAPPING
kHighMemEnd = GetMaxUserVirtualAddress();
// Increase kHighMemEnd to make sure it's properly
@ -313,13 +323,16 @@ static void InitializeHighMemEnd() {
kHighMemEnd |= SHADOW_GRANULARITY * GetMmapGranularity() - 1;
#endif // !ASAN_FIXED_MAPPING
CHECK_EQ((kHighMemBeg % GetMmapGranularity()), 0);
#endif // !SANITIZER_MYRIAD2
}
void PrintAddressSpaceLayout() {
Printf("|| `[%p, %p]` || HighMem ||\n",
(void*)kHighMemBeg, (void*)kHighMemEnd);
Printf("|| `[%p, %p]` || HighShadow ||\n",
(void*)kHighShadowBeg, (void*)kHighShadowEnd);
if (kHighMemBeg) {
Printf("|| `[%p, %p]` || HighMem ||\n",
(void*)kHighMemBeg, (void*)kHighMemEnd);
Printf("|| `[%p, %p]` || HighShadow ||\n",
(void*)kHighShadowBeg, (void*)kHighShadowEnd);
}
if (kMidMemBeg) {
Printf("|| `[%p, %p]` || ShadowGap3 ||\n",
(void*)kShadowGap3Beg, (void*)kShadowGap3End);
@ -338,11 +351,14 @@ void PrintAddressSpaceLayout() {
Printf("|| `[%p, %p]` || LowMem ||\n",
(void*)kLowMemBeg, (void*)kLowMemEnd);
}
Printf("MemToShadow(shadow): %p %p %p %p",
Printf("MemToShadow(shadow): %p %p",
(void*)MEM_TO_SHADOW(kLowShadowBeg),
(void*)MEM_TO_SHADOW(kLowShadowEnd),
(void*)MEM_TO_SHADOW(kHighShadowBeg),
(void*)MEM_TO_SHADOW(kHighShadowEnd));
(void*)MEM_TO_SHADOW(kLowShadowEnd));
if (kHighMemBeg) {
Printf(" %p %p",
(void*)MEM_TO_SHADOW(kHighShadowBeg),
(void*)MEM_TO_SHADOW(kHighShadowEnd));
}
if (kMidMemBeg) {
Printf(" %p %p",
(void*)MEM_TO_SHADOW(kMidShadowBeg),
@ -374,6 +390,7 @@ static void AsanInitInternal() {
asan_init_is_running = true;
CacheBinaryName();
CheckASLR();
// Initialize flags. This must be done early, because most of the
// initialization steps look at flags().
@ -526,6 +543,9 @@ void NOINLINE __asan_handle_no_return() {
if (curr_thread) {
top = curr_thread->stack_top();
bottom = ((uptr)&local_stack - PageSize) & ~(PageSize - 1);
} else if (SANITIZER_RTEMS) {
// Give up on RTEMS.
return;
} else {
CHECK(!SANITIZER_FUCHSIA);
// If we haven't seen this thread, try asking the OS for stack bounds.

View File

@ -14,8 +14,9 @@
#include "sanitizer_common/sanitizer_platform.h"
// asan_fuchsia.cc has its own InitializeShadowMemory implementation.
#if !SANITIZER_FUCHSIA
// asan_fuchsia.cc and asan_rtems.cc have their own
// InitializeShadowMemory implementation.
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
#include "asan_internal.h"
#include "asan_mapping.h"
@ -30,8 +31,7 @@ void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name) {
CHECK_EQ(((end + 1) % GetMmapGranularity()), 0);
uptr size = end - beg + 1;
DecreaseTotalMmap(size); // Don't count the shadow against mmap_limit_mb.
void *res = MmapFixedNoReserve(beg, size, name);
if (res != (void *)beg) {
if (!MmapFixedNoReserve(beg, size, name)) {
Report(
"ReserveShadowMemoryRange failed while trying to map 0x%zx bytes. "
"Perhaps you're using ulimit -v\n",
@ -162,4 +162,4 @@ void InitializeShadowMemory() {
} // namespace __asan
#endif // !SANITIZER_FUCHSIA
#endif // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS

View File

@ -221,22 +221,25 @@ FakeStack *AsanThread::AsyncSignalSafeLazyInitFakeStack() {
void AsanThread::Init(const InitOptions *options) {
next_stack_top_ = next_stack_bottom_ = 0;
atomic_store(&stack_switching_, false, memory_order_release);
fake_stack_ = nullptr; // Will be initialized lazily if needed.
CHECK_EQ(this->stack_size(), 0U);
SetThreadStackAndTls(options);
CHECK_GT(this->stack_size(), 0U);
CHECK(AddrIsInMem(stack_bottom_));
CHECK(AddrIsInMem(stack_top_ - 1));
ClearShadowForThreadStackAndTLS();
fake_stack_ = nullptr;
if (__asan_option_detect_stack_use_after_return)
AsyncSignalSafeLazyInitFakeStack();
int local = 0;
VReport(1, "T%d: stack [%p,%p) size 0x%zx; local=%p\n", tid(),
(void *)stack_bottom_, (void *)stack_top_, stack_top_ - stack_bottom_,
&local);
}
// Fuchsia doesn't use ThreadStart.
// asan_fuchsia.c defines CreateMainThread and SetThreadStackAndTls.
#if !SANITIZER_FUCHSIA
// Fuchsia and RTEMS don't use ThreadStart.
// asan_fuchsia.c/asan_rtems.c define CreateMainThread and
// SetThreadStackAndTls.
#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
thread_return_t AsanThread::ThreadStart(
tid_t os_id, atomic_uintptr_t *signal_thread_is_registered) {
@ -296,12 +299,17 @@ void AsanThread::SetThreadStackAndTls(const InitOptions *options) {
CHECK(AddrIsInStack((uptr)&local));
}
#endif // !SANITIZER_FUCHSIA
#endif // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS
void AsanThread::ClearShadowForThreadStackAndTLS() {
PoisonShadow(stack_bottom_, stack_top_ - stack_bottom_, 0);
if (tls_begin_ != tls_end_)
PoisonShadow(tls_begin_, tls_end_ - tls_begin_, 0);
if (tls_begin_ != tls_end_) {
uptr tls_begin_aligned = RoundDownTo(tls_begin_, SHADOW_GRANULARITY);
uptr tls_end_aligned = RoundUpTo(tls_end_, SHADOW_GRANULARITY);
FastPoisonShadowPartialRightRedzone(tls_begin_aligned,
tls_end_ - tls_begin_aligned,
tls_end_aligned - tls_end_, 0);
}
}
bool AsanThread::GetStackFrameAccessByAddr(uptr addr,
@ -386,6 +394,9 @@ static bool ThreadStackContainsAddress(ThreadContextBase *tctx_base,
}
AsanThread *GetCurrentThread() {
if (SANITIZER_RTEMS && !asan_inited)
return nullptr;
AsanThreadContext *context =
reinterpret_cast<AsanThreadContext *>(AsanTSDGet());
if (!context) {
@ -477,6 +488,11 @@ void UnlockThreadRegistry() {
__asan::asanThreadRegistry().Unlock();
}
ThreadRegistry *GetThreadRegistryLocked() {
__asan::asanThreadRegistry().CheckLocked();
return &__asan::asanThreadRegistry();
}
void EnsureMainThreadIDIsCorrect() {
__asan::EnsureMainThreadIDIsCorrect();
}

View File

@ -222,8 +222,8 @@ uptr FindDynamicShadowStart() {
uptr alignment = 8 * granularity;
uptr left_padding = granularity;
uptr space_size = kHighShadowEnd + left_padding;
uptr shadow_start =
FindAvailableMemoryRange(space_size, alignment, granularity, nullptr);
uptr shadow_start = FindAvailableMemoryRange(space_size, alignment,
granularity, nullptr, nullptr);
CHECK_NE((uptr)0, shadow_start);
CHECK(IsAligned(shadow_start, alignment));
return shadow_start;
@ -265,11 +265,6 @@ ShadowExceptionHandler(PEXCEPTION_POINTERS exception_pointers) {
// Determine the address of the page that is being accessed.
uptr page = RoundDownTo(addr, page_size);
// Query the existing page.
MEMORY_BASIC_INFORMATION mem_info = {};
if (::VirtualQuery((LPVOID)page, &mem_info, sizeof(mem_info)) == 0)
return EXCEPTION_CONTINUE_SEARCH;
// Commit the page.
uptr result =
(uptr)::VirtualAlloc((LPVOID)page, page_size, MEM_COMMIT, PAGE_READWRITE);

View File

@ -99,7 +99,7 @@ INTERCEPTOR(int, _except_handler4, void *a, void *b, void *c, void *d) {
}
#endif
// Window specific functions not included in asan_interface.inc.
// Windows specific functions not included in asan_interface.inc.
INTERCEPT_WRAP_W_V(__asan_should_detect_stack_use_after_return)
INTERCEPT_WRAP_W_V(__asan_get_shadow_memory_dynamic_address)
INTERCEPT_WRAP_W_W(__asan_unhandled_exception_filter)

View File

@ -309,7 +309,7 @@ if [[ -n "$ASAN_RT64" ]]; then
cp "$ASAN_RT_PATH/$ASAN_RT64" "$TMPDIR/"
fi
ASAN_OPTIONS=start_deactivated=1,malloc_context_size=0
ASAN_OPTIONS=start_deactivated=1
# The name of a symlink to libclang_rt.asan-$ARCH-android.so used in LD_PRELOAD.
# The idea is to have the same name in lib and lib64 to keep it from falling
@ -336,6 +336,13 @@ exec $_to \$@
EOF
}
# On Android-L not allowing user segv handler breaks some applications.
# Since ~May 2017 this is the default setting; included for compatibility with
# older library versions.
if [[ PRE_L -eq 0 ]]; then
ASAN_OPTIONS="$ASAN_OPTIONS,allow_user_segv_handler=1"
fi
if [[ x$extra_options != x ]] ; then
ASAN_OPTIONS="$ASAN_OPTIONS,$extra_options"
fi

View File

@ -237,6 +237,9 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID)
if(APPLE)
darwin_filter_host_archs(ASAN_SUPPORTED_ARCH ASAN_TEST_ARCH)
endif()
if(OS_NAME MATCHES "SunOS")
list(REMOVE_ITEM ASAN_TEST_ARCH x86_64)
endif()
foreach(arch ${ASAN_TEST_ARCH})
@ -248,6 +251,8 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID)
$<TARGET_OBJECTS:RTInterception.osx>
$<TARGET_OBJECTS:RTSanitizerCommon.osx>
$<TARGET_OBJECTS:RTSanitizerCommonLibc.osx>
$<TARGET_OBJECTS:RTSanitizerCommonCoverage.osx>
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.osx>
$<TARGET_OBJECTS:RTLSanCommon.osx>
$<TARGET_OBJECTS:RTUbsan.osx>)
else()
@ -257,6 +262,8 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS AND NOT ANDROID)
$<TARGET_OBJECTS:RTInterception.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
$<TARGET_OBJECTS:RTLSanCommon.${arch}>
$<TARGET_OBJECTS:RTUbsan.${arch}>
$<TARGET_OBJECTS:RTUbsan_cxx.${arch}>)
@ -280,6 +287,8 @@ if(ANDROID)
$<TARGET_OBJECTS:RTInterception.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
$<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
$<TARGET_OBJECTS:RTUbsan.${arch}>
$<TARGET_OBJECTS:RTUbsan_cxx.${arch}>
${COMPILER_RT_GTEST_SOURCE}

View File

@ -25,6 +25,11 @@
#endif
#endif
#if defined(__sun__) && defined(__svr4__)
using std::_setjmp;
using std::_longjmp;
#endif
NOINLINE void *malloc_fff(size_t size) {
void *res = malloc/**/(size); break_optimization(0); return res;}
NOINLINE void *malloc_eee(size_t size) {

View File

@ -173,8 +173,8 @@ set(GENERIC_TF_SOURCES
trunctfsf2.c)
option(COMPILER_RT_EXCLUDE_ATOMIC_BUILTIN
"Skip the atomic builtin (this may be needed if system headers are unavailable)"
Off)
"Skip the atomic builtin (these should normally be provided by a shared library)"
On)
if(NOT FUCHSIA AND NOT COMPILER_RT_BAREMETAL_BUILD)
set(GENERIC_SOURCES
@ -406,6 +406,7 @@ if(MINGW)
arm/aeabi_ldivmod.S
arm/aeabi_uidivmod.S
arm/aeabi_uldivmod.S
arm/chkstk.S
divmoddi4.c
divmodsi4.c
divdi3.c
@ -459,6 +460,41 @@ set(armv6m_SOURCES ${thumb1_SOURCES})
set(armv7m_SOURCES ${arm_SOURCES})
set(armv7em_SOURCES ${arm_SOURCES})
# hexagon arch
set(hexagon_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
set(hexagon_SOURCES
hexagon/common_entry_exit_abi1.S
hexagon/common_entry_exit_abi2.S
hexagon/common_entry_exit_legacy.S
hexagon/dfaddsub.S
hexagon/dfdiv.S
hexagon/dffma.S
hexagon/dfminmax.S
hexagon/dfmul.S
hexagon/dfsqrt.S
hexagon/divdi3.S
hexagon/divsi3.S
hexagon/fabs_opt.S
hexagon/fastmath2_dlib_asm.S
hexagon/fastmath2_ldlib_asm.S
hexagon/fastmath_dlib_asm.S
hexagon/fma_opt.S
hexagon/fmax_opt.S
hexagon/fmin_opt.S
hexagon/memcpy_forward_vp4cp4n2.S
hexagon/memcpy_likely_aligned.S
hexagon/moddi3.S
hexagon/modsi3.S
hexagon/sfdiv_opt.S
hexagon/sfsqrt_opt.S
hexagon/udivdi3.S
hexagon/udivmoddi4.S
hexagon/udivmodsi4.S
hexagon/udivsi3.S
hexagon/umoddi3.S
hexagon/umodsi3.S)
set(mips_SOURCES ${GENERIC_SOURCES})
set(mipsel_SOURCES ${mips_SOURCES})
set(mips64_SOURCES ${GENERIC_TF_SOURCES}
@ -480,6 +516,12 @@ set(powerpc64_SOURCES
${GENERIC_SOURCES})
set(powerpc64le_SOURCES ${powerpc64_SOURCES})
set(riscv_SOURCES ${GENERIC_SOURCES} ${GENERIC_TF_SOURCES})
set(riscv32_SOURCES
riscv/mulsi3.S
${riscv_SOURCES})
set(riscv64_SOURCES ${riscv_SOURCES})
set(wasm32_SOURCES
${GENERIC_TF_SOURCES}
${GENERIC_SOURCES})
@ -542,6 +584,12 @@ else ()
list(APPEND BUILTIN_CFLAGS -fomit-frame-pointer -DCOMPILER_RT_ARMHF_TARGET)
endif()
# For RISCV32, we must force enable int128 for compiling long
# double routines.
if("${arch}" STREQUAL "riscv32")
list(APPEND BUILTIN_CFLAGS -fforce-enable-int128)
endif()
add_compiler_rt_runtime(clang_rt.builtins
STATIC
ARCHS ${arch}

lib/builtins/arm/chkstk.S Normal file
View File

@ -0,0 +1,34 @@
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
#include "../assembly.h"
// __chkstk routine
// This routine is Windows-specific.
// http://msdn.microsoft.com/en-us/library/ms648426.aspx
// This clobbers the register r12, and the condition codes, and uses r5 and r6
// as temporaries by backing them up and restoring them afterwards.
// Does not modify any memory or the stack pointer.
// movw r4, #256 // Number of bytes of stack, in units of 4 bytes
// bl __chkstk
// sub.w sp, sp, r4
#define PAGE_SIZE 4096
.p2align 2
DEFINE_COMPILERRT_FUNCTION(__chkstk)
lsl r4, r4, #2
mov r12, sp
push {r5, r6}
mov r5, r4
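// Probe the stack one page at a time: walk down from the current stack
// pointer, touching each page, until the requested byte count in r5 is
// exhausted.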
1:
sub r12, r12, #PAGE_SIZE
subs r5, r5, #PAGE_SIZE
ldr r6, [r12]
bgt 1b
pop {r5, r6}
bx lr
END_COMPILERRT_FUNCTION(__chkstk)

View File

@ -33,6 +33,11 @@ uintptr_t GetCurrentProcess(void);
#include <machine/sysarch.h>
#endif
#if defined(__OpenBSD__) && defined(__mips__)
#include <sys/types.h>
#include <machine/sysarch.h>
#endif
#if defined(__linux__) && defined(__mips__)
#include <sys/cachectl.h>
#include <sys/syscall.h>
@ -96,6 +101,8 @@ void __clear_cache(void *start, void *end) {
* Intel processors have a unified instruction and data cache
* so there is nothing to do
*/
#elif defined(_WIN32) && (defined(__arm__) || defined(__aarch64__))
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif defined(__arm__) && !defined(__APPLE__)
#if defined(__FreeBSD__) || defined(__NetBSD__)
struct arm_sync_icache_args arg;
@ -123,8 +130,6 @@ void __clear_cache(void *start, void *end) {
: "r"(syscall_nr), "r"(start_reg), "r"(end_reg),
"r"(flags));
assert(start_reg == 0 && "Cache flush syscall failed.");
#elif defined(_WIN32)
FlushInstructionCache(GetCurrentProcess(), start, end - start);
#else
compilerrt_abort();
#endif
@ -142,6 +147,8 @@ void __clear_cache(void *start, void *end) {
#else
syscall(__NR_cacheflush, start, (end_int - start_int), BCACHE);
#endif
#elif defined(__mips__) && defined(__OpenBSD__)
cacheflush(start, (uintptr_t)end - (uintptr_t)start, BCACHE);
#elif defined(__aarch64__) && !defined(__APPLE__)
uint64_t xstart = (uint64_t)(uintptr_t) start;
uint64_t xend = (uint64_t)(uintptr_t) end;
@ -156,12 +163,14 @@ void __clear_cache(void *start, void *end) {
* uintptr_t in case this runs in an ILP32 environment.
*/
const size_t dcache_line_size = 4 << ((ctr_el0 >> 16) & 15);
for (addr = xstart; addr < xend; addr += dcache_line_size)
for (addr = xstart & ~(dcache_line_size - 1); addr < xend;
addr += dcache_line_size)
__asm __volatile("dc cvau, %0" :: "r"(addr));
__asm __volatile("dsb ish");
const size_t icache_line_size = 4 << ((ctr_el0 >> 0) & 15);
for (addr = xstart; addr < xend; addr += icache_line_size)
for (addr = xstart & ~(icache_line_size - 1); addr < xend;
addr += icache_line_size)
__asm __volatile("ic ivau, %0" :: "r"(addr));
__asm __volatile("isb sy");
#elif defined (__powerpc64__)

View File

@ -16,6 +16,12 @@
/* Returns: the number of leading 0-bits */
#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
/* gcc resolves __builtin_clz -> __clzdi2 leading to infinite recursion */
#define __builtin_clz(a) __clzsi2(a)
extern si_int __clzsi2(si_int);
#endif
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int

View File

@ -416,9 +416,9 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
*Subtype = AMDFAM15H_BDVER3;
break; // "bdver3"; 30h-3Fh: Steamroller
}
if (Model >= 0x10 && Model <= 0x1f) {
if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) {
*Subtype = AMDFAM15H_BDVER2;
break; // "bdver2"; 10h-1Fh: Piledriver
break; // "bdver2"; 02h, 10h-1Fh: Piledriver
}
if (Model <= 0x0f) {
*Subtype = AMDFAM15H_BDVER1;

View File

@ -16,6 +16,12 @@
/* Returns: the number of trailing 0-bits */
#if !defined(__clang__) && (defined(__sparc64__) || defined(__mips64) || defined(__riscv__))
/* gcc resolves __builtin_ctz -> __ctzdi2 leading to infinite recursion */
#define __builtin_ctz(a) __ctzsi2(a)
extern si_int __ctzsi2(si_int);
#endif
/* Precondition: a != 0 */
COMPILER_RT_ABI si_int

View File

@ -14,7 +14,22 @@
#include "int_lib.h"
#include "int_util.h"
#ifdef __BIONIC__
/* There are 4 pthread key cleanup rounds on Bionic. Delay emutls deallocation
to round 2. We need to delay deallocation because:
- Android versions older than M lack __cxa_thread_atexit_impl, so apps
use a pthread key destructor to call C++ destructors.
- Apps might use __thread/thread_local variables in pthread destructors.
We can't wait until the final two rounds, because jemalloc needs two rounds
after the final malloc/free call to free its thread-specific data (see
https://reviews.llvm.org/D46978#1107507). */
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 1
#else
#define EMUTLS_SKIP_DESTRUCTOR_ROUNDS 0
#endif
typedef struct emutls_address_array {
uintptr_t skip_destructor_rounds;
uintptr_t size; /* number of elements in the 'data' array */
void* data[];
} emutls_address_array;
@ -65,9 +80,30 @@ static __inline void emutls_memalign_free(void *base) {
#endif
}
static __inline void emutls_setspecific(emutls_address_array *value) {
pthread_setspecific(emutls_pthread_key, (void*) value);
}
static __inline emutls_address_array* emutls_getspecific() {
return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
}
static void emutls_key_destructor(void* ptr) {
emutls_shutdown((emutls_address_array*)ptr);
free(ptr);
emutls_address_array *array = (emutls_address_array*)ptr;
if (array->skip_destructor_rounds > 0) {
/* emutls is deallocated using a pthread key destructor. These
* destructors are called in several rounds to accommodate destructor
* functions that (re)initialize key values with pthread_setspecific.
* Delay the emutls deallocation to accommodate other end-of-thread
* cleanup tasks like calling thread_local destructors (e.g. the
* __cxa_thread_atexit fallback in libc++abi).
*/
array->skip_destructor_rounds--;
emutls_setspecific(array);
} else {
emutls_shutdown(array);
free(ptr);
}
}
static __inline void emutls_init(void) {
@ -88,15 +124,7 @@ static __inline void emutls_unlock() {
pthread_mutex_unlock(&emutls_mutex);
}
static __inline void emutls_setspecific(emutls_address_array *value) {
pthread_setspecific(emutls_pthread_key, (void*) value);
}
static __inline emutls_address_array* emutls_getspecific() {
return (emutls_address_array*) pthread_getspecific(emutls_pthread_key);
}
#else
#else /* _WIN32 */
#include <windows.h>
#include <malloc.h>
@ -222,11 +250,11 @@ static __inline void __atomic_store_n(void *ptr, uintptr_t val, unsigned type) {
InterlockedExchangePointer((void *volatile *)ptr, (void *)val);
}
#endif
#endif /* __ATOMIC_RELEASE */
#pragma warning (pop)
#endif
#endif /* _WIN32 */
static size_t emutls_num_object = 0; /* number of allocated TLS objects */
@ -314,11 +342,12 @@ static __inline void emutls_check_array_set_size(emutls_address_array *array,
* which must be no smaller than the given index.
*/
static __inline uintptr_t emutls_new_data_array_size(uintptr_t index) {
/* Need to allocate emutls_address_array with one extra slot
* to store the data array size.
/* Need to allocate emutls_address_array with extra slots
* to store the header.
* Round up the emutls_address_array size to multiple of 16.
*/
return ((index + 1 + 15) & ~((uintptr_t)15)) - 1;
uintptr_t header_words = sizeof(emutls_address_array) / sizeof(void *);
return ((index + header_words + 15) & ~((uintptr_t)15)) - header_words;
}
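/* For example, on a typical 64-bit target the two header words
   (skip_destructor_rounds and size) give header_words == 2, so an index of 1
   yields ((1 + 2 + 15) & ~15) - 2 == 14 data slots, 16 words in total. */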
/* Returns the size in bytes required for an emutls_address_array with
@ -337,8 +366,10 @@ emutls_get_address_array(uintptr_t index) {
if (array == NULL) {
uintptr_t new_size = emutls_new_data_array_size(index);
array = (emutls_address_array*) malloc(emutls_asize(new_size));
if (array)
if (array) {
memset(array->data, 0, new_size * sizeof(void*));
array->skip_destructor_rounds = EMUTLS_SKIP_DESTRUCTOR_ROUNDS;
}
emutls_check_array_set_size(array, new_size);
} else if (index > array->size) {
uintptr_t orig_size = array->size;

View File

@ -0,0 +1,103 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues
used to save code size */
.macro FUNCTION_BEGIN name
.text
.globl \name
.type \name, @function
.falign
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.size \name0, . - \name0
.globl \name1
.type \name1, @function
.falign
\name1:
.endm
/* Save r25:24 at fp+#-8 and r27:26 at fp+#-16. */
/* The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
/* Since we can only issue one store per packet, we don't hurt performance by
simply jumping to the right point in this sequence of stores. */
FUNCTION_BEGIN __save_r24_through_r27
memd(fp+#-16) = r27:26
FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
{
memd(fp+#-8) = r25:24
jumpr lr
}
FUNCTION_END __save_r24_through_r25
/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the tail call. */
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
r27:26 = memd(fp+#-16)
FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
{
r25:24 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
/* Here we use the extra load bandwidth to restore LR early, allowing the return
to occur in parallel with the deallocframe. */
FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
{
lr = memw(fp+#4)
r27:26 = memd(fp+#-16)
}
{
r25:24 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r24_through_r27_and_deallocframe
/* Here the load bandwidth is maximized. */
FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
{
r25:24 = memd(fp+#-8)
deallocframe
}
jumpr lr
FUNCTION_END __restore_r24_through_r25_and_deallocframe

View File

@ -0,0 +1,268 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues
used to save code size */
.macro FUNCTION_BEGIN name
.p2align 2
.section .text.\name,"ax",@progbits
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.p2align 2
.size \name0, . - \name0
.globl \name1
.type \name1, @function
\name1:
.endm
/* Save r17:16 at fp+#-8, r19:18 at fp+#-16, r21:20 at fp+#-24, r23:22 at
fp+#-32, r25:24 at fp+#-40, and r27:26 at fp+#-48.
The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
FUNCTION_BEGIN __save_r16_through_r27
{
memd(fp+#-48) = r27:26
memd(fp+#-40) = r25:24
}
{
memd(fp+#-32) = r23:22
memd(fp+#-24) = r21:20
}
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r27
FUNCTION_BEGIN __save_r16_through_r25
{
memd(fp+#-40) = r25:24
memd(fp+#-32) = r23:22
}
{
memd(fp+#-24) = r21:20
memd(fp+#-16) = r19:18
}
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r25
FUNCTION_BEGIN __save_r16_through_r23
{
memd(fp+#-32) = r23:22
memd(fp+#-24) = r21:20
}
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r23
FUNCTION_BEGIN __save_r16_through_r21
{
memd(fp+#-24) = r21:20
memd(fp+#-16) = r19:18
}
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r21
FUNCTION_BEGIN __save_r16_through_r19
{
memd(fp+#-16) = r19:18
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r19
FUNCTION_BEGIN __save_r16_through_r17
{
memd(fp+#-8) = r17:16
jumpr lr
}
FUNCTION_END __save_r16_through_r17
/* For each of the *_before_tailcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the tail call. */
FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall
r27:26 = memd(fp+#-48)
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r27_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe_before_tailcall
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r25_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe_before_tailcall
{
r23:22 = memd(fp+#-32)
r21:20 = memd(fp+#-24)
}
r19:18 = memd(fp+#-16)
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r23_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe_before_tailcall
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r21_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe_before_tailcall
r19:18 = memd(fp+#-16)
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe_before_tailcall
{
r17:16 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r16_through_r17_and_deallocframe_before_tailcall
FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe
r27:26 = memd(fp+#-48)
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r27_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe
{
r25:24 = memd(fp+#-40)
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r25_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe
{
r23:22 = memd(fp+#-32)
}
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r23_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe
{
r21:20 = memd(fp+#-24)
r19:18 = memd(fp+#-16)
}
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r21_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe
{
r19:18 = memd(fp+#-16)
r17:16 = memd(fp+#-8)
}
{
dealloc_return
}
FUNCTION_END __restore_r16_through_r19_and_deallocframe
FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe
{
r17:16 = memd(fp+#-8)
dealloc_return
}
FUNCTION_END __restore_r16_through_r17_and_deallocframe
FUNCTION_BEGIN __deallocframe
dealloc_return
FUNCTION_END __deallocframe

View File

@ -0,0 +1,157 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Functions that implement common sequences in function prologues and epilogues
used to save code size */
.macro FUNCTION_BEGIN name
.text
.globl \name
.type \name, @function
.falign
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
.macro FALLTHROUGH_TAIL_CALL name0 name1
.size \name0, . - \name0
.globl \name1
.type \name1, @function
.falign
\name1:
.endm
/* Save r27:26 at fp+#-8, r25:24 at fp+#-16, r23:22 at fp+#-24, r21:20 at
fp+#-32, r19:18 at fp+#-40, and r17:16 at fp+#-48. */
/* The compiler knows that the __save_* functions clobber LR. No other
registers should be used without informing the compiler. */
/* Since we can only issue one store per packet, we don't hurt performance by
simply jumping to the right point in this sequence of stores. */
FUNCTION_BEGIN __save_r27_through_r16
memd(fp+#-48) = r17:16
FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
memd(fp+#-40) = r19:18
FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
memd(fp+#-32) = r21:20
FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
memd(fp+#-24) = r23:22
FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
memd(fp+#-16) = r25:24
{
memd(fp+#-8) = r27:26
jumpr lr
}
FUNCTION_END __save_r27_through_r24
/* For each of the *_before_sibcall functions, jumpr lr is executed in parallel
with deallocframe. That way, the return gets the old value of lr, which is
where these functions need to return, and at the same time, lr gets the value
it needs going into the sibcall. */
FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
{
r21:20 = memd(fp+#-32)
r23:22 = memd(fp+#-24)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
{
r25:24 = memd(fp+#-16)
jump __restore_r27_through_r26_and_deallocframe_before_sibcall
}
FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
r17:16 = memd(fp+#-48)
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
{
r19:18 = memd(fp+#-40)
r21:20 = memd(fp+#-32)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
{
r23:22 = memd(fp+#-24)
r25:24 = memd(fp+#-16)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
{
r27:26 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall
/* Here we use the extra load bandwidth to restore LR early, allowing the return
to occur in parallel with the deallocframe. */
FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
{
r17:16 = memd(fp+#-48)
r19:18 = memd(fp+#-40)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
{
r21:20 = memd(fp+#-32)
r23:22 = memd(fp+#-24)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
{
lr = memw(fp+#4)
r25:24 = memd(fp+#-16)
}
{
r27:26 = memd(fp+#-8)
deallocframe
jumpr lr
}
FUNCTION_END __restore_r27_through_r24_and_deallocframe
/* Here the load bandwidth is maximized for all three functions. */
FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
{
r19:18 = memd(fp+#-40)
r21:20 = memd(fp+#-32)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
{
r23:22 = memd(fp+#-24)
r25:24 = memd(fp+#-16)
}
FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
{
r27:26 = memd(fp+#-8)
deallocframe
}
jumpr lr
FUNCTION_END __restore_r27_through_r26_and_deallocframe

View File

@ -0,0 +1,398 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Add/Subtract */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define EXPA r4
#define EXPB r5
#define EXPB_A r5:4
#define ZTMP r7:6
#define ZTMPH r7
#define ZTMPL r6
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define BTMP r9:8
#define BTMPH r9
#define BTMPL r8
#define ATMP2 r11:10
#define ATMP2H r11
#define ATMP2L r10
#define EXPDIFF r15
#define EXTRACTOFF r14
#define EXTRACTAMT r15:14
#define TMP r28
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
#define SR_BIT_INEXACT 5
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
#define NORMAL p3
#define BIGB p2
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
.text
.global __hexagon_adddf3
.global __hexagon_subdf3
.type __hexagon_adddf3, @function
.type __hexagon_subdf3, @function
Q6_ALIAS(adddf3)
FAST_ALIAS(adddf3)
FAST2_ALIAS(adddf3)
Q6_ALIAS(subdf3)
FAST_ALIAS(subdf3)
FAST2_ALIAS(subdf3)
.p2align 5
__hexagon_adddf3:
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
ATMP = combine(##0x20000000,#0)
}
{
NORMAL = dfclass(A,#2)
NORMAL = dfclass(B,#2)
BTMP = ATMP
BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A?
}
{
if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
if (BIGB) A = B // if B >> A, swap A and B
if (BIGB) B = A // If B >> A, swap A and B
if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
EXPDIFF = sub(EXPA,EXPB)
ZTMP = combine(#62,#1)
}
#undef BIGB
#undef NORMAL
#define B_POS p3
#define A_POS p2
#define NO_STICKIES p1
.Ladd_continue:
{
EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
// will collapse to sticky bit
ATMP2 = neg(ATMP)
A_POS = cmp.gt(AH,#-1)
EXTRACTOFF = #0
}
{
if (!A_POS) ATMP = ATMP2
ATMP2 = extractu(BTMP,EXTRACTAMT)
BTMP = ASR(BTMP,EXPDIFF)
#undef EXTRACTAMT
#undef EXPDIFF
#undef EXTRACTOFF
#define ZERO r15:14
ZERO = #0
}
{
NO_STICKIES = cmp.eq(ATMP2,ZERO)
if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
EXPB = add(EXPA,#-BIAS-60)
B_POS = cmp.gt(BH,#-1)
}
{
ATMP = add(ATMP,BTMP) // ADD!!!
ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
ZTMP = combine(#54,##2045)
}
{
p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
p0 = !cmp.gtu(EXPA,ZTMPL)
if (!p0.new) jump:nt .Ladd_ovf_unf
if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
}
{
A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
}
{
AH += asl(EXPB,#HI_MANTBITS)
jumpr r31
}
.falign
__hexagon_subdf3:
{
BH = togglebit(BH,#31)
jump __qdsp_adddf3
}
.falign
.Ladd_zero:
// True zero, full cancellation
// +0 unless round towards negative infinity
{
TMP = USR
A = #0
BH = #1
}
{
TMP = extractu(TMP,#2,#22)
BH = asl(BH,#31)
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = xor(AH,BH)
jumpr r31
}
.falign
.Ladd_ovf_unf:
// Overflow or Denormal is possible
// Good news: Underflow flag is not possible!
/*
* ATMP has 2's complement value
*
* EXPA has A's exponent, EXPB has EXPA-BIAS-60
*
* Convert, extract exponent, add adjustment.
* If > 2046, overflow
* If <= 0, denormal
*
* Note that we've not done our zero check yet, so do that too
*
*/
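/* Roughly, in C terms (an illustrative sketch only; the function and variable names
   are placeholders, not part of the library):

       // Classify the adjusted exponent the same way the code below does.
       // 'biased_exp' is the exponent field of the converted sum and 'adjust' is
       // the pending correction (EXPA - BIAS - 60 above).
       static int classify_result_exponent(int biased_exp, int adjust) {
           int e = biased_exp + adjust;
           if (e > 2046) return  1;   // overflow: max finite or infinity
           if (e <= 0)   return -1;   // denormal: shift the mantissa right by 1 - e
           return 0;                  // in range: fold the adjustment into the exponent
       }
*/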
{
A = convert_d2df(ATMP)
p0 = cmp.eq(ATMPH,#0)
p0 = cmp.eq(ATMPL,#0)
if (p0.new) jump:nt .Ladd_zero
}
{
TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
AH += asl(EXPB,#HI_MANTBITS)
}
{
EXPB = add(EXPB,TMP)
B = combine(##0x00100000,#0)
}
{
p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
if (p0.new) jump:nt .Ladd_ovf
}
{
p0 = cmp.gt(EXPB,#0)
if (p0.new) jumpr:t r31
TMP = sub(#1,EXPB)
}
{
B = insert(A,#MANTBITS,#0)
A = ATMP
}
{
B = lsr(B,TMP)
}
{
A = insert(B,#63,#0)
jumpr r31
}
.falign
.Ladd_ovf:
// We get either max finite value or infinity. Either way, overflow+inexact
{
A = ATMP // 2's complement value
TMP = USR
ATMP = combine(##0x7fefffff,#-1) // positive max finite
}
{
EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
TMP = or(TMP,#0x28) // inexact + overflow
BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
USR = TMP
EXPB ^= lsr(AH,#31) // Does sign match rounding?
TMP = EXPB // unmodified rounding mode
}
{
p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
if (p0.new) ATMP = BTMP // we should get infinity
}
{
A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.Ladd_abnormal:
{
ATMP = extractu(A,#63,#0) // strip off sign
BTMP = extractu(B,#63,#0) // strip off sign
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A // sort values
}
{
// Any NaN --> NaN, possibly raise invalid if sNaN
p0 = dfclass(A,#0x0f) // A not NaN?
if (!p0.new) jump:nt .Linvalid_nan_add
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
// Infinity + non-infinity number is infinity
// Infinity + infinity --> inf or nan
p1 = dfclass(A,#0x08) // A is infinity
if (p1.new) jump:nt .Linf_add
}
{
p2 = dfclass(B,#0x01) // B is zero
if (p2.new) jump:nt .LB_zero // so return A or special 0+0
ATMP = #0
}
// We are left with adding one or more subnormals
{
p0 = dfclass(A,#4)
if (p0.new) jump:nt .Ladd_two_subnormal
ATMP = combine(##0x20000000,#0)
}
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = #1
// BTMP already ABS(B)
BTMP = asl(BTMP,#EXPBITS-2)
}
#undef ZERO
#define EXTRACTOFF r14
#define EXPDIFF r15
{
ATMP = insert(A,#MANTBITS,#EXPBITS-2)
EXPDIFF = sub(EXPA,EXPB)
ZTMP = combine(#62,#1)
jump .Ladd_continue
}
.Ladd_two_subnormal:
{
ATMP = extractu(A,#63,#0)
BTMP = extractu(B,#63,#0)
}
{
ATMP = neg(ATMP)
BTMP = neg(BTMP)
p0 = cmp.gt(AH,#-1)
p1 = cmp.gt(BH,#-1)
}
{
if (p0) ATMP = A
if (p1) BTMP = B
}
{
ATMP = add(ATMP,BTMP)
}
{
BTMP = neg(ATMP)
p0 = cmp.gt(ATMPH,#-1)
B = #0
}
{
if (!p0) A = BTMP
if (p0) A = ATMP
BH = ##0x80000000
}
{
if (!p0) AH = or(AH,BH)
p0 = dfcmp.eq(A,B)
if (p0.new) jump:nt .Lzero_plus_zero
}
{
jumpr r31
}
.Linvalid_nan_add:
{
TMP = convert_df2sf(A) // will generate invalid if sNaN
p0 = dfclass(B,#0x0f) // if B is not NaN
if (p0.new) B = A // make it whatever A is
}
{
BL = convert_df2sf(B) // will generate invalid if sNaN
A = #-1
jumpr r31
}
.falign
.LB_zero:
{
p0 = dfcmp.eq(ATMP,A) // is A also zero?
if (!p0.new) jumpr:t r31 // If not, just return A
}
// 0 + 0 is special
// if equal integral values, they have the same sign, which is fine for all rounding
// modes.
// If unequal in sign, we get +0 for all rounding modes except round down
.Lzero_plus_zero:
{
p0 = cmp.eq(A,B)
if (p0.new) jumpr:t r31
}
{
TMP = USR
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
A = #0
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
jumpr r31
}
.Linf_add:
// adding infinities is only OK if they are equal
{
p0 = !cmp.eq(AH,BH) // Do they have different signs
p0 = dfclass(B,#8) // And is B also infinite?
if (!p0.new) jumpr:t r31 // If not, just a normal inf
}
{
BL = ##0x7f800001 // sNAN
}
{
A = convert_sf2df(BL) // trigger invalid, set NaN
jumpr r31
}
END(__hexagon_adddf3)

View File

@ -0,0 +1,492 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Divide */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define Q r5:4
#define QH r5
#define QL r4
#define PROD r7:6
#define PRODHI r7
#define PRODLO r6
#define SFONE r8
#define SFDEN r9
#define SFERROR r10
#define SFRECIP r11
#define EXPBA r13:12
#define EXPB r13
#define EXPA r12
#define REMSUB2 r15:14
#define SIGN r28
#define Q_POSITIVE p3
#define NORMAL p2
#define NO_OVF_UNF p1
#define P_TMP p0
#define RECIPEST_SHIFT 3
#define QADJ 61
#define DFCLASS_NORMAL 0x02
#define DFCLASS_NUMBER 0x0F
#define DFCLASS_INFINITE 0x08
#define DFCLASS_ZERO 0x01
#define DFCLASS_NONZERO (DFCLASS_NUMBER ^ DFCLASS_ZERO)
#define DFCLASS_NONINFINITE (DFCLASS_NUMBER ^ DFCLASS_INFINITE)
#define DF_MANTBITS 52
#define DF_EXPBITS 11
#define SF_MANTBITS 23
#define SF_EXPBITS 8
#define DF_BIAS 0x3ff
#define SR_ROUND_OFF 22
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
.text
.global __hexagon_divdf3
.type __hexagon_divdf3,@function
Q6_ALIAS(divdf3)
FAST_ALIAS(divdf3)
FAST2_ALIAS(divdf3)
.p2align 5
__hexagon_divdf3:
{
NORMAL = dfclass(A,#DFCLASS_NORMAL)
NORMAL = dfclass(B,#DFCLASS_NORMAL)
EXPBA = combine(BH,AH)
SIGN = xor(AH,BH)
}
#undef A
#undef AH
#undef AL
#undef B
#undef BH
#undef BL
#define REM r1:0
#define REMHI r1
#define REMLO r0
#define DENOM r3:2
#define DENOMHI r3
#define DENOMLO r2
{
if (!NORMAL) jump .Ldiv_abnormal
PROD = extractu(DENOM,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
SFONE = ##0x3f800001
}
{
SFDEN = or(SFONE,PRODLO)
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
Q_POSITIVE = cmp.gt(SIGN,#-1)
}
#undef SIGN
#define ONE r28
.Ldenorm_continue:
{
SFRECIP,P_TMP = sfrecipa(SFONE,SFDEN)
SFERROR = and(SFONE,#-2)
ONE = #1
EXPA = sub(EXPA,EXPB)
}
#undef EXPB
#define RECIPEST r13
{
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
REMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
RECIPEST = ##0x00800000 << RECIPEST_SHIFT
}
{
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
DENOMHI = insert(ONE,#DF_EXPBITS+1,#DF_MANTBITS-32)
SFERROR = and(SFONE,#-2)
}
{
SFERROR -= sfmpy(SFRECIP,SFDEN):lib
QH = #-DF_BIAS+1
QL = #DF_BIAS-1
}
{
SFRECIP += sfmpy(SFRECIP,SFERROR):lib
NO_OVF_UNF = cmp.gt(EXPA,QH)
NO_OVF_UNF = !cmp.gt(EXPA,QL)
}
{
RECIPEST = insert(SFRECIP,#SF_MANTBITS,#RECIPEST_SHIFT)
Q = #0
EXPA = add(EXPA,#-QADJ)
}
#undef SFERROR
#undef SFRECIP
#define TMP r10
#define TMP1 r11
{
RECIPEST = add(RECIPEST,#((-3) << RECIPEST_SHIFT))
}
#define DIV_ITER1B(QSHIFTINSN,QSHIFT,REMSHIFT,EXTRA) \
{ \
PROD = mpyu(RECIPEST,REMHI); \
REM = asl(REM,# ## ( REMSHIFT )); \
}; \
{ \
PRODLO = # ## 0; \
REM -= mpyu(PRODHI,DENOMLO); \
REMSUB2 = mpyu(PRODHI,DENOMHI); \
}; \
{ \
Q += QSHIFTINSN(PROD, # ## ( QSHIFT )); \
REM -= asl(REMSUB2, # ## 32); \
EXTRA \
}
DIV_ITER1B(ASL,14,15,)
DIV_ITER1B(ASR,1,15,)
DIV_ITER1B(ASR,16,15,)
DIV_ITER1B(ASR,31,15,PROD=# ( 0 );)
#undef REMSUB2
#define TMPPAIR r15:14
#define TMPPAIRHI r15
#define TMPPAIRLO r14
#undef RECIPEST
#define EXPB r13
{
// compare or sub with carry
TMPPAIR = sub(REM,DENOM)
P_TMP = cmp.gtu(DENOM,REM)
// set up amt to add to q
if (!P_TMP.new) PRODLO = #2
}
{
Q = add(Q,PROD)
if (!P_TMP) REM = TMPPAIR
TMPPAIR = #0
}
{
P_TMP = cmp.eq(REM,TMPPAIR)
if (!P_TMP.new) QL = or(QL,ONE)
}
{
PROD = neg(Q)
}
{
if (!Q_POSITIVE) Q = PROD
}
#undef REM
#undef REMHI
#undef REMLO
#undef DENOM
#undef DENOMLO
#undef DENOMHI
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
{
A = convert_d2df(Q)
if (!NO_OVF_UNF) jump .Ldiv_ovf_unf
}
{
AH += asl(EXPA,#DF_MANTBITS-32)
jumpr r31
}
.Ldiv_ovf_unf:
{
AH += asl(EXPA,#DF_MANTBITS-32)
EXPB = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
}
{
PROD = abs(Q)
EXPA = add(EXPA,EXPB)
}
{
P_TMP = cmp.gt(EXPA,##DF_BIAS+DF_BIAS) // overflow
if (P_TMP.new) jump:nt .Ldiv_ovf
}
{
P_TMP = cmp.gt(EXPA,#0)
if (P_TMP.new) jump:nt .Lpossible_unf // round up to normal possible...
}
/* Underflow */
/* We know what the infinite range exponent should be (EXPA) */
/* Q is 2's complement, PROD is abs(Q) */
/* Normalize Q, shift right, add a high bit, convert, change exponent */
#define FUDGE1 7 // how much to shift right
#define FUDGE2 4 // how many guard/round to keep at lsbs
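/* The idea of the denormal path below, sketched in C (illustrative only; the helper
   name is a placeholder and the bit positions are schematic):

       // Shift the normalized quotient right by (FUDGE1 - exp), capped at 63,
       // folding every shifted-out bit into a sticky LSB so that the later
       // convert instruction still rounds correctly.
       static unsigned long long denorm_shift(unsigned long long q, int exp) {
           int shift = 7 - exp;                              // FUDGE1 - exp
           if (shift > 63) shift = 63;
           unsigned long long lost = q & ((1ULL << shift) - 1);
           return (q >> shift) | (lost != 0);                // sticky bit
       }
*/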
{
EXPB = add(clb(PROD),#-1) // doesn't need to be added in since
EXPA = sub(#FUDGE1,EXPA) // we extract post-converted exponent
TMP = USR
TMP1 = #63
}
{
EXPB = min(EXPA,TMP1)
TMP1 = or(TMP,#0x030)
PROD = asl(PROD,EXPB)
EXPA = #0
}
{
TMPPAIR = extractu(PROD,EXPBA) // bits that will get shifted out
PROD = lsr(PROD,EXPB) // shift out bits
B = #1
}
{
P_TMP = cmp.gtu(B,TMPPAIR)
if (!P_TMP.new) PRODLO = or(BL,PRODLO)
PRODHI = setbit(PRODHI,#DF_MANTBITS-32+FUDGE2)
}
{
Q = neg(PROD)
P_TMP = bitsclr(PRODLO,#(1<<FUDGE2)-1)
if (!P_TMP.new) TMP = TMP1
}
{
USR = TMP
if (Q_POSITIVE) Q = PROD
TMP = #-DF_BIAS-(DF_MANTBITS+FUDGE2)
}
{
A = convert_d2df(Q)
}
{
AH += asl(TMP,#DF_MANTBITS-32)
jumpr r31
}
.Lpossible_unf:
/* If upper parts of Q were all F's, but abs(A) == 0x00100000_00000000, we rounded up to min_normal */
/* The answer is correct, but we need to raise Underflow */
{
B = extractu(A,#63,#0)
TMPPAIR = combine(##0x00100000,#0) // min normal
TMP = #0x7FFF
}
{
P_TMP = dfcmp.eq(TMPPAIR,B) // Is everything zero in the rounded value...
P_TMP = bitsset(PRODHI,TMP) // but a bunch of bits set in the unrounded abs(quotient)?
}
#if (__HEXAGON_ARCH__ == 60)
TMP = USR // If not, just return
if (!P_TMP) jumpr r31 // Else, we want to set Unf+Inexact
// Note that inexact is already set...
#else
{
if (!P_TMP) jumpr r31 // If not, just return
TMP = USR // Else, we want to set Unf+Inexact
} // Note that inexact is already set...
#endif
{
TMP = or(TMP,#0x30)
}
{
USR = TMP
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.Ldiv_ovf:
/*
* Raise Overflow, and choose the correct overflow value (saturated normal or infinity)
*/
{
TMP = USR
B = combine(##0x7fefffff,#-1)
AH = mux(Q_POSITIVE,#0,#-1)
}
{
PROD = combine(##0x7ff00000,#0)
QH = extractu(TMP,#2,#SR_ROUND_OFF)
TMP = or(TMP,#0x28)
}
{
USR = TMP
QH ^= lsr(AH,#31)
QL = QH
}
{
p0 = !cmp.eq(QL,#1) // if not round-to-zero
p0 = !cmp.eq(QH,#2) // and not rounding the other way
if (p0.new) B = PROD // go to inf
p0 = dfcmp.eq(B,B) // get exceptions
}
{
A = insert(B,#63,#0)
jumpr r31
}
#undef ONE
#define SIGN r28
#undef NORMAL
#undef NO_OVF_UNF
#define P_INF p1
#define P_ZERO p2
.Ldiv_abnormal:
{
P_TMP = dfclass(A,#DFCLASS_NUMBER)
P_TMP = dfclass(B,#DFCLASS_NUMBER)
Q_POSITIVE = cmp.gt(SIGN,#-1)
}
{
P_INF = dfclass(A,#DFCLASS_INFINITE)
P_INF = dfclass(B,#DFCLASS_INFINITE)
}
{
P_ZERO = dfclass(A,#DFCLASS_ZERO)
P_ZERO = dfclass(B,#DFCLASS_ZERO)
}
{
if (!P_TMP) jump .Ldiv_nan
if (P_INF) jump .Ldiv_invalid
}
{
if (P_ZERO) jump .Ldiv_invalid
}
{
P_ZERO = dfclass(A,#DFCLASS_NONZERO) // nonzero
P_ZERO = dfclass(B,#DFCLASS_NONINFINITE) // non-infinite
}
{
P_INF = dfclass(A,#DFCLASS_NONINFINITE) // non-infinite
P_INF = dfclass(B,#DFCLASS_NONZERO) // nonzero
}
{
if (!P_ZERO) jump .Ldiv_zero_result
if (!P_INF) jump .Ldiv_inf_result
}
/* Now we've narrowed it down to (de)normal / (de)normal */
/* Set up A/EXPA B/EXPB and go back */
#undef P_ZERO
#undef P_INF
#define P_TMP2 p1
{
P_TMP = dfclass(A,#DFCLASS_NORMAL)
P_TMP2 = dfclass(B,#DFCLASS_NORMAL)
TMP = ##0x00100000
}
{
EXPBA = combine(BH,AH)
AH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
BH = insert(TMP,#DF_EXPBITS+1,#DF_MANTBITS-32) // clear out hidden bit, sign bit
}
{
if (P_TMP) AH = or(AH,TMP) // if normal, add back in hidden bit
if (P_TMP2) BH = or(BH,TMP) // if normal, add back in hidden bit
}
{
QH = add(clb(A),#-DF_EXPBITS)
QL = add(clb(B),#-DF_EXPBITS)
TMP = #1
}
{
EXPA = extractu(EXPA,#DF_EXPBITS,#DF_MANTBITS-32)
EXPB = extractu(EXPB,#DF_EXPBITS,#DF_MANTBITS-32)
}
{
A = asl(A,QH)
B = asl(B,QL)
if (!P_TMP) EXPA = sub(TMP,QH)
if (!P_TMP2) EXPB = sub(TMP,QL)
} // recreate values needed by the resumed code
{
PROD = extractu(B,#SF_MANTBITS,#DF_MANTBITS-SF_MANTBITS)
}
{
SFDEN = or(SFONE,PRODLO)
jump .Ldenorm_continue
}
.Ldiv_zero_result:
{
AH = xor(AH,BH)
B = #0
}
{
A = insert(B,#63,#0)
jumpr r31
}
.Ldiv_inf_result:
{
p2 = dfclass(B,#DFCLASS_ZERO)
p2 = dfclass(A,#DFCLASS_NONINFINITE)
}
{
TMP = USR
if (!p2) jump 1f
AH = xor(AH,BH)
}
{
TMP = or(TMP,#0x04) // DBZ
}
{
USR = TMP
}
1:
{
B = combine(##0x7ff00000,#0)
p0 = dfcmp.uo(B,B) // take possible exception
}
{
A = insert(B,#63,#0)
jumpr r31
}
.Ldiv_nan:
{
p0 = dfclass(A,#0x10)
p1 = dfclass(B,#0x10)
if (!p0.new) A = B
if (!p1.new) B = A
}
{
QH = convert_df2sf(A) // get possible invalid exceptions
QL = convert_df2sf(B)
}
{
A = #-1
jumpr r31
}
.Ldiv_invalid:
{
TMP = ##0x7f800001
}
{
A = convert_sf2df(TMP) // get invalid, get DF qNaN
jumpr r31
}
END(__hexagon_divdf3)

View File

@ -0,0 +1,705 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
/* Double Precision Fused Multiply-Add */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define C r5:4
#define CH r5
#define CL r4
#define BTMP r15:14
#define BTMPH r15
#define BTMPL r14
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define CTMP r11:10
#define CTMPH r11
#define CTMPL r10
#define PP_LL r9:8
#define PP_LL_H r9
#define PP_LL_L r8
#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6
#define PP_HH r17:16
#define PP_HH_H r17
#define PP_HH_L r16
#define EXPA r18
#define EXPB r19
#define EXPBA r19:18
#define TMP r28
#define P_TMP p0
#define PROD_NEG p3
#define EXACT p2
#define SWAP p1
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1023
#define STACKSPACE 32
#define ADJUST 4
#define FUDGE 7
#define FUDGE2 3
#ifndef SR_ROUND_OFF
#define SR_ROUND_OFF 22
#endif
/*
* First, classify for normal values, and abort if abnormal
*
* Next, unpack mantissa into 0x1000_0000_0000_0000 + mant<<8
*
* Since we know that the 2 MSBs of the H registers are zero, we should never get a
* carry out of the partial products that involve the H registers
*
* Try to buy X slots, at the expense of latency if needed
*
* We will have PP_HH with the upper bits of the product, PP_LL with the lower
* PP_HH can have a maximum of 0x03FF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x0100_0000_0000_0000
*
* 0x0100_0000_0000_0000 has EXP of EXPA+EXPB-BIAS
*
* We need to align CTMP.
* If CTMP >> PP, convert PP to 64 bit with sticky, align CTMP, and follow normal add
* If CTMP << PP align CTMP and add 128 bits. Then compute sticky
* If CTMP ~= PP, align CTMP and add 128 bits. May have massive cancellation.
*
* Convert partial product and CTMP to 2's complement prior to addition
*
* After we add, we need to normalize into upper 64 bits, then compute sticky.
*
*
*/
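/* For reference, the 128-bit product of the two unpacked mantissas is the usual
   schoolbook multiply built from four 32x32 partial products; a minimal C sketch
   (illustrative only; the helper name is a placeholder, and unlike the code below
   it keeps all carries rather than relying on the known-zero top bits):

       #include <stdint.h>
       typedef struct { uint64_t hi, lo; } u128;   // corresponds to PP_HH:PP_LL

       static u128 mul_64x64(uint64_t a, uint64_t b) {
           uint64_t al = (uint32_t)a, ah = a >> 32;
           uint64_t bl = (uint32_t)b, bh = b >> 32;
           uint64_t ll = al * bl, lh = al * bh, hl = ah * bl, hh = ah * bh;
           uint64_t mid = (ll >> 32) + (uint32_t)lh + (uint32_t)hl;  // cannot overflow
           u128 r;
           r.lo = (mid << 32) | (uint32_t)ll;
           r.hi = hh + (lh >> 32) + (hl >> 32) + (mid >> 32);
           return r;
       }

   The code below then aligns CTMP against this product with a sticky bit, adds,
   renormalizes into the upper 64 bits, and converts with the current rounding mode. */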
.text
.global __hexagon_fmadf4
.type __hexagon_fmadf4,@function
.global __hexagon_fmadf5
.type __hexagon_fmadf5,@function
.global fma
.type fma,@function
Q6_ALIAS(fmadf5)
.p2align 5
__hexagon_fmadf4:
__hexagon_fmadf5:
fma:
{
P_TMP = dfclass(A,#2)
P_TMP = dfclass(B,#2)
ATMP = #0
BTMP = #0
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-3)
BTMP = insert(B,#MANTBITS,#EXPBITS-3)
PP_ODD_H = ##0x10000000
allocframe(#STACKSPACE)
}
{
PP_LL = mpyu(ATMPL,BTMPL)
if (!P_TMP) jump .Lfma_abnormal_ab
ATMPH = or(ATMPH,PP_ODD_H)
BTMPH = or(BTMPH,PP_ODD_H)
}
{
P_TMP = dfclass(C,#2)
if (!P_TMP.new) jump:nt .Lfma_abnormal_c
CTMP = combine(PP_ODD_H,#0)
PP_ODD = combine(#0,PP_LL_H)
}
.Lfma_abnormal_c_restart:
{
PP_ODD += mpyu(BTMPL,ATMPH)
CTMP = insert(C,#MANTBITS,#EXPBITS-3)
memd(r29+#0) = PP_HH
memd(r29+#8) = EXPBA
}
{
PP_ODD += mpyu(ATMPL,BTMPH)
EXPBA = neg(CTMP)
P_TMP = cmp.gt(CH,#-1)
TMP = xor(AH,BH)
}
{
EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
PP_HH = combine(#0,PP_ODD_H)
if (!P_TMP) CTMP = EXPBA
}
{
PP_HH += mpyu(ATMPH,BTMPH)
PP_LL = combine(PP_ODD_L,PP_LL_L)
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH
#define RIGHTLEFTSHIFT r13:12
#define RIGHTSHIFT r13
#define LEFTSHIFT r12
EXPA = add(EXPA,EXPB)
#undef EXPB
#undef EXPBA
#define EXPC r19
#define EXPCA r19:18
EXPC = extractu(CH,#EXPBITS,#HI_MANTBITS)
}
/* PP_HH:PP_LL now has product */
/* CTMP is negated */
/* EXPA,B,C are extracted */
/*
* We need to negate PP
* Since we will be adding with carry later, if we need to negate,
* just invert all bits now, which we can do conditionally and in parallel
*/
#define PP_HH_TMP r15:14
#define PP_LL_TMP r7:6
{
EXPA = add(EXPA,#-BIAS+(ADJUST))
PROD_NEG = !cmp.gt(TMP,#-1)
PP_LL_TMP = #0
PP_HH_TMP = #0
}
{
PP_LL_TMP = sub(PP_LL_TMP,PP_LL,PROD_NEG):carry
P_TMP = !cmp.gt(TMP,#-1)
SWAP = cmp.gt(EXPC,EXPA) // If C >> PP
if (SWAP.new) EXPCA = combine(EXPA,EXPC)
}
{
PP_HH_TMP = sub(PP_HH_TMP,PP_HH,PROD_NEG):carry
if (P_TMP) PP_LL = PP_LL_TMP
#undef PP_LL_TMP
#define CTMP2 r7:6
#define CTMP2H r7
#define CTMP2L r6
CTMP2 = #0
EXPC = sub(EXPA,EXPC)
}
{
if (P_TMP) PP_HH = PP_HH_TMP
P_TMP = cmp.gt(EXPC,#63)
if (SWAP) PP_LL = CTMP2
if (SWAP) CTMP2 = PP_LL
}
#undef PP_HH_TMP
//#define ONE r15:14
//#define S_ONE r14
#define ZERO r15:14
#define S_ZERO r15
#undef PROD_NEG
#define P_CARRY p3
{
if (SWAP) PP_HH = CTMP // Swap C and PP
if (SWAP) CTMP = PP_HH
if (P_TMP) EXPC = add(EXPC,#-64)
TMP = #63
}
{
// If diff > 63, pre-shift-right by 64...
if (P_TMP) CTMP2 = CTMP
TMP = asr(CTMPH,#31)
RIGHTSHIFT = min(EXPC,TMP)
LEFTSHIFT = #0
}
#undef C
#undef CH
#undef CL
#define STICKIES r5:4
#define STICKIESH r5
#define STICKIESL r4
{
if (P_TMP) CTMP = combine(TMP,TMP) // sign extension of pre-shift-right-64
STICKIES = extract(CTMP2,RIGHTLEFTSHIFT)
CTMP2 = lsr(CTMP2,RIGHTSHIFT)
LEFTSHIFT = sub(#64,RIGHTSHIFT)
}
{
ZERO = #0
TMP = #-2
CTMP2 |= lsl(CTMP,LEFTSHIFT)
CTMP = asr(CTMP,RIGHTSHIFT)
}
{
P_CARRY = cmp.gtu(STICKIES,ZERO) // If we have sticky bits from C shift
if (P_CARRY.new) CTMP2L = and(CTMP2L,TMP) // make sure adding 1 == OR
#undef ZERO
#define ONE r15:14
#define S_ONE r14
ONE = #1
STICKIES = #0
}
{
PP_LL = add(CTMP2,PP_LL,P_CARRY):carry // use the carry to add the sticky
}
{
PP_HH = add(CTMP,PP_HH,P_CARRY):carry
TMP = #62
}
/*
* PP_HH:PP_LL now holds the sum
* We may need to normalize left, up to ??? bits.
*
* I think that if we have massive cancellation, the range we normalize by
* is still limited
*/
{
LEFTSHIFT = add(clb(PP_HH),#-2)
if (!cmp.eq(LEFTSHIFT.new,TMP)) jump:t 1f // all sign bits?
}
/* We had all sign bits, shift left by 62. */
{
CTMP = extractu(PP_LL,#62,#2)
PP_LL = asl(PP_LL,#62)
EXPA = add(EXPA,#-62) // And adjust exponent of result
}
{
PP_HH = insert(CTMP,#62,#0) // Then shift 63
}
{
LEFTSHIFT = add(clb(PP_HH),#-2)
}
.falign
1:
{
CTMP = asl(PP_HH,LEFTSHIFT)
STICKIES |= asl(PP_LL,LEFTSHIFT)
RIGHTSHIFT = sub(#64,LEFTSHIFT)
EXPA = sub(EXPA,LEFTSHIFT)
}
{
CTMP |= lsr(PP_LL,RIGHTSHIFT)
EXACT = cmp.gtu(ONE,STICKIES)
TMP = #BIAS+BIAS-2
}
{
if (!EXACT) CTMPL = or(CTMPL,S_ONE)
// If EXPA is overflow/underflow, jump to ovf_unf
P_TMP = !cmp.gt(EXPA,TMP)
P_TMP = cmp.gt(EXPA,#1)
if (!P_TMP.new) jump:nt .Lfma_ovf_unf
}
{
// XXX: FIXME: should PP_HH for check of zero be CTMP?
P_TMP = cmp.gtu(ONE,CTMP) // is result true zero?
A = convert_d2df(CTMP)
EXPA = add(EXPA,#-BIAS-60)
PP_HH = memd(r29+#0)
}
{
AH += asl(EXPA,#HI_MANTBITS)
EXPCA = memd(r29+#8)
if (!P_TMP) dealloc_return // not zero, return
}
.Ladd_yields_zero:
/* We had full cancellation. Return +/- zero (-0 when round-down) */
{
TMP = USR
A = #0
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
PP_HH = memd(r29+#0)
EXPCA = memd(r29+#8)
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
dealloc_return
}
#undef RIGHTLEFTSHIFT
#undef RIGHTSHIFT
#undef LEFTSHIFT
#undef CTMP2
#undef CTMP2H
#undef CTMP2L
.Lfma_ovf_unf:
{
p0 = cmp.gtu(ONE,CTMP)
if (p0.new) jump:nt .Ladd_yields_zero
}
{
A = convert_d2df(CTMP)
EXPA = add(EXPA,#-BIAS-60)
TMP = EXPA
}
#define NEW_EXPB r7
#define NEW_EXPA r6
{
AH += asl(EXPA,#HI_MANTBITS)
NEW_EXPB = extractu(AH,#EXPBITS,#HI_MANTBITS)
}
{
NEW_EXPA = add(EXPA,NEW_EXPB)
PP_HH = memd(r29+#0)
EXPCA = memd(r29+#8)
#undef PP_HH
#undef PP_HH_H
#undef PP_HH_L
#undef EXPCA
#undef EXPC
#undef EXPA
#undef PP_LL
#undef PP_LL_H
#undef PP_LL_L
#define EXPA r6
#define EXPB r7
#define EXPBA r7:6
#define ATMP r9:8
#define ATMPH r9
#define ATMPL r8
#undef NEW_EXPB
#undef NEW_EXPA
ATMP = abs(CTMP)
}
{
p0 = cmp.gt(EXPA,##BIAS+BIAS)
if (p0.new) jump:nt .Lfma_ovf
}
{
p0 = cmp.gt(EXPA,#0)
if (p0.new) jump:nt .Lpossible_unf
}
{
// TMP has original EXPA.
// ATMP is corresponding value
// Normalize ATMP and shift right to correct location
EXPB = add(clb(ATMP),#-2) // Amount to left shift to normalize
EXPA = sub(#1+5,TMP) // Amount to right shift to denormalize
p3 = cmp.gt(CTMPH,#-1)
}
/* Underflow */
/* We know that the infinite range exponent should be EXPA */
/* CTMP is 2's complement, ATMP is abs(CTMP) */
{
EXPA = add(EXPA,EXPB) // how much to shift back right
ATMP = asl(ATMP,EXPB) // shift left
AH = USR
TMP = #63
}
{
EXPB = min(EXPA,TMP)
EXPA = #0
AL = #0x0030
}
{
B = extractu(ATMP,EXPBA)
ATMP = asr(ATMP,EXPB)
}
{
p0 = cmp.gtu(ONE,B)
if (!p0.new) ATMPL = or(ATMPL,S_ONE)
ATMPH = setbit(ATMPH,#HI_MANTBITS+FUDGE2)
}
{
CTMP = neg(ATMP)
p1 = bitsclr(ATMPL,#(1<<FUDGE2)-1)
if (!p1.new) AH = or(AH,AL)
B = #0
}
{
if (p3) CTMP = ATMP
USR = AH
TMP = #-BIAS-(MANTBITS+FUDGE2)
}
{
A = convert_d2df(CTMP)
}
{
AH += asl(TMP,#HI_MANTBITS)
dealloc_return
}
.Lpossible_unf:
{
TMP = ##0x7fefffff
ATMP = abs(CTMP)
}
{
p0 = cmp.eq(AL,#0)
p0 = bitsclr(AH,TMP)
if (!p0.new) dealloc_return:t
TMP = #0x7fff
}
{
p0 = bitsset(ATMPH,TMP)
BH = USR
BL = #0x0030
}
{
if (p0) BH = or(BH,BL)
}
{
USR = BH
}
{
p0 = dfcmp.eq(A,A)
dealloc_return
}
.Lfma_ovf:
{
TMP = USR
CTMP = combine(##0x7fefffff,#-1)
A = CTMP
}
{
ATMP = combine(##0x7ff00000,#0)
BH = extractu(TMP,#2,#SR_ROUND_OFF)
TMP = or(TMP,#0x28)
}
{
USR = TMP
BH ^= lsr(AH,#31)
BL = BH
}
{
p0 = !cmp.eq(BL,#1)
p0 = !cmp.eq(BH,#2)
}
{
p0 = dfcmp.eq(ATMP,ATMP)
if (p0.new) CTMP = ATMP
}
{
A = insert(CTMP,#63,#0)
dealloc_return
}
#undef CTMP
#undef CTMPH
#undef CTMPL
#define BTMP r11:10
#define BTMPH r11
#define BTMPL r10
#undef STICKIES
#undef STICKIESH
#undef STICKIESL
#define C r5:4
#define CH r5
#define CL r4
.Lfma_abnormal_ab:
{
ATMP = extractu(A,#63,#0)
BTMP = extractu(B,#63,#0)
deallocframe
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A
}
{
p0 = dfclass(A,#0x0f) // A NaN?
if (!p0.new) jump:nt .Lnan
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
p1 = dfclass(A,#0x08) // A is infinity
p1 = dfclass(B,#0x0e) // B is nonzero
}
{
p0 = dfclass(A,#0x08) // a is inf
p0 = dfclass(B,#0x01) // b is zero
}
{
if (p1) jump .Lab_inf
p2 = dfclass(B,#0x01)
}
{
if (p0) jump .Linvalid
if (p2) jump .Lab_true_zero
TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal, A > B
// If A and B are both very small, we will go to a single sticky bit; replace
// A and B lower 63 bits with 0x0010_0000_0000_0000, which yields equivalent results.
// If A and B might multiply to something bigger, decrease A exp and increase B exp
// and start over
{
p0 = bitsclr(AH,TMP)
if (p0.new) jump:nt .Lfma_ab_tiny
}
{
TMP = add(clb(BTMP),#-EXPBITS)
}
{
BTMP = asl(BTMP,TMP)
}
{
B = insert(BTMP,#63,#0)
AH -= asl(TMP,#HI_MANTBITS)
}
jump fma
.Lfma_ab_tiny:
ATMP = combine(##0x00100000,#0)
{
A = insert(ATMP,#63,#0)
B = insert(ATMP,#63,#0)
}
jump fma
.Lab_inf:
{
B = lsr(B,#63)
p0 = dfclass(C,#0x10)
}
{
A ^= asl(B,#63)
if (p0) jump .Lnan
}
{
p1 = dfclass(C,#0x08)
if (p1.new) jump:nt .Lfma_inf_plus_inf
}
/* A*B is +/- inf, C is finite. Return A */
{
jumpr r31
}
.falign
.Lfma_inf_plus_inf:
{ // adding infinities of different signs is invalid
p0 = dfcmp.eq(A,C)
if (!p0.new) jump:nt .Linvalid
}
{
jumpr r31
}
.Lnan:
{
p0 = dfclass(B,#0x10)
p1 = dfclass(C,#0x10)
if (!p0.new) B = A
if (!p1.new) C = A
}
{ // find sNaNs
BH = convert_df2sf(B)
BL = convert_df2sf(C)
}
{
BH = convert_df2sf(A)
A = #-1
jumpr r31
}
.Linvalid:
{
TMP = ##0x7f800001 // sp snan
}
{
A = convert_sf2df(TMP)
jumpr r31
}
.Lab_true_zero:
// B is zero, A is finite number
{
p0 = dfclass(C,#0x10)
if (p0.new) jump:nt .Lnan
if (p0.new) A = C
}
{
p0 = dfcmp.eq(B,C) // is C also zero?
AH = lsr(AH,#31) // get sign
}
{
BH ^= asl(AH,#31) // form correctly signed zero in B
if (!p0) A = C // If C is not zero, return C
if (!p0) jumpr r31
}
/* B has correctly signed zero, C is also zero */
.Lzero_plus_zero:
{
p0 = cmp.eq(B,C) // yes, scalar equals. +0++0 or -0+-0
if (p0.new) jumpr:t r31
A = B
}
{
TMP = USR
}
{
TMP = extractu(TMP,#2,#SR_ROUND_OFF)
A = #0
}
{
p0 = cmp.eq(TMP,#2)
if (p0.new) AH = ##0x80000000
jumpr r31
}
#undef BTMP
#undef BTMPH
#undef BTMPL
#define CTMP r11:10
.falign
.Lfma_abnormal_c:
/* We know that AB is normal * normal */
/* C is not normal: zero, subnormal, inf, or NaN. */
{
p0 = dfclass(C,#0x10) // is C NaN?
if (p0.new) jump:nt .Lnan
if (p0.new) A = C // move NaN to A
deallocframe
}
{
p0 = dfclass(C,#0x08) // is C inf?
if (p0.new) A = C // return C
if (p0.new) jumpr:nt r31
}
// zero or subnormal
// If we have a zero, and we know AB is normal*normal, we can just call normal multiply
{
p0 = dfclass(C,#0x01) // is C zero?
if (p0.new) jump:nt __hexagon_muldf3
TMP = #1
}
// Left with: subnormal
// Adjust C and jump back to restart
{
allocframe(#STACKSPACE) // oops, deallocated above, re-allocate frame
CTMP = #0
CH = insert(TMP,#EXPBITS,#HI_MANTBITS)
jump .Lfma_abnormal_c_restart
}
END(fma)

View File

@ -0,0 +1,79 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define A r1:0
#define B r3:2
#define ATMP r5:4
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
/*
* Min and Max return A if B is NaN, or B if A is NaN
* Otherwise, they return the smaller or bigger value
*
* If values are equal, we want to favor -0.0 for min and +0.0 for max.
*/
/*
* Compares always return false for NaN
* if (isnan(A)) A = B; if (A > B) A = B: at most one of these assignments will trigger.
*/
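/* A minimal C sketch of the same selection rules (illustrative only; it uses the
   standard isnan() in place of dfclass, and 'ref_fmin' is a placeholder name):

       #include <math.h>
       #include <stdint.h>
       #include <string.h>

       static double ref_fmin(double a, double b) {
           if (isnan(a)) a = b;            // NaN input: take the other operand
           if (a > b)    a = b;            // gt is false if either operand is NaN
           if (a == b) {                   // equal, e.g. +0.0 vs -0.0:
               uint64_t ua, ub;            // OR the bit patterns so -0.0 wins
               memcpy(&ua, &a, sizeof a);
               memcpy(&ub, &b, sizeof b);
               ua |= ub;
               memcpy(&a, &ua, sizeof a);
           }
           return a;                       // fmax is the same with the compare
       }                                   // reversed and AND instead of OR
*/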
.text
.global __hexagon_mindf3
.global __hexagon_maxdf3
.global fmin
.type fmin,@function
.global fmax
.type fmax,@function
.type __hexagon_mindf3,@function
.type __hexagon_maxdf3,@function
Q6_ALIAS(mindf3)
Q6_ALIAS(maxdf3)
.p2align 5
__hexagon_mindf3:
fmin:
{
p0 = dfclass(A,#0x10) // Is A a NaN?
p1 = dfcmp.gt(A,B) // Is A > B? Then take B instead
ATMP = A
}
{
if (p0) A = B // if A is NaN use B
if (p1) A = B // gt is always false if either is NaN
p2 = dfcmp.eq(A,B) // if A == B
if (!p2.new) jumpr:t r31
}
/* A == B, return A|B to select -0.0 over 0.0 */
{
A = or(ATMP,B)
jumpr r31
}
END(__hexagon_mindf3)
.falign
__hexagon_maxdf3:
fmax:
{
p0 = dfclass(A,#0x10)
p1 = dfcmp.gt(B,A)
ATMP = A
}
{
if (p0) A = B
if (p1) A = B
p2 = dfcmp.eq(A,B)
if (!p2.new) jumpr:t r31
}
/* A == B, return A&B to select 0.0 over -0.0 */
{
A = and(ATMP,B)
jumpr r31
}
END(__hexagon_maxdf3)

View File

@ -0,0 +1,418 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Multiply */
#define A r1:0
#define AH r1
#define AL r0
#define B r3:2
#define BH r3
#define BL r2
#define BTMP r5:4
#define BTMPH r5
#define BTMPL r4
#define PP_ODD r7:6
#define PP_ODD_H r7
#define PP_ODD_L r6
#define ONE r9:8
#define S_ONE r8
#define S_ZERO r9
#define PP_HH r11:10
#define PP_HH_H r11
#define PP_HH_L r10
#define ATMP r13:12
#define ATMPH r13
#define ATMPL r12
#define PP_LL r15:14
#define PP_LL_H r15
#define PP_LL_L r14
#define TMP r28
#define MANTBITS 52
#define HI_MANTBITS 20
#define EXPBITS 11
#define BIAS 1024
#define MANTISSA_TO_INT_BIAS 52
/* Constant to adjust the normalization amount in the error-handling code */
/* Amount to right shift the partial product to get to a denorm */
#define FUDGE 5
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
#define END(TAG) .size TAG,.-TAG
#define SR_ROUND_OFF 22
.text
.global __hexagon_muldf3
.type __hexagon_muldf3,@function
Q6_ALIAS(muldf3)
FAST_ALIAS(muldf3)
FAST2_ALIAS(muldf3)
.p2align 5
__hexagon_muldf3:
{
p0 = dfclass(A,#2)
p0 = dfclass(B,#2)
ATMP = combine(##0x40000000,#0)
}
{
ATMP = insert(A,#MANTBITS,#EXPBITS-1)
BTMP = asl(B,#EXPBITS-1)
TMP = #-BIAS
ONE = #1
}
{
PP_ODD = mpyu(BTMPL,ATMPH)
BTMP = insert(ONE,#2,#62)
}
/* since we know that the MSB of the H registers is zero, we should never carry */
/* H <= 2^31-1. L <= 2^32-1. Therefore, HL <= 2^63-2^32-2^31+1 */
/* Adding 2 HLs, we get 2^64-3*2^32+2 maximum. */
/* Therefore, we can add 3 2^32-1 values safely without carry. We only need one. */
{
PP_LL = mpyu(ATMPL,BTMPL)
PP_ODD += mpyu(ATMPL,BTMPH)
}
{
PP_ODD += lsr(PP_LL,#32)
PP_HH = mpyu(ATMPH,BTMPH)
BTMP = combine(##BIAS+BIAS-4,#0)
}
{
PP_HH += lsr(PP_ODD,#32)
if (!p0) jump .Lmul_abnormal
p1 = cmp.eq(PP_LL_L,#0) // 64 lsb's 0?
p1 = cmp.eq(PP_ODD_L,#0) // 64 lsb's 0?
}
/*
* PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
*/
#undef PP_ODD
#undef PP_ODD_H
#undef PP_ODD_L
#define EXP10 r7:6
#define EXP1 r7
#define EXP0 r6
{
if (!p1) PP_HH_L = or(PP_HH_L,S_ONE)
EXP0 = extractu(AH,#EXPBITS,#HI_MANTBITS)
EXP1 = extractu(BH,#EXPBITS,#HI_MANTBITS)
}
{
PP_LL = neg(PP_HH)
EXP0 += add(TMP,EXP1)
TMP = xor(AH,BH)
}
{
if (!p2.new) PP_HH = PP_LL
p2 = cmp.gt(TMP,#-1)
p0 = !cmp.gt(EXP0,BTMPH)
p0 = cmp.gt(EXP0,BTMPL)
if (!p0.new) jump:nt .Lmul_ovf_unf
}
{
A = convert_d2df(PP_HH)
EXP0 = add(EXP0,#-BIAS-58)
}
{
AH += asl(EXP0,#HI_MANTBITS)
jumpr r31
}
.falign
.Lpossible_unf:
/* We end up with a positive exponent */
/* But we may have rounded up to an exponent of 1. */
/* If the exponent is 1, if we rounded up to it
* we need to also raise underflow
* Fortunately, this is pretty easy to detect, we must have +/- 0x0010_0000_0000_0000
* And the PP should also have more than one bit set
*/
/* Note: ATMP should have abs(PP_HH) */
/* Note: BTMPL should have 0x7FEFFFFF */
{
p0 = cmp.eq(AL,#0)
p0 = bitsclr(AH,BTMPL)
if (!p0.new) jumpr:t r31
BTMPH = #0x7fff
}
{
p0 = bitsset(ATMPH,BTMPH)
BTMPL = USR
BTMPH = #0x030
}
{
if (p0) BTMPL = or(BTMPL,BTMPH)
}
{
USR = BTMPL
}
{
p0 = dfcmp.eq(A,A)
jumpr r31
}
.falign
.Lmul_ovf_unf:
{
A = convert_d2df(PP_HH)
ATMP = abs(PP_HH) // take absolute value
EXP1 = add(EXP0,#-BIAS-58)
}
{
AH += asl(EXP1,#HI_MANTBITS)
EXP1 = extractu(AH,#EXPBITS,#HI_MANTBITS)
BTMPL = ##0x7FEFFFFF
}
{
EXP1 += add(EXP0,##-BIAS-58)
//BTMPH = add(clb(ATMP),#-2)
BTMPH = #0
}
{
p0 = cmp.gt(EXP1,##BIAS+BIAS-2) // overflow
if (p0.new) jump:nt .Lmul_ovf
}
{
p0 = cmp.gt(EXP1,#0)
if (p0.new) jump:nt .Lpossible_unf
BTMPH = sub(EXP0,BTMPH)
TMP = #63 // max amount to shift
}
/* Underflow */
/*
* PP_HH has the partial product with sticky LSB.
* PP_HH can have a maximum of 0x3FFF_FFFF_FFFF_FFFF or thereabouts
* PP_HH can have a minimum of 0x1000_0000_0000_0000 or so
* The exponent of PP_HH is in EXP1, which is non-positive (0 or negative)
* That's the exponent that happens after the normalization
*
* EXP0 has the exponent that, when added to the normalized value, is out of range.
*
* Strategy:
*
* * Shift down bits, with sticky bit, such that the bits are aligned according
* to the LZ count and appropriate exponent, but not all the way to mantissa
* field, keep around the last few bits.
* * Put a 1 near the MSB
* * Check the LSBs for inexact; if inexact also set underflow
* * Convert [u]d2df -- will correctly round according to rounding mode
* * Replace exponent field with zero
*
*
*/
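/* Sketched in C (illustrative only; 'prod' stands for the PP_HH value above,
   'rshift' is already capped at 63 by the caller, and the helper name is a
   placeholder that does not exist in the library):

       #include <stdint.h>
       // Shift right with a sticky bit, plant a marker bit near the MSB so the
       // convert instruction rounds at the right place, and record the flags.
       static uint64_t squash_to_denormal(uint64_t prod, int rshift, int *usr) {
           uint64_t lost = prod & ((1ULL << rshift) - 1);
           prod >>= rshift;
           if (lost) prod |= 1;                  // sticky bit
           prod |= 1ULL << (32 + 20 + 3);        // bit HI_MANTBITS+3 of the high word
           if (prod & 0x7)                       // low guard bits nonzero?
               *usr |= 0x30;                     // inexact + underflow
           return prod;                          // caller converts, then zeroes the
       }                                         // exponent field
*/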
{
BTMPL = #0 // offset for extract
BTMPH = sub(#FUDGE,BTMPH) // amount to right shift
}
{
p3 = cmp.gt(PP_HH_H,#-1) // is it positive?
BTMPH = min(BTMPH,TMP) // Don't shift more than 63
PP_HH = ATMP
}
{
TMP = USR
PP_LL = extractu(PP_HH,BTMP)
}
{
PP_HH = asr(PP_HH,BTMPH)
BTMPL = #0x0030 // underflow flag
AH = insert(S_ZERO,#EXPBITS,#HI_MANTBITS)
}
{
p0 = cmp.gtu(ONE,PP_LL) // Did we extract all zeros?
if (!p0.new) PP_HH_L = or(PP_HH_L,S_ONE) // add sticky bit
PP_HH_H = setbit(PP_HH_H,#HI_MANTBITS+3) // Add back in a bit so we can use convert instruction
}
{
PP_LL = neg(PP_HH)
p1 = bitsclr(PP_HH_L,#0x7) // Are the LSB's clear?
if (!p1.new) TMP = or(BTMPL,TMP) // If not, Inexact+Underflow
}
{
if (!p3) PP_HH = PP_LL
USR = TMP
}
{
A = convert_d2df(PP_HH) // Do rounding
p0 = dfcmp.eq(A,A) // realize exception
}
{
AH = insert(S_ZERO,#EXPBITS-1,#HI_MANTBITS+1) // Insert correct exponent
jumpr r31
}
.falign
.Lmul_ovf:
// We get either max finite value or infinity. Either way, overflow+inexact
{
TMP = USR
ATMP = combine(##0x7fefffff,#-1) // positive max finite
A = PP_HH
}
{
PP_LL_L = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
TMP = or(TMP,#0x28) // inexact + overflow
BTMP = combine(##0x7ff00000,#0) // positive infinity
}
{
USR = TMP
PP_LL_L ^= lsr(AH,#31) // Does sign match rounding?
TMP = PP_LL_L // unmodified rounding mode
}
{
p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
p0 = !cmp.eq(PP_LL_L,#2) // Not rounding the other way,
if (p0.new) ATMP = BTMP // we should get infinity
p0 = dfcmp.eq(A,A) // Realize FP exception if enabled
}
{
A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
jumpr r31
}
.Lmul_abnormal:
{
ATMP = extractu(A,#63,#0) // strip off sign
BTMP = extractu(B,#63,#0) // strip off sign
}
{
p3 = cmp.gtu(ATMP,BTMP)
if (!p3.new) A = B // sort values
if (!p3.new) B = A // sort values
}
{
// Any NaN --> NaN, possibly raise invalid if sNaN
p0 = dfclass(A,#0x0f) // A not NaN?
if (!p0.new) jump:nt .Linvalid_nan
if (!p3) ATMP = BTMP
if (!p3) BTMP = ATMP
}
{
// Infinity * nonzero number is infinity
p1 = dfclass(A,#0x08) // A is infinity
p1 = dfclass(B,#0x0e) // B is nonzero
}
{
// Infinity * zero --> NaN, raise invalid
// Other zeros return zero
p0 = dfclass(A,#0x08) // A is infinity
p0 = dfclass(B,#0x01) // B is zero
}
{
if (p1) jump .Ltrue_inf
p2 = dfclass(B,#0x01)
}
{
if (p0) jump .Linvalid_zeroinf
if (p2) jump .Ltrue_zero // so return zero
TMP = ##0x7c000000
}
// We are left with a normal or subnormal times a subnormal. A > B
// If A and B are both very small (exp(a) < BIAS-MANTBITS),
// we go to a single sticky bit, which we can round easily.
// If A and B might multiply to something bigger, decrease A exponent and increase
// B exponent and try again
{
p0 = bitsclr(AH,TMP)
if (p0.new) jump:nt .Lmul_tiny
}
{
TMP = cl0(BTMP)
}
{
TMP = add(TMP,#-EXPBITS)
}
{
BTMP = asl(BTMP,TMP)
}
{
B = insert(BTMP,#63,#0)
AH -= asl(TMP,#HI_MANTBITS)
}
jump __hexagon_muldf3
.Lmul_tiny:
{
TMP = USR
A = xor(A,B) // get sign bit
}
{
TMP = or(TMP,#0x30) // Inexact + Underflow
A = insert(ONE,#63,#0) // put in rounded up value
BTMPH = extractu(TMP,#2,#SR_ROUND_OFF) // get rounding mode
}
{
USR = TMP
p0 = cmp.gt(BTMPH,#1) // Round towards pos/neg inf?
if (!p0.new) AL = #0 // If not, zero
BTMPH ^= lsr(AH,#31) // rounding my way --> set LSB
}
{
p0 = cmp.eq(BTMPH,#3) // if rounding towards right inf
if (!p0.new) AL = #0 // don't go to zero
jumpr r31
}
.Linvalid_zeroinf:
{
TMP = USR
}
{
A = #-1
TMP = or(TMP,#2)
}
{
USR = TMP
}
{
p0 = dfcmp.uo(A,A) // force exception if enabled
jumpr r31
}
.Linvalid_nan:
{
p0 = dfclass(B,#0x0f) // if B is not NaN
TMP = convert_df2sf(A) // will generate invalid if sNaN
if (p0.new) B = A // make it whatever A is
}
{
BL = convert_df2sf(B) // will generate invalid if sNaN
A = #-1
jumpr r31
}
.falign
.Ltrue_zero:
{
A = B
B = A
}
.Ltrue_inf:
{
BH = extract(BH,#1,#31)
}
{
AH ^= asl(BH,#31)
jumpr r31
}
END(__hexagon_muldf3)
#undef ATMP
#undef ATMPL
#undef ATMPH
#undef BTMP
#undef BTMPL
#undef BTMPH

View File

@ -0,0 +1,406 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* Double Precision Square Root */
#define EXP r28
#define A r1:0
#define AH r1
#define AL r0
#define SFSH r3:2
#define SF_S r3
#define SF_H r2
#define SFHALF_SONE r5:4
#define S_ONE r4
#define SFHALF r5
#define SF_D r6
#define SF_E r7
#define RECIPEST r8
#define SFRAD r9
#define FRACRAD r11:10
#define FRACRADH r11
#define FRACRADL r10
#define ROOT r13:12
#define ROOTHI r13
#define ROOTLO r12
#define PROD r15:14
#define PRODHI r15
#define PRODLO r14
#define P_TMP p0
#define P_EXP1 p1
#define NORMAL p2
#define SF_EXPBITS 8
#define SF_MANTBITS 23
#define DF_EXPBITS 11
#define DF_MANTBITS 52
#define DF_BIAS 0x3ff
#define DFCLASS_ZERO 0x01
#define DFCLASS_NORMAL 0x02
#define DFCLASS_DENORMAL 0x02
#define DFCLASS_INFINITE 0x08
#define DFCLASS_NAN 0x10
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG; .type __qdsp_##TAG,@function
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG; .type __hexagon_fast_##TAG,@function
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG; .type __hexagon_fast2_##TAG,@function
#define END(TAG) .size TAG,.-TAG
.text
.global __hexagon_sqrtdf2
.type __hexagon_sqrtdf2,@function
.global __hexagon_sqrt
.type __hexagon_sqrt,@function
Q6_ALIAS(sqrtdf2)
Q6_ALIAS(sqrt)
FAST_ALIAS(sqrtdf2)
FAST_ALIAS(sqrt)
FAST2_ALIAS(sqrtdf2)
FAST2_ALIAS(sqrt)
.type sqrt,@function
.p2align 5
__hexagon_sqrtdf2:
__hexagon_sqrt:
{
PROD = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS)
EXP = extractu(AH,#DF_EXPBITS,#DF_MANTBITS-32)
SFHALF_SONE = combine(##0x3f000004,#1)
}
{
NORMAL = dfclass(A,#DFCLASS_NORMAL) // Is it normal
NORMAL = cmp.gt(AH,#-1) // and positive?
if (!NORMAL.new) jump:nt .Lsqrt_abnormal
SFRAD = or(SFHALF,PRODLO)
}
#undef NORMAL
.Ldenormal_restart:
{
FRACRAD = A
SF_E,P_TMP = sfinvsqrta(SFRAD)
SFHALF = and(SFHALF,#-16)
SFSH = #0
}
#undef A
#undef AH
#undef AL
#define ERROR r1:0
#define ERRORHI r1
#define ERRORLO r0
// SF_E : reciprocal square root
// SF_H : half rsqrt
// SF_S : square root
// SF_D : error term
// SFHALF: 0.5
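// Sketched in C, the single-precision refinement below is the usual Newton-Raphson
// step on the reciprocal square root (illustrative only; 'rsqrt_est' stands in for
// the hardware sfinvsqrta estimate):
//
//     float e = rsqrt_est(x);          // initial ~1/sqrt(x) estimate
//     float s = e * x;                 // s0: root estimate
//     float h = 0.5f * e;              // h0: half of the reciprocal root
//     float d = 0.5f - s * h;          // d0: error term
//     s = s + s * d;                   // s1: refined root
//     h = h + h * d;                   // h1: refined half-reciprocal
//     d = 0.5f - s * h;                // d1: new error term
//     h = h + h * d;                   // h2: only h is refined again, since the
//                                      //     fixed-point phase below needs only h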
{
SF_S += sfmpy(SF_E,SFRAD):lib // s0: root
SF_H += sfmpy(SF_E,SFHALF):lib // h0: 0.5*y0. Could also decrement exponent...
SF_D = SFHALF
#undef SFRAD
#define SHIFTAMT r9
SHIFTAMT = and(EXP,#1)
}
{
SF_D -= sfmpy(SF_S,SF_H):lib // d0: 0.5-H*S = 0.5-0.5*~1
FRACRADH = insert(S_ONE,#DF_EXPBITS+1,#DF_MANTBITS-32) // replace upper bits with hidden
P_EXP1 = cmp.gtu(SHIFTAMT,#0)
}
{
SF_S += sfmpy(SF_S,SF_D):lib // s1: refine sqrt
SF_H += sfmpy(SF_H,SF_D):lib // h1: refine half-recip
SF_D = SFHALF
SHIFTAMT = mux(P_EXP1,#8,#9)
}
{
SF_D -= sfmpy(SF_S,SF_H):lib // d1: error term
FRACRAD = asl(FRACRAD,SHIFTAMT) // Move fracrad bits to right place
SHIFTAMT = mux(P_EXP1,#3,#2)
}
{
SF_H += sfmpy(SF_H,SF_D):lib // d2: rsqrt
// cool trick: half of 1/sqrt(x) has same mantissa as 1/sqrt(x).
PROD = asl(FRACRAD,SHIFTAMT) // fracrad<<(2+exp1)
}
{
SF_H = and(SF_H,##0x007fffff)
}
{
SF_H = add(SF_H,##0x00800000 - 3)
SHIFTAMT = mux(P_EXP1,#7,#8)
}
{
RECIPEST = asl(SF_H,SHIFTAMT)
SHIFTAMT = mux(P_EXP1,#15-(1+1),#15-(1+0))
}
{
ROOT = mpyu(RECIPEST,PRODHI) // root = mpyu_full(recipest,hi(fracrad<<(2+exp1)))
}
#undef SFSH // r3:2
#undef SF_H // r2
#undef SF_S // r3
#undef S_ONE // r4
#undef SFHALF // r5
#undef SFHALF_SONE // r5:4
#undef SF_D // r6
#undef SF_E // r7
#define HL r3:2
#define LL r5:4
#define HH r7:6
#undef P_EXP1
#define P_CARRY0 p1
#define P_CARRY1 p2
#define P_CARRY2 p3
/* Iteration 0 */
/* Maybe we can save a cycle by starting with ERROR=asl(fracrad), then as we multiply */
/* We can shift and subtract instead of shift and add? */
{
ERROR = asl(FRACRAD,#15)
PROD = mpyu(ROOTHI,ROOTHI)
P_CARRY0 = cmp.eq(r0,r0)
}
{
ERROR -= asl(PROD,#15)
PROD = mpyu(ROOTHI,ROOTLO)
P_CARRY1 = cmp.eq(r0,r0)
}
{
ERROR -= lsr(PROD,#16)
P_CARRY2 = cmp.eq(r0,r0)
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
SHIFTAMT = add(SHIFTAMT,#16)
ERROR = asl(FRACRAD,#31) // for next iter
}
/* Iteration 1 */
{
PROD = mpyu(ROOTHI,ROOTHI)
ERROR -= mpyu(ROOTHI,ROOTLO) // amount is 31, no shift needed
}
{
ERROR -= asl(PROD,#31)
PROD = mpyu(ROOTLO,ROOTLO)
}
{
ERROR -= lsr(PROD,#33)
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
SHIFTAMT = add(SHIFTAMT,#16)
ERROR = asl(FRACRAD,#47) // for next iter
}
/* Iteration 2 */
{
PROD = mpyu(ROOTHI,ROOTHI)
}
{
ERROR -= asl(PROD,#47)
PROD = mpyu(ROOTHI,ROOTLO)
}
{
ERROR -= asl(PROD,#16) // bidir shr 31-47
PROD = mpyu(ROOTLO,ROOTLO)
}
{
ERROR -= lsr(PROD,#17) // 64-47
}
{
ERROR = mpyu(ERRORHI,RECIPEST)
}
{
ROOT += lsr(ERROR,SHIFTAMT)
}
#undef ERROR
#undef PROD
#undef PRODHI
#undef PRODLO
#define REM_HI r15:14
#define REM_HI_HI r15
#define REM_LO r1:0
#undef RECIPEST
#undef SHIFTAMT
#define TWOROOT_LO r9:8
/* Adjust Root */
{
HL = mpyu(ROOTHI,ROOTLO)
LL = mpyu(ROOTLO,ROOTLO)
REM_HI = #0
REM_LO = #0
}
{
HL += lsr(LL,#33)
LL += asl(HL,#33)
P_CARRY0 = cmp.eq(r0,r0)
}
{
HH = mpyu(ROOTHI,ROOTHI)
REM_LO = sub(REM_LO,LL,P_CARRY0):carry
TWOROOT_LO = #1
}
{
HH += lsr(HL,#31)
TWOROOT_LO += asl(ROOT,#1)
}
#undef HL
#undef LL
#define REM_HI_TMP r3:2
#define REM_HI_TMP_HI r3
#define REM_LO_TMP r5:4
{
REM_HI = sub(FRACRAD,HH,P_CARRY0):carry
REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY1):carry
#undef FRACRAD
#undef HH
#define ZERO r11:10
#define ONE r7:6
ONE = #1
ZERO = #0
}
{
REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY1):carry
ONE = add(ROOT,ONE)
EXP = add(EXP,#-DF_BIAS) // subtract bias --> signed exp
}
{
// If carry set, no borrow: result was still positive
if (P_CARRY1) ROOT = ONE
if (P_CARRY1) REM_LO = REM_LO_TMP
if (P_CARRY1) REM_HI = REM_HI_TMP
}
{
REM_LO_TMP = sub(REM_LO,TWOROOT_LO,P_CARRY2):carry
ONE = #1
EXP = asr(EXP,#1) // divide signed exp by 2
}
{
REM_HI_TMP = sub(REM_HI,ZERO,P_CARRY2):carry
ONE = add(ROOT,ONE)
}
{
if (P_CARRY2) ROOT = ONE
if (P_CARRY2) REM_LO = REM_LO_TMP
// since tworoot <= 2^32, remhi must be zero
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#define S_ONE r2
#define ADJ r3
S_ONE = #1
}
{
P_TMP = cmp.eq(REM_LO,ZERO) // is the low part zero
if (!P_TMP.new) ROOTLO = or(ROOTLO,S_ONE) // if so, it's exact... hopefully
ADJ = cl0(ROOT)
EXP = add(EXP,#-63)
}
#undef REM_LO
#define RET r1:0
#define RETHI r1
{
RET = convert_ud2df(ROOT) // set up mantissa, maybe set inexact flag
EXP = add(EXP,ADJ) // add back bias
}
{
RETHI += asl(EXP,#DF_MANTBITS-32) // add exponent adjust
jumpr r31
}
#undef REM_LO_TMP
#undef REM_HI_TMP
#undef REM_HI_TMP_HI
#undef REM_LO
#undef REM_HI
#undef TWOROOT_LO
#undef RET
#define A r1:0
#define AH r1
#define AL r0
#undef S_ONE
#define TMP r3:2
#define TMPHI r3
#define TMPLO r2
#undef P_CARRY0
#define P_NEG p1
#define SFHALF r5
#define SFRAD r9
.Lsqrt_abnormal:
{
P_TMP = dfclass(A,#DFCLASS_ZERO) // zero?
if (P_TMP.new) jumpr:t r31
}
{
P_TMP = dfclass(A,#DFCLASS_NAN)
if (P_TMP.new) jump:nt .Lsqrt_nan
}
{
P_TMP = cmp.gt(AH,#-1)
if (!P_TMP.new) jump:nt .Lsqrt_invalid_neg
if (!P_TMP.new) EXP = ##0x7F800001 // sNaN
}
{
P_TMP = dfclass(A,#DFCLASS_INFINITE)
if (P_TMP.new) jumpr:nt r31
}
// If we got here, we're denormal
// prepare to restart
{
A = extractu(A,#DF_MANTBITS,#0) // Extract mantissa
}
{
EXP = add(clb(A),#-DF_EXPBITS) // how much to normalize?
}
{
A = asl(A,EXP) // Shift mantissa
EXP = sub(#1,EXP) // Form exponent
}
{
AH = insert(EXP,#1,#DF_MANTBITS-32) // insert lsb of exponent
}
{
TMP = extractu(A,#SF_MANTBITS+1,#DF_MANTBITS-SF_MANTBITS) // get sf value (mant+exp1)
SFHALF = ##0x3f000004 // form half constant
}
{
SFRAD = or(SFHALF,TMPLO) // form sf value
SFHALF = and(SFHALF,#-16)
jump .Ldenormal_restart // restart
}
.Lsqrt_nan:
{
EXP = convert_df2sf(A) // if sNaN, get invalid
A = #-1 // qNaN
jumpr r31
}
.Lsqrt_invalid_neg:
{
A = convert_sf2df(EXP) // Invalid,NaNval
jumpr r31
}
END(__hexagon_sqrt)
END(__hexagon_sqrtdf2)

View File

@ -0,0 +1,85 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN __hexagon_divdi3
{
p2 = tstbit(r1,#31)
p3 = tstbit(r3,#31)
}
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
p3 = xor(p2,p3)
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_divdi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_divdi3_return:
{
r3:2 = neg(r1:0)
}
{
r1:0 = vmux(p3,r3:2,r1:0)
jumpr r31
}
FUNCTION_END __hexagon_divdi3
.globl __qdsp_divdi3
.set __qdsp_divdi3, __hexagon_divdi3
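/* For reference, a C version of the same restoring shift-and-subtract division
   (illustrative only; the function name is a placeholder, __builtin_clzll stands
   in for the cl0 instruction, and division by zero is undefined as usual):

       static long long ref_divdi3(long long a, long long b) {
           int negate = (a < 0) ^ (b < 0);
           unsigned long long ua = a < 0 ? -(unsigned long long)a : (unsigned long long)a;
           unsigned long long ub = b < 0 ? -(unsigned long long)b : (unsigned long long)b;
           unsigned long long q = 0, bit = 1;
           if (ub <= ua) {
               int shift = __builtin_clzll(ub) - __builtin_clzll(ua);
               ub  <<= shift;                    // align divisor MSB with dividend MSB
               bit <<= shift;
               for (int i = 0; i <= shift; i++) {   // one more iteration than the shift
                   if (ub <= ua) { ua -= ub; q |= bit; }
                   ub  >>= 1;                    // move divisor and quotient bit right
                   bit >>= 1;
               }
           }
           return negate ? -(long long)q : (long long)q;
       }
*/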

View File

@ -0,0 +1,84 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN __hexagon_divsi3
{
p0 = cmp.ge(r0,#0)
p1 = cmp.ge(r1,#0)
r1 = abs(r0)
r2 = abs(r1)
}
{
r3 = cl0(r1)
r4 = cl0(r2)
r5 = sub(r1,r2)
p2 = cmp.gtu(r2,r1)
}
#if (__HEXAGON_ARCH__ == 60)
{
r0 = #0
p1 = xor(p0,p1)
p0 = cmp.gtu(r2,r5)
}
if (p2) jumpr r31
#else
{
r0 = #0
p1 = xor(p0,p1)
p0 = cmp.gtu(r2,r5)
if (p2) jumpr r31
}
#endif
{
r0 = mux(p1,#-1,#1)
if (p0) jumpr r31
r4 = sub(r4,r3)
r3 = #1
}
{
r0 = #0
r3:2 = vlslw(r3:2,r4)
loop0(1f,r4)
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
if (!p1) jumpr r31
}
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_divsi3
.globl __qdsp_divsi3
.set __qdsp_divsi3, __hexagon_divsi3

View File

@ -0,0 +1,37 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fabs
{
r1 = clrbit(r1, #31)
jumpr r31
}
FUNCTION_END fabs
FUNCTION_BEGIN fabsf
{
r0 = clrbit(r0, #31)
jumpr r31
}
FUNCTION_END fabsf
.globl fabsl
.set fabsl, fabs

View File

@ -0,0 +1,491 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== */
/* FUNCTIONS Optimized double floating point operators */
/* ==================================================================== */
/* c = dadd_asm(a, b) */
/* ==================================================================== *
fast2_QDOUBLE fast2_dadd(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = Q6_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dadd_asm
.type fast2_dadd_asm, @function
fast2_dadd_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define minmin R11:10 // exactly 0x0000000000008001LL
#define minminl R10
#define k R4
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
minmin = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = add(lmanta, lmantb)
minminl.L = #0x8001
} {
k = clb(lmant)
c63 = #58
} {
k = add(k, #-1)
p0 = cmp.gt(k, c63)
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
if(p0) jump .Ldenorma
} {
manta = insert(exp, #16, #0)
jumpr r31
}
.Ldenorma:
{
mantexpa = minmin
jumpr r31
}
/* =================================================================== *
fast2_QDOUBLE fast2_dsub(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
k = Q6_R_clb_P(mant)-1;
mant = (mant << k);
exp = exp - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dsub_asm
.type fast2_dsub_asm, @function
fast2_dsub_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define minmin R11:10 // exactly 0x0000000000008001LL
#define minminl R10
#define k R4
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
minmin = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = sub(lmanta, lmantb)
minminl.L = #0x8001
} {
k = clb(lmant)
c63 = #58
} {
k = add(k, #-1)
p0 = cmp.gt(k, c63)
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
if(p0) jump .Ldenorm
} {
manta = insert(exp, #16, #0)
jumpr r31
}
.Ldenorm:
{
mantexpa = minmin
jumpr r31
}
/* ==================================================================== *
fast2_QDOUBLE fast2_dmpy(fast2_QDOUBLE a,fast2_QDOUBLE b) {
fast2_QDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = Q6_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = Q6_R_extractu_RII((int)mantb, 31, 1);
mant = Q6_P_mpy_RR(hia, lob);
mant = Q6_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
k = Q6_R_normamt_R(hi);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_dmpy_asm
.type fast2_dmpy_asm, @function
fast2_dmpy_asm:
#define mantal R0
#define mantah R1
#define mantexpa R1:0
#define mantbl R2
#define mantbh R3
#define mantexpb R3:2
#define expa R4
#define expb R5
#define c8001 R12
#define mantexpd R7:6
#define mantdh R7
#define exp R8
#define lmantc R11:10
#define kb R9
#define guard R11
#define mantal_ R12
#define mantbl_ R13
#define min R15:14
#define minh R15
.falign
{
mantbl_= lsr(mantbl, #16)
expb = sxth(mantbl)
expa = sxth(mantal)
mantal_= lsr(mantal, #16)
}
{
lmantc = mpy(mantah, mantbh)
mantexpd = mpy(mantah, mantbl_)
mantal.L = #0x0
min = #0
}
{
lmantc = add(lmantc, lmantc)
mantexpd+= mpy(mantbh, mantal_)
mantbl.L = #0x0
minh.H = #0x8000
}
{
mantexpd = asr(mantexpd, #15)
c8001.L = #0x8001
p1 = cmp.eq(mantexpa, mantexpb)
}
{
mantexpd = add(mantexpd, lmantc)
exp = add(expa, expb)
p2 = cmp.eq(mantexpa, min)
}
{
kb = clb(mantexpd)
mantexpb = abs(mantexpd)
guard = #58
}
{
p1 = and(p1, p2)
exp = sub(exp, kb)
kb = add(kb, #-1)
p0 = cmp.gt(kb, guard)
}
{
exp = add(exp, #1)
mantexpa = asl(mantexpd, kb)
if(p1) jump .Lsat //rarely happens
}
{
mantal = insert(exp,#16, #0)
if(!p0) jumpr r31
}
{
mantal = insert(c8001,#16, #0)
jumpr r31
}
.Lsat:
{
mantexpa = #-1
}
{
mantexpa = lsr(mantexpa, #1)
}
{
mantal = insert(exp,#16, #0)
jumpr r31
}
/* ==================================================================== *
int fast2_qd2f(fast2_QDOUBLE a) {
int exp;
long long int manta;
int ic, rnd, mantb;
manta = a>>32;
exp = Q6_R_sxth_R(a) ;
ic = 0x80000000 & manta;
manta = Q6_R_abs_R_sat(manta);
rnd = 0x40;
mantb = (manta + rnd)>>7;
exp = (exp + 126);
if((manta & 0xff) == rnd) rnd = 0x00;
if((manta & 0x7fffffc0) == 0x7fffffc0) {
manta = 0x0; exp++;
} else {
manta= mantb & 0x007fffff;
}
exp = (exp << 23) & 0x7fffffc0;
ic = Q6_R_addacc_RR(ic, exp, manta);
return (ic);
}
* ==================================================================== */
.text
.global fast2_qd2f_asm
.type fast2_qd2f_asm, @function
fast2_qd2f_asm:
#define mantah R1
#define mantal R0
#define cff R0
#define mant R3
#define expo R4
#define rnd R5
#define mask R6
#define c07f R7
#define c80 R0
#define mantb R2
#define ic R0
.falign
{
mant = abs(mantah):sat
expo = sxth(mantal)
rnd = #0x40
mask.L = #0xffc0
}
{
cff = extractu(mant, #8, #0)
p2 = cmp.gt(expo, #126)
p3 = cmp.ge(expo, #-126)
mask.H = #0x7fff
}
{
p1 = cmp.eq(cff,#0x40)
if(p1.new) rnd = #0
expo = add(expo, #126)
if(!p3) jump .Lmin
}
{
p0 = bitsset(mant, mask)
c80.L = #0x0000
mantb = add(mant, rnd)
c07f = lsr(mask, #8)
}
{
if(p0) expo = add(expo, #1)
if(p0) mant = #0
mantb = lsr(mantb, #7)
c80.H = #0x8000
}
{
ic = and(c80, mantah)
mask &= asl(expo, #23)
if(!p0) mant = and(mantb, c07f)
if(p2) jump .Lmax
}
{
ic += add(mask, mant)
jumpr r31
}
.Lmax:
{
ic.L = #0xffff;
}
{
ic.H = #0x7f7f;
jumpr r31
}
.Lmin:
{
ic = #0x0
jumpr r31
}
/* ==================================================================== *
fast2_QDOUBLE fast2_f2qd(int ia) {
lint exp;
lint mant;
fast2_QDOUBLE c;
mant = ((ia << 7) | 0x40000000)&0x7fffff80 ;
if (ia & 0x80000000) mant = -mant;
exp = ((ia >> 23) & 0xFFLL) - 126;
c = (mant<<32) | Q6_R_zxth_R(exp);
return(c);
}
* ==================================================================== */
.text
.global fast2_f2qd_asm
.type fast2_f2qd_asm, @function
fast2_f2qd_asm:
#define ia R0
#define mag R3
#define mantr R1
#define expr R0
#define zero R2
#define maxneg R5:4
#define maxnegl R4
.falign
{
mantr = asl(ia, #7)
p0 = tstbit(ia, #31)
maxneg = #0
mag = add(ia,ia)
}
{
mantr = setbit(mantr, #30)
expr= extractu(ia,#8,#23)
maxnegl.L = #0x8001
p1 = cmp.eq(mag, #0)
}
{
mantr= extractu(mantr, #31, #0)
expr= add(expr, #-126)
zero = #0
if(p1) jump .Lminqd
}
{
expr = zxth(expr)
if(p0) mantr= sub(zero, mantr)
jumpr r31
}
.Lminqd:
{
R1:0 = maxneg
jumpr r31
}

@ -0,0 +1,345 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== *
fast2_QLDOUBLE fast2_ldadd(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = Q6_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+Q6_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldadd_asm
.type fast2_ldadd_asm, @function
fast2_ldadd_asm:
#define manta R1:0
#define lmanta R1:0
#define mantb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define k R4
#define ce P0
#define zero R3:2
.falign
{
expa = memw(r29+#8)
expb = memw(r29+#24)
r7 = r0
}
{
expd = sub(expa, expb):sat
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
expd = abs(expd):sat
if ( ce) expa = #1
if (!ce) expb = #1
c63 = #62
} {
expd = MIN(expd, c63)
manta = memd(r29+#0)
mantb = memd(r29+#16)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = add(lmanta, lmantb)
zero = #0
} {
k = clb(lmant)
c63.L =#0x0001
} {
exp -= add(k, #-1) //exp = exp - (k-1)
k = add(k, #-1)
p0 = cmp.gt(k, #58)
c63.H =#0x8000
} {
if(!p0)memw(r7+#8) = exp
lmant = ASL(lmant, k)
if(p0) jump .Ldenorma
} {
memd(r7+#0) = lmant
jumpr r31
}
.Ldenorma:
memd(r7+#0) = zero
{
memw(r7+#8) = c63
jumpr r31
}
/* =================================================================== *
fast2_QLDOUBLE fast2_ldsub(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, expdiff, j, k;
lint mant;
expdiff = (int) Q6_P_vabsdiffh_PP(a, b);
expdiff = Q6_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
k = Q6_R_clb_P(mant)-1;
mant = (mant << k);
exp = exp - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldsub_asm
.type fast2_ldsub_asm, @function
fast2_ldsub_asm:
#define manta R1:0
#define lmanta R1:0
#define mantb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define k R4
#define ce P0
#define zero R3:2
.falign
{
expa = memw(r29+#8)
expb = memw(r29+#24)
r7 = r0
}
{
expd = sub(expa, expb):sat
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
expd = abs(expd):sat
if ( ce) expa = #1
if (!ce) expb = #1
c63 = #62
} {
expd = min(expd, c63)
manta = memd(r29+#0)
mantb = memd(r29+#16)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
} {
lmant = sub(lmanta, lmantb)
zero = #0
} {
k = clb(lmant)
c63.L =#0x0001
} {
exp -= add(k, #-1) //exp = exp - (k-1)
k = add(k, #-1)
p0 = cmp.gt(k, #58)
c63.H =#0x8000
} {
if(!p0)memw(r7+#8) = exp
lmant = asl(lmant, k)
if(p0) jump .Ldenorma_s
} {
memd(r7+#0) = lmant
jumpr r31
}
.Ldenorma_s:
memd(r7+#0) = zero
{
memw(r7+#8) = c63
jumpr r31
}
/* ==================================================================== *
fast2_QLDOUBLE fast2_ldmpy(fast2_QLDOUBLE a,fast2_QLDOUBLE b) {
fast2_QLDOUBLE c;
lint manta = a & MANTMASK;
int expa = Q6_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = Q6_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = Q6_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = Q6_R_extractu_RII((int)mantb, 31, 1);
mant = Q6_P_mpy_RR(hia, lob);
mant = Q6_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (Q6_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
k = Q6_R_normamt_R(hi);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global fast2_ldmpy_asm
.type fast2_ldmpy_asm, @function
fast2_ldmpy_asm:
#define mantxl_ R9
#define mantxl R14
#define mantxh R15
#define mantx R15:14
#define mantbl R2
#define mantbl_ R8
#define mantbh R3
#define mantb R3:2
#define expa R4
#define expb R5
#define c8001 R8
#define mantd R7:6
#define lmantc R11:10
#define kp R9
#define min R13:12
#define minh R13
#define max R13:12
#define maxh R13
#define ret R0
.falign
{
mantx = memd(r29+#0)
mantb = memd(r29+#16)
min = #0
}
{
mantbl_= extractu(mantbl, #31, #1)
mantxl_= extractu(mantxl, #31, #1)
minh.H = #0x8000
}
{
lmantc = mpy(mantxh, mantbh)
mantd = mpy(mantxh, mantbl_)
expa = memw(r29+#8)
expb = memw(r29+#24)
}
{
lmantc = add(lmantc, lmantc)
mantd += mpy(mantbh, mantxl_)
}
{
mantd = asr(mantd, #30)
c8001.L = #0x0001
p1 = cmp.eq(mantx, mantb)
}
{
mantd = add(mantd, lmantc)
expa= add(expa, expb)
p2 = cmp.eq(mantb, min)
}
{
kp = clb(mantd)
c8001.H = #0x8000
p1 = and(p1, p2)
}
{
expa-= add(kp, #-1)
kp = add(kp, #-1)
if(p1) jump .Lsat
}
{
mantd = asl(mantd, kp)
memw(ret+#8) = expa
p0 = cmp.gt(kp, #58)
if(p0.new) jump:NT .Ldenorm //rarely happens
}
{
memd(ret+#0) = mantd
jumpr r31
}
.Lsat:
{
max = #0
expa+= add(kp, #1)
}
{
maxh.H = #0x4000
memw(ret+#8) = expa
}
{
memd(ret+#0) = max
jumpr r31
}
.Ldenorm:
{
memw(ret+#8) = c8001
mantx = #0
}
{
memd(ret+#0) = mantx
jumpr r31
}

@ -0,0 +1,400 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/* ==================================================================== */
/* FUNCTIONS Optimized double floating point operators */
/* ==================================================================== */
/* c = dadd_asm(a, b) */
/* ====================================================================
QDOUBLE dadd(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
expdiff = HEXAGON_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) + (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dadd_asm
.type dadd_asm, @function
dadd_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define zero R7:6
#define zerol R6
#define minus R3:2
#define minusl R2
#define maxneg R9
#define minmin R11:10 // exactly 0x8000000000000000LL
#define minminh R11
#define k R4
#define kl R5
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
zero = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
minmin = #0
} {
lmant = add(lmanta, lmantb)
minus = #-1
minminh.H = #0x8000
} {
k = NORMAMT(manth)
kl = NORMAMT(mantl)
p0 = cmp.eq(manth, zerol)
p1 = cmp.eq(manth, minusl)
} {
p0 = OR(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
maxneg.L = #0x8001
} {
p0 = cmp.eq(mantexpa, zero)
p1 = cmp.eq(mantexpa, minus)
manta.L = #0
exp = ZXTH(exp)
} {
p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
if(p2.new) exp = add(exp, #1)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
}
jumpr r31
#else
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
jumpr r31
}
#endif
/* =================================================================== *
QDOUBLE dsub(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, expdiff, j, k, hi, lo, cn;
lint mant;
expdiff = (int) HEXAGON_P_vabsdiffh_PP(a, b);
expdiff = HEXAGON_R_sxth_R(expdiff) ;
if (expdiff > 63) { expdiff = 62;}
if (expa > expb) {
exp = expa + 1;
expa = 1;
expb = expdiff + 1;
} else {
exp = expb + 1;
expb = 1;
expa = expdiff + 1;
}
mant = (manta>>expa) - (mantb>>expb);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = (mant << k);
cn = (mant == 0x8000000000000000LL);
exp = exp - k + cn;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dsub_asm
.type dsub_asm, @function
dsub_asm:
#define manta R0
#define mantexpa R1:0
#define lmanta R1:0
#define mantb R2
#define mantexpb R3:2
#define lmantb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define expd R6
#define exp R8
#define c63 R9
#define lmant R1:0
#define manth R1
#define mantl R0
#define zero R7:6
#define zerol R6
#define minus R3:2
#define minusl R2
#define maxneg R9
#define minmin R11:10 // exactly 0x8000000000000000LL
#define minminh R11
#define k R4
#define kl R5
#define ce P0
.falign
{
mantexpd = VABSDIFFH(mantexpa, mantexpb) //represented as 0x08001LL
c63 = #62
expa = SXTH(manta)
expb = SXTH(mantb)
} {
expd = SXTH(expd)
ce = CMP.GT(expa, expb);
if ( ce.new) exp = add(expa, #1)
if (!ce.new) exp = add(expb, #1)
} {
if ( ce) expa = #1
if (!ce) expb = #1
manta.L = #0
expd = MIN(expd, c63)
} {
if (!ce) expa = add(expd, #1)
if ( ce) expb = add(expd, #1)
mantb.L = #0
zero = #0
} {
lmanta = ASR(lmanta, expa)
lmantb = ASR(lmantb, expb)
minmin = #0
} {
lmant = sub(lmanta, lmantb)
minus = #-1
minminh.H = #0x8000
} {
k = NORMAMT(manth)
kl = NORMAMT(mantl)
p0 = cmp.eq(manth, zerol)
p1 = cmp.eq(manth, minusl)
} {
p0 = OR(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
} {
mantexpa = ASL(lmant, k)
exp = SUB(exp, k)
maxneg.L = #0x8001
} {
p0 = cmp.eq(mantexpa, zero)
p1 = cmp.eq(mantexpa, minus)
manta.L = #0
exp = ZXTH(exp)
} {
p2 = cmp.eq(mantexpa, minmin) //is result 0x80....0
if(p2.new) exp = add(exp, #1)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
}
jumpr r31
#else
{
p0 = OR(p0, p1)
if( p0.new) manta = OR(manta,maxneg)
if(!p0.new) manta = OR(manta,exp)
jumpr r31
}
#endif
/* ==================================================================== *
QDOUBLE dmpy(QDOUBLE a,QDOUBLE b) {
QDOUBLE c;
lint manta = a & MANTMASK;
int expa = HEXAGON_R_sxth_R(a) ;
lint mantb = b & MANTMASK;
int expb = HEXAGON_R_sxth_R(b) ;
int exp, k;
lint mant;
int hia, hib, hi, lo;
unsigned int loa, lob;
hia = (int)(a >> 32);
loa = HEXAGON_R_extractu_RII((int)manta, 31, 1);
hib = (int)(b >> 32);
lob = HEXAGON_R_extractu_RII((int)mantb, 31, 1);
mant = HEXAGON_P_mpy_RR(hia, lob);
mant = HEXAGON_P_mpyacc_RR(mant,hib, loa);
mant = (mant >> 30) + (HEXAGON_P_mpy_RR(hia, hib)<<1);
hi = (int) (mant>>32);
lo = (int) (mant);
k = HEXAGON_R_normamt_R(hi);
if(hi == 0 || hi == -1) k = 31+HEXAGON_R_normamt_R(lo);
mant = mant << k;
exp = expa + expb - k;
if (mant == 0 || mant == -1) exp = 0x8001;
c = (mant & MANTMASK) | (((lint) exp) & EXP_MASK);
return(c);
}
* ==================================================================== */
.text
.global dmpy_asm
.type dmpy_asm, @function
dmpy_asm:
#define mantal R0
#define mantah R1
#define mantexpa R1:0
#define mantbl R2
#define mantbh R3
#define mantexpb R3:2
#define expa R4
#define expb R5
#define mantexpd R7:6
#define exp R8
#define lmantc R11:10
#define mantch R11
#define mantcl R10
#define zero0 R7:6
#define zero0l R6
#define minus1 R3:2
#define minus1l R2
#define maxneg R9
#define k R4
#define kl R5
.falign
{
mantbl = lsr(mantbl, #16)
mantal = lsr(mantal, #16)
expa = sxth(mantal)
expb = sxth(mantbl)
}
{
lmantc = mpy(mantah, mantbh)
mantexpd = mpy(mantah, mantbl)
}
{
lmantc = add(lmantc, lmantc) //<<1
mantexpd+= mpy(mantbh, mantal)
}
{
lmantc += asr(mantexpd, #15)
exp = add(expa, expb)
zero0 = #0
minus1 = #-1
}
{
k = normamt(mantch)
kl = normamt(mantcl)
p0 = cmp.eq(mantch, zero0l)
p1 = cmp.eq(mantch, minus1l)
}
{
p0 = or(p0, p1)
if(p0.new) k = add(kl, #31)
maxneg.H = #0
}
{
mantexpa = asl(lmantc, k)
exp = sub(exp, k)
maxneg.L = #0x8001
}
{
p0 = cmp.eq(mantexpa, zero0)
p1 = cmp.eq(mantexpa, minus1)
mantal.L = #0
exp = zxth(exp)
}
#if (__HEXAGON_ARCH__ == 60)
{
p0 = or(p0, p1)
if( p0.new) mantal = or(mantal,maxneg)
if(!p0.new) mantal = or(mantal,exp)
}
jumpr r31
#else
{
p0 = or(p0, p1)
if( p0.new) mantal = or(mantal,maxneg)
if(!p0.new) mantal = or(mantal,exp)
jumpr r31
}
#endif

@ -0,0 +1,31 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fmaf
r2 += sfmpy(r0, r1)
{
r0 = r2
jumpr r31
}
FUNCTION_END fmaf
.globl fmal
.set fmal, fma

@ -0,0 +1,30 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fmaxf
{
r0 = sfmax(r0, r1)
jumpr r31
}
FUNCTION_END fmaxf
.globl fmaxl
.set fmaxl, fmax

@ -0,0 +1,30 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN fminf
{
r0 = sfmin(r0, r1)
jumpr r31
}
FUNCTION_END fminf
.globl fminl
.set fminl, fmin

@ -0,0 +1,125 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// An optimized version of a memcpy which is equivalent to the following loop:
//
// volatile unsigned *dest;
// unsigned *src;
//
// for (i = 0; i < num_words; ++i)
// *dest++ = *src++;
//
// The corresponding C prototype for this function would be
// void hexagon_memcpy_forward_vp4cp4n2(volatile unsigned *dest,
// const unsigned *src,
// unsigned num_words);
//
// *** Both dest and src must be aligned to 32-bit boundaries. ***
// The code does not perform any runtime checks for this, and will fail
// in bad ways if this requirement is not met.
//
// The "forward" in the name refers to the fact that the function copies
// the words going forward in memory. It is incorrect to use this function
// for cases where the original code copied words in any other order.
//
// *** This function is only for use by the compiler. ***
// The only intended use is for the LLVM compiler to generate calls to
// this function when a mem-copy loop like the one above is detected.
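//
// Editor's sketch (not part of the original source): a plain C model of how
// the code below structures the copy so each 4 KB page of the source can be
// prefetched before it is read. The function name is a placeholder, the
// l2fetch prefetch hints are omitted, and only the prolog/pages/epilog split
// is shown. Assumes <stdint.h>.
//
//   static void copy_forward_model(volatile unsigned *dest,
//                                  const unsigned *src, unsigned num_words) {
//     // Prolog: copy up to the next 4 KB boundary of src (0 words if aligned).
//     unsigned prolog = (unsigned)(-(uintptr_t)src & 4095u) / 4u;
//     if (prolog > num_words)
//       prolog = num_words;
//     for (unsigned i = 0; i < prolog; ++i)
//       *dest++ = *src++;
//     num_words -= prolog;
//     // Main: whole pages of 1024 words, each prefetched, then copied.
//     for (unsigned p = 0; p < num_words / 1024; ++p)
//       for (unsigned i = 0; i < 1024; ++i)
//         *dest++ = *src++;
//     // Epilog: the remaining words of the last, partial page.
//     for (unsigned i = 0; i < (num_words & 1023); ++i)
//       *dest++ = *src++;
//   }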
.text
// Inputs:
// r0: dest
// r1: src
// r2: num_words
.globl hexagon_memcpy_forward_vp4cp4n2
.balign 32
.type hexagon_memcpy_forward_vp4cp4n2,@function
hexagon_memcpy_forward_vp4cp4n2:
// Compute r3 to be the number of words remaining in the current page.
// At the same time, compute r4 to be the number of 32-byte blocks
// remaining in the page (for prefetch).
{
r3 = sub(##4096, r1)
r5 = lsr(r2, #3)
}
{
// The byte count before the end of the page is in the 12 lowest bits of r3;
// extracting bits [11:2] below converts it to a word count.
// (If the address in r1 was already page-aligned, the bits are 0.)
r3 = extractu(r3, #10, #2)
r4 = extractu(r3, #7, #5)
}
{
r3 = minu(r2, r3)
r4 = minu(r5, r4)
}
{
r4 = or(r4, ##2105344) // 2105344 = 0x202000
p0 = cmp.eq(r3, #0)
if (p0.new) jump:nt .Lskipprolog
}
l2fetch(r1, r4)
{
loop0(.Lprolog, r3)
r2 = sub(r2, r3) // r2 = number of words left after the prolog.
}
.falign
.Lprolog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
.Lskipprolog:
{
// Let r3 = number of whole pages left (page = 1024 words).
r3 = lsr(r2, #10)
if (cmp.eq(r3.new, #0)) jump:nt .Lskipmain
}
{
loop1(.Lout, r3)
r2 = extractu(r2, #10, #0) // r2 = r2 & 1023
r3 = ##2105472 // r3 = 0x202080 (prefetch info)
}
// Iterate over pages.
.falign
.Lout:
// Prefetch each individual page.
l2fetch(r1, r3)
loop0(.Lpage, #512)
.falign
.Lpage:
r5:4 = memd(r1++#8)
{
memw(r0++#8) = r4
memw(r0+#4) = r5
} :endloop0:endloop1
.Lskipmain:
{
r3 = ##2105344 // r3 = 0x202000 (prefetch info)
r4 = lsr(r2, #3) // r4 = number of 32-byte blocks remaining.
p0 = cmp.eq(r2, #0)
if (p0.new) jumpr:nt r31
}
{
r3 = or(r3, r4)
loop0(.Lepilog, r2)
}
l2fetch(r1, r3)
.falign
.Lepilog:
{
r4 = memw(r1++#4)
memw(r0++#4) = r4.new
} :endloop0
jumpr r31
.size hexagon_memcpy_forward_vp4cp4n2, . - hexagon_memcpy_forward_vp4cp4n2

@ -0,0 +1,64 @@
//===------------------------- memcopy routines ---------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
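// Editor's sketch (not from the source): a C model of the routine below. The
// name encodes the contract -- both pointers are expected to be 8-byte
// aligned, the length is at least 32 bytes and a multiple of 8 -- and the
// code falls back to plain memcpy when the alignment check fails. The model
// name is illustrative only. Assumes <stdint.h> and <string.h>.
//
//   static void memcpy_aligned_model(void *dst, const void *src, size_t n) {
//     if ((((uintptr_t)dst | (uintptr_t)src) & 7) != 0) {
//       memcpy(dst, src, n);                 // unaligned: let memcpy handle it
//       return;
//     }
//     uint64_t *d = (uint64_t *)dst;
//     const uint64_t *s = (const uint64_t *)src;
//     for (size_t i = 0; i < n / 8; ++i)     // 8 bytes per iteration
//       d[i] = s[i];
//   }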
FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
{
p0 = bitsclr(r1,#7)
p0 = bitsclr(r0,#7)
if (p0.new) r5:4 = memd(r1)
r3 = #-3
}
{
if (!p0) jump .Lmemcpy_call
if (p0) memd(r0++#8) = r5:4
if (p0) r5:4 = memd(r1+#8)
r3 += lsr(r2,#3)
}
{
memd(r0++#8) = r5:4
r5:4 = memd(r1+#16)
r1 = add(r1,#24)
loop0(1f,r3)
}
.falign
1:
{
memd(r0++#8) = r5:4
r5:4 = memd(r1++#8)
}:endloop0
{
memd(r0) = r5:4
r0 -= add(r2,#-8)
jumpr r31
}
FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
.Lmemcpy_call:
#ifdef __PIC__
jump memcpy@PLT
#else
jump memcpy
#endif
.globl __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes
.set __qdsp_memcpy_likely_aligned_min32bytes_mult8bytes, \
__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes

@ -0,0 +1,83 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
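// Editor's sketch (not part of the original file): a C model of the signed
// 64-bit remainder computed below. Take absolute values, run the same
// shift-and-subtract (restoring) loop as the unsigned routines, then give the
// remainder the sign of the dividend, matching C's % semantics. The helper
// name is illustrative and __builtin_clzll is the GCC/Clang intrinsic.
//
//   long long moddi3_model(long long a, long long b) {
//     int neg = a < 0;                       // result takes the dividend's sign
//     unsigned long long n = a < 0 ? -(unsigned long long)a : (unsigned long long)a;
//     unsigned long long d = b < 0 ? -(unsigned long long)b : (unsigned long long)b;
//     if (d <= n) {
//       int shift = __builtin_clzll(d) - __builtin_clzll(n);
//       d <<= shift;                         // align divisor with dividend
//       for (int i = 0; i <= shift; ++i) {   // shift+1 restoring steps
//         if (d <= n)
//           n -= d;                          // subtract the shifted divisor
//         d >>= 1;
//       }
//     }
//     return neg ? -(long long)n : (long long)n;
//   }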
FUNCTION_BEGIN __hexagon_moddi3
{
p3 = tstbit(r1,#31)
}
{
r1:0 = abs(r1:0)
r3:2 = abs(r3:2)
}
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_moddi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_moddi3_return:
{
r1:0 = neg(r3:2)
}
{
r1:0 = vmux(p3,r1:0,r3:2)
jumpr r31
}
FUNCTION_END __hexagon_moddi3
.globl __qdsp_moddi3
.set __qdsp_moddi3, __hexagon_moddi3

@ -0,0 +1,66 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN __hexagon_modsi3
{
p2 = cmp.ge(r0,#0)
r2 = abs(r0)
r1 = abs(r1)
}
{
r3 = cl0(r2)
r4 = cl0(r1)
p0 = cmp.gtu(r1,r2)
}
{
r3 = sub(r4,r3)
if (p0) jumpr r31
}
{
p1 = cmp.eq(r3,#0)
loop0(1f,r3)
r0 = r2
r2 = lsl(r1,r3)
}
.falign
1:
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
if (p2) jumpr r31
}
{
r0 = neg(r0)
jumpr r31
}
FUNCTION_END __hexagon_modsi3
.globl __qdsp_modsi3
.set __qdsp_modsi3, __hexagon_modsi3

@ -0,0 +1,66 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
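// Editor's note (not from the source): the routine below starts from the
// hardware reciprocal estimate produced by sfrecipa and sharpens it with
// Newton-Raphson steps before forming and correcting the quotient. The C
// sketch shows that shape only; it uses a placeholder starting estimate and
// ignores the sffixup/:scale steps that handle zeros, infinities and
// denormals, so it is illustrative rather than a drop-in model.
//
//   float divsf3_model(float num, float den) {
//     float x = 1.0f / den;            // stand-in for the sfrecipa estimate
//     float e = 1.0f - den * x;        // error of the reciprocal estimate
//     x = x + x * e;                   // one Newton step: x ~ 1/den
//     float q = num * x;               // first quotient estimate
//     float r = num - q * den;         // residual of that estimate
//     return q + r * x;                // correct the quotient with the residual
//   }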
FUNCTION_BEGIN __hexagon_divsf3
{
r2,p0 = sfrecipa(r0,r1)
r4 = sffixupd(r0,r1)
r3 = ##0x3f800000 // 1.0
}
{
r5 = sffixupn(r0,r1)
r3 -= sfmpy(r4,r2):lib // 1 - den*recip gives the error of the estimate
r6 = ##0x80000000
r7 = r3
}
{
r2 += sfmpy(r3,r2):lib
r3 = r7
r6 = r5
r0 = and(r6,r5)
}
{
r3 -= sfmpy(r4,r2):lib
r0 += sfmpy(r5,r2):lib
}
{
r2 += sfmpy(r3,r2):lib
r6 -= sfmpy(r0,r4):lib
}
{
r0 += sfmpy(r6,r2):lib
}
{
r5 -= sfmpy(r0,r4):lib
}
{
r0 += sfmpy(r5,r2,p0):scale
jumpr r31
}
FUNCTION_END __hexagon_divsf3
Q6_ALIAS(divsf3)
FAST_ALIAS(divsf3)
FAST2_ALIAS(divsf3)

@ -0,0 +1,82 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
#define RIN r0
#define S r0
#define H r1
#define D r2
#define E r3
#define HALF r4
#define R r5
#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
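// Editor's note (not from the source): a rough C model of the refinement
// below. sfinvsqrta supplies the initial 1/sqrt estimate; the sketch uses a
// placeholder estimate instead and leaves out the sfclass/:scale handling of
// zero, negative and denormal inputs.
//
//   float sqrtf_model(float r) {
//     float e = 1.0f / __builtin_sqrtf(r); // stand-in for the sfinvsqrta estimate
//     float s = e * r;                     // s ~ sqrt(r)
//     float h = 0.5f * e;                  // h ~ 0.5/sqrt(r)
//     float d = 0.5f - s * h;              // error term
//     s += s * d;                          // refine the root estimate
//     h += h * d;                          // refine the half-reciprocal estimate
//     d = r - s * s;                       // residual against the input
//     return s + h * d;                    // final corrected result
//   }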
FUNCTION_BEGIN __hexagon_sqrtf
{
E,p0 = sfinvsqrta(RIN)
R = sffixupr(RIN)
HALF = ##0x3f000000 // 0.5
r1:0 = combine(#0,#0) // clear S/H
}
{
S += sfmpy(E,R):lib // S0
H += sfmpy(E,HALF):lib // H0
D = HALF
E = R
}
{
D -= sfmpy(S,H):lib // d0
p1 = sfclass(R,#1) // is zero?
//E -= sfmpy(S,S):lib // e0
}
{
S += sfmpy(S,D):lib // S1
H += sfmpy(H,D):lib // H1
D = HALF
E = R
}
{
D -= sfmpy(S,H):lib // d0
E -= sfmpy(S,S):lib // e0
}
{
S += sfmpy(H,E):lib // S2
H += sfmpy(H,D):lib // H2
D = HALF
E = R
}
{
//D -= sfmpy(S,H):lib // d2
E -= sfmpy(S,S):lib // e2
if (p1) r0 = or(r0,R) // sqrt(-0.0) = -0.0
}
{
S += sfmpy(H,E,p0):scale // S3
jumpr r31
}
FUNCTION_END __hexagon_sqrtf
Q6_ALIAS(sqrtf)
FAST_ALIAS(sqrtf)
FAST2_ALIAS(sqrtf)

@ -0,0 +1,71 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
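// Editor's sketch (not part of the original file): a C model of the
// shift-and-subtract (restoring) division implemented below. The helper name
// is illustrative and __builtin_clzll is the GCC/Clang intrinsic.
//
//   unsigned long long udivdi3_model(unsigned long long num,
//                                    unsigned long long den) {
//     if (den > num)
//       return 0;                              // divisor > dividend: quotient 0
//     int shift = __builtin_clzll(den) - __builtin_clzll(num);
//     unsigned long long bit = 1ULL << shift;  // current quotient bit
//     unsigned long long quo = 0;
//     den <<= shift;                           // align divisor msb with dividend msb
//     for (int i = 0; i <= shift; ++i) {       // shift+1 iterations
//       if (den <= num) {
//         num -= den;                          // subtract the shifted divisor
//         quo |= bit;                          // record this quotient bit
//       }
//       den >>= 1;
//       bit >>= 1;
//     }
//     return quo;
//   }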
FUNCTION_BEGIN __hexagon_udivdi3
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jumpr r31 // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
{
jumpr r31 // return
}
FUNCTION_END __hexagon_udivdi3
.globl __qdsp_udivdi3
.set __qdsp_udivdi3, __hexagon_udivdi3

@ -0,0 +1,71 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN __hexagon_udivmoddi4
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jumpr r31 // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
{
jumpr r31 // return
}
FUNCTION_END __hexagon_udivmoddi4
.globl __qdsp_udivmoddi4
.set __qdsp_udivmoddi4, __hexagon_udivmoddi4

@ -0,0 +1,60 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
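// Editor's sketch (not from the source): a C model of the combined 32-bit
// divide/modulo below. The assembly returns the quotient in r0 and leaves the
// remainder in r1; the model uses an output parameter instead. The helper
// name is illustrative; __builtin_clz is the GCC/Clang intrinsic.
//
//   unsigned udivmodsi4_model(unsigned num, unsigned den, unsigned *rem) {
//     unsigned quo = 0;
//     if (den <= num) {
//       int shift = __builtin_clz(den) - __builtin_clz(num);
//       unsigned bit = 1u << shift;           // current quotient bit
//       den <<= shift;                        // align divisor with dividend
//       for (int i = 0; i <= shift; ++i) {    // shift+1 restoring steps
//         if (den <= num) {
//           num -= den;
//           quo |= bit;
//         }
//         den >>= 1;
//         bit >>= 1;
//       }
//     }
//     *rem = num;                             // whatever is left is the remainder
//     return quo;
//   }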
FUNCTION_BEGIN __hexagon_udivmodsi4
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
p0 = cmp.eq(r6,#0)
if (p0.new) r4 = #0
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r4)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivmodsi4
.globl __qdsp_udivmodsi4
.set __qdsp_udivmodsi4, __hexagon_udivmodsi4

@ -0,0 +1,56 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN __hexagon_udivsi3
{
r2 = cl0(r0)
r3 = cl0(r1)
r5:4 = combine(#1,#0)
p0 = cmp.gtu(r1,r0)
}
{
r6 = sub(r3,r2)
r4 = r1
r1:0 = combine(r0,r4)
if (p0) jumpr r31
}
{
r3:2 = vlslw(r5:4,r6)
loop0(1f,r6)
}
.falign
1:
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r1 = sub(r1,r2)
if (!p0.new) r0 = add(r0,r3)
r3:2 = vlsrw(r3:2,#1)
}:endloop0
{
p0 = cmp.gtu(r2,r1)
if (!p0.new) r0 = add(r0,r3)
jumpr r31
}
FUNCTION_END __hexagon_udivsi3
.globl __qdsp_udivsi3
.set __qdsp_udivsi3, __hexagon_udivsi3

@ -0,0 +1,74 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN __hexagon_umoddi3
{
r6 = cl0(r1:0) // count leading 0's of dividend (numerator)
r7 = cl0(r3:2) // count leading 0's of divisor (denominator)
r5:4 = r3:2 // divisor moved into working registers
r3:2 = r1:0 // dividend is the initial remainder, r3:2 contains remainder
}
{
r10 = sub(r7,r6) // left shift count for bit & divisor
r1:0 = #0 // initialize quotient to 0
r15:14 = #1 // initialize bit to 1
}
{
r11 = add(r10,#1) // loop count is 1 more than shift count
r13:12 = lsl(r5:4,r10) // shift divisor msb into same bit position as dividend msb
r15:14 = lsl(r15:14,r10) // shift the bit left by same amount as divisor
}
{
p0 = cmp.gtu(r5:4,r3:2) // check if divisor > dividend
loop0(1f,r11) // register loop
}
{
if (p0) jump .hexagon_umoddi3_return // if divisor > dividend, we're done, so return
}
.falign
1:
{
p0 = cmp.gtu(r13:12,r3:2) // set predicate reg if shifted divisor > current remainder
}
{
r7:6 = sub(r3:2, r13:12) // subtract shifted divisor from current remainder
r9:8 = add(r1:0, r15:14) // save current quotient to temp (r9:8)
}
{
r1:0 = vmux(p0, r1:0, r9:8) // choose either current quotient or new quotient (r9:8)
r3:2 = vmux(p0, r3:2, r7:6) // choose either current remainder or new remainder (r7:6)
}
{
r15:14 = lsr(r15:14, #1) // shift bit right by 1 for next iteration
r13:12 = lsr(r13:12, #1) // shift "shifted divisor" right by 1 for next iteration
}:endloop0
.hexagon_umoddi3_return:
{
r1:0 = r3:2
jumpr r31
}
FUNCTION_END __hexagon_umoddi3
.globl __qdsp_umoddi3
.set __qdsp_umoddi3, __hexagon_umoddi3

@ -0,0 +1,55 @@
//===----------------------Hexagon builtin routine ------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
.macro FUNCTION_BEGIN name
.text
.p2align 5
.globl \name
.type \name, @function
\name:
.endm
.macro FUNCTION_END name
.size \name, . - \name
.endm
FUNCTION_BEGIN __hexagon_umodsi3
{
r2 = cl0(r0)
r3 = cl0(r1)
p0 = cmp.gtu(r1,r0)
}
{
r2 = sub(r3,r2)
if (p0) jumpr r31
}
{
loop0(1f,r2)
p1 = cmp.eq(r2,#0)
r2 = lsl(r1,r2)
}
.falign
1:
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r2)
r2 = lsr(r2,#1)
if (p1) r1 = #0
}:endloop0
{
p0 = cmp.gtu(r2,r0)
if (!p0.new) r0 = sub(r0,r1)
jumpr r31
}
FUNCTION_END __hexagon_umodsi3
.globl __qdsp_umodsi3
.set __qdsp_umodsi3, __hexagon_umodsi3

@ -60,7 +60,7 @@ typedef union
}s;
} udwords;
#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64))
#if (defined(__LP64__) || defined(__wasm__) || defined(__mips64)) || defined(__riscv)
#define CRT_HAS_128BIT
#endif

@ -16,8 +16,8 @@
#ifdef __APPLE__
#include <CoreFoundation/CoreFoundation.h>
#include <dispatch/dispatch.h>
#include <TargetConditionals.h>
#include <dispatch/dispatch.h>
#include <dlfcn.h>
#include <stdint.h>
#include <stdio.h>
@ -28,6 +28,26 @@
static int32_t GlobalMajor, GlobalMinor, GlobalSubminor;
static dispatch_once_t DispatchOnceCounter;
typedef CFDataRef (*CFDataCreateWithBytesNoCopyFuncTy)(CFAllocatorRef,
const UInt8 *, CFIndex,
CFAllocatorRef);
typedef CFPropertyListRef (*CFPropertyListCreateWithDataFuncTy)(
CFAllocatorRef, CFDataRef, CFOptionFlags, CFPropertyListFormat *,
CFErrorRef *);
typedef CFPropertyListRef (*CFPropertyListCreateFromXMLDataFuncTy)(
CFAllocatorRef, CFDataRef, CFOptionFlags, CFStringRef *);
typedef CFStringRef (*CFStringCreateWithCStringNoCopyFuncTy)(CFAllocatorRef,
const char *,
CFStringEncoding,
CFAllocatorRef);
typedef const void *(*CFDictionaryGetValueFuncTy)(CFDictionaryRef,
const void *);
typedef CFTypeID (*CFGetTypeIDFuncTy)(CFTypeRef);
typedef CFTypeID (*CFStringGetTypeIDFuncTy)(void);
typedef Boolean (*CFStringGetCStringFuncTy)(CFStringRef, char *, CFIndex,
CFStringEncoding);
typedef void (*CFReleaseFuncTy)(CFTypeRef);
/* Find and parse the SystemVersion.plist file. */
static void parseSystemVersionPList(void *Unused) {
(void)Unused;
@ -37,50 +57,49 @@ static void parseSystemVersionPList(void *Unused) {
return;
const CFAllocatorRef kCFAllocatorNull =
*(const CFAllocatorRef *)NullAllocator;
typeof(CFDataCreateWithBytesNoCopy) *CFDataCreateWithBytesNoCopyFunc =
(typeof(CFDataCreateWithBytesNoCopy) *)dlsym(
RTLD_DEFAULT, "CFDataCreateWithBytesNoCopy");
CFDataCreateWithBytesNoCopyFuncTy CFDataCreateWithBytesNoCopyFunc =
(CFDataCreateWithBytesNoCopyFuncTy)dlsym(RTLD_DEFAULT,
"CFDataCreateWithBytesNoCopy");
if (!CFDataCreateWithBytesNoCopyFunc)
return;
typeof(CFPropertyListCreateWithData) *CFPropertyListCreateWithDataFunc =
(typeof(CFPropertyListCreateWithData) *)dlsym(
CFPropertyListCreateWithDataFuncTy CFPropertyListCreateWithDataFunc =
(CFPropertyListCreateWithDataFuncTy)dlsym(
RTLD_DEFAULT, "CFPropertyListCreateWithData");
/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
* will be NULL on earlier OS versions. */
/* CFPropertyListCreateWithData was introduced only in macOS 10.6+, so it
* will be NULL on earlier OS versions. */
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
typeof(CFPropertyListCreateFromXMLData) *CFPropertyListCreateFromXMLDataFunc =
(typeof(CFPropertyListCreateFromXMLData) *)dlsym(
CFPropertyListCreateFromXMLDataFuncTy CFPropertyListCreateFromXMLDataFunc =
(CFPropertyListCreateFromXMLDataFuncTy)dlsym(
RTLD_DEFAULT, "CFPropertyListCreateFromXMLData");
#pragma clang diagnostic pop
/* CFPropertyListCreateFromXMLDataFunc is deprecated in macOS 10.10, so it
* might be NULL in future OS versions. */
if (!CFPropertyListCreateWithDataFunc && !CFPropertyListCreateFromXMLDataFunc)
return;
typeof(CFStringCreateWithCStringNoCopy) *CFStringCreateWithCStringNoCopyFunc =
(typeof(CFStringCreateWithCStringNoCopy) *)dlsym(
CFStringCreateWithCStringNoCopyFuncTy CFStringCreateWithCStringNoCopyFunc =
(CFStringCreateWithCStringNoCopyFuncTy)dlsym(
RTLD_DEFAULT, "CFStringCreateWithCStringNoCopy");
if (!CFStringCreateWithCStringNoCopyFunc)
return;
typeof(CFDictionaryGetValue) *CFDictionaryGetValueFunc =
(typeof(CFDictionaryGetValue) *)dlsym(RTLD_DEFAULT,
"CFDictionaryGetValue");
CFDictionaryGetValueFuncTy CFDictionaryGetValueFunc =
(CFDictionaryGetValueFuncTy)dlsym(RTLD_DEFAULT, "CFDictionaryGetValue");
if (!CFDictionaryGetValueFunc)
return;
typeof(CFGetTypeID) *CFGetTypeIDFunc =
(typeof(CFGetTypeID) *)dlsym(RTLD_DEFAULT, "CFGetTypeID");
CFGetTypeIDFuncTy CFGetTypeIDFunc =
(CFGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFGetTypeID");
if (!CFGetTypeIDFunc)
return;
typeof(CFStringGetTypeID) *CFStringGetTypeIDFunc =
(typeof(CFStringGetTypeID) *)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
CFStringGetTypeIDFuncTy CFStringGetTypeIDFunc =
(CFStringGetTypeIDFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetTypeID");
if (!CFStringGetTypeIDFunc)
return;
typeof(CFStringGetCString) *CFStringGetCStringFunc =
(typeof(CFStringGetCString) *)dlsym(RTLD_DEFAULT, "CFStringGetCString");
CFStringGetCStringFuncTy CFStringGetCStringFunc =
(CFStringGetCStringFuncTy)dlsym(RTLD_DEFAULT, "CFStringGetCString");
if (!CFStringGetCStringFunc)
return;
typeof(CFRelease) *CFReleaseFunc =
(typeof(CFRelease) *)dlsym(RTLD_DEFAULT, "CFRelease");
CFReleaseFuncTy CFReleaseFunc =
(CFReleaseFuncTy)dlsym(RTLD_DEFAULT, "CFRelease");
if (!CFReleaseFunc)
return;
@ -163,10 +182,14 @@ int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
/* Populate the global version variables, if they haven't already. */
dispatch_once_f(&DispatchOnceCounter, NULL, parseSystemVersionPList);
if (Major < GlobalMajor) return 1;
if (Major > GlobalMajor) return 0;
if (Minor < GlobalMinor) return 1;
if (Minor > GlobalMinor) return 0;
if (Major < GlobalMajor)
return 1;
if (Major > GlobalMajor)
return 0;
if (Minor < GlobalMinor)
return 1;
if (Minor > GlobalMinor)
return 0;
return Subminor <= GlobalSubminor;
}

@ -0,0 +1,28 @@
//===--- mulsi3.S - Integer multiplication routines ---===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
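// Editor's sketch (not part of the source): a C model of the shift-and-add
// loop implemented below for RV32 cores without the M extension -- add the
// multiplicand whenever the low bit of the multiplier is set, then shift.
// The function name is illustrative only.
//
//   unsigned int mulsi3_model(unsigned int a, unsigned int b) {
//     unsigned int r = 0;
//     while (b != 0) {
//       if (b & 1)
//         r += a;          // accumulate when the current multiplier bit is set
//       b >>= 1;           // consume one multiplier bit
//       a <<= 1;           // weight the multiplicand for the next bit
//     }
//     return r;
//   }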
#if !defined(__riscv_mul) && __riscv_xlen == 32
.text
.align 2
.globl __mulsi3
.type __mulsi3, @function
__mulsi3:
mv a2, a0
mv a0, zero
.L1:
andi a3, a1, 1
beqz a3, .L2
add a0, a0, a2
.L2:
srli a1, a1, 1
slli a2, a2, 1
bnez a1, .L1
ret
#endif

@ -30,6 +30,8 @@ if(OS_NAME MATCHES "Linux")
OBJECT_LIBS RTInterception
RTSanitizerCommon
RTSanitizerCommonLibc
RTSanitizerCommonCoverage
RTSanitizerCommonSymbolizer
RTUbsan
CFLAGS ${CFI_CFLAGS} ${CFI_DIAG_CFLAGS}
PARENT_TARGET cfi)

@ -132,7 +132,11 @@ void ShadowBuilder::Start() {
void ShadowBuilder::AddUnchecked(uptr begin, uptr end) {
uint16_t *shadow_begin = MemToShadow(begin, shadow_);
uint16_t *shadow_end = MemToShadow(end - 1, shadow_) + 1;
memset(shadow_begin, kUncheckedShadow,
// memset takes a byte, so our unchecked shadow value requires both bytes to
// be the same. Make sure we're ok during compilation.
static_assert((kUncheckedShadow & 0xff) == ((kUncheckedShadow >> 8) & 0xff),
"Both bytes of the 16-bit value must be the same!");
memset(shadow_begin, kUncheckedShadow & 0xff,
(shadow_end - shadow_begin) * sizeof(*shadow_begin));
}
@ -379,6 +383,8 @@ __cfi_slowpath_diag(u64 CallSiteTypeId, void *Ptr, void *DiagData) {
}
#endif
static void EnsureInterceptorsInitialized();
// Setup shadow for dlopen()ed libraries.
// The actual shadow setup happens after dlopen() returns, which means that
// a library can not be a target of any CFI checks while its constructors are
@ -388,6 +394,7 @@ __cfi_slowpath_diag(u64 CallSiteTypeId, void *Ptr, void *DiagData) {
// We could insert a high-priority constructor into the library, but that would
// not help with the uninstrumented libraries.
INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
EnsureInterceptorsInitialized();
EnterLoader();
void *handle = REAL(dlopen)(filename, flag);
ExitLoader();
@ -395,12 +402,27 @@ INTERCEPTOR(void*, dlopen, const char *filename, int flag) {
}
INTERCEPTOR(int, dlclose, void *handle) {
EnsureInterceptorsInitialized();
EnterLoader();
int res = REAL(dlclose)(handle);
ExitLoader();
return res;
}
static BlockingMutex interceptor_init_lock(LINKER_INITIALIZED);
static bool interceptors_inited = false;
static void EnsureInterceptorsInitialized() {
BlockingMutexLock lock(&interceptor_init_lock);
if (interceptors_inited)
return;
INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(dlclose);
interceptors_inited = true;
}
extern "C" SANITIZER_INTERFACE_ATTRIBUTE
#if !SANITIZER_CAN_USE_PREINIT_ARRAY
// On ELF platforms, the constructor is invoked using .preinit_array (see below)
@ -411,9 +433,6 @@ void __cfi_init() {
InitializeFlags();
InitShadow();
INTERCEPT_FUNCTION(dlopen);
INTERCEPT_FUNCTION(dlclose);
#ifdef CFI_ENABLE_DIAG
__ubsan::InitAsPlugin();
#endif
