diff --git a/CMakeLists.txt b/CMakeLists.txt index 00214782ca22..3ad466901ac0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -191,6 +191,10 @@ set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib ) add_llvm_definitions( -D__STDC_LIMIT_MACROS ) add_llvm_definitions( -D__STDC_CONSTANT_MACROS ) +option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON) +option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) +option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF) + if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) # TODO: support other platforms and toolchains. option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF) @@ -226,6 +230,27 @@ if( MSVC ) add_llvm_definitions("/${LLVM_USE_CRT}") message(STATUS "Using VC++ CRT: ${LLVM_USE_CRT}") endif (NOT ${LLVM_USE_CRT} STREQUAL "") + + # Enable warnings + if (LLVM_ENABLE_WARNINGS) + add_llvm_definitions( /W4 /Wall ) + if (LLVM_ENABLE_PEDANTIC) + # No MSVC equivalent available + endif (LLVM_ENABLE_PEDANTIC) + endif (LLVM_ENABLE_WARNINGS) + if (LLVM_ENABLE_WERROR) + add_llvm_definitions( /WX ) + endif (LLVM_ENABLE_WERROR) +elseif( CMAKE_COMPILER_IS_GNUCXX ) + if (LLVM_ENABLE_WARNINGS) + add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings ) + if (LLVM_ENABLE_PEDANTIC) + add_llvm_definitions( -pedantic -Wno-long-long ) + endif (LLVM_ENABLE_PEDANTIC) + endif (LLVM_ENABLE_WARNINGS) + if (LLVM_ENABLE_WERROR) + add_llvm_definitions( -Werror ) + endif (LLVM_ENABLE_WERROR) endif( MSVC ) include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR}) @@ -280,6 +305,7 @@ add_subdirectory(utils/not) set(LLVM_ENUM_ASM_PRINTERS "") set(LLVM_ENUM_ASM_PARSERS "") +set(LLVM_ENUM_DISASSEMBLERS "") foreach(t ${LLVM_TARGETS_TO_BUILD}) message(STATUS "Targeting ${t}") add_subdirectory(lib/Target/${t}) @@ -294,6 +320,11 @@ foreach(t ${LLVM_TARGETS_TO_BUILD}) set(LLVM_ENUM_ASM_PARSERS "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n") endif( EXISTS 
${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/AsmParser/CMakeLists.txt ) + if( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/Disassembler/CMakeLists.txt ) + add_subdirectory(lib/Target/${t}/Disassembler) + set(LLVM_ENUM_DISASSEMBLERS + "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n") + endif( EXISTS ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}/Disassembler/CMakeLists.txt ) set(CURRENT_LLVM_TARGET) endforeach(t) @@ -309,6 +340,12 @@ configure_file( ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def ) +# Produce llvm/Config/Disassemblers.def +configure_file( + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in + ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def + ) + add_subdirectory(lib/ExecutionEngine) add_subdirectory(lib/ExecutionEngine/Interpreter) add_subdirectory(lib/ExecutionEngine/JIT) @@ -319,14 +356,10 @@ add_subdirectory(lib/Archive) add_subdirectory(projects) option(LLVM_BUILD_TOOLS "Build LLVM tool programs." ON) -if(LLVM_BUILD_TOOLS) - add_subdirectory(tools) -endif() +add_subdirectory(tools) option(LLVM_BUILD_EXAMPLES "Build LLVM example programs." 
OFF) -if(LLVM_BUILD_EXAMPLES) - add_subdirectory(examples) -endif () +add_subdirectory(examples) install(DIRECTORY include/ DESTINATION include diff --git a/Makefile b/Makefile index 31c1b6961a3b..1ef89e4ede09 100644 --- a/Makefile +++ b/Makefile @@ -155,9 +155,11 @@ install-libs: install FilesToConfig := \ include/llvm/Config/config.h \ include/llvm/Config/Targets.def \ - include/llvm/Config/AsmPrinters.def \ + include/llvm/Config/AsmPrinters.def \ + include/llvm/Config/AsmParsers.def \ + include/llvm/Config/Disassemblers.def \ include/llvm/System/DataTypes.h \ - tools/llvmc/plugins/Base/Base.td + tools/llvmc/plugins/Base/Base.td FilesToConfigPATH := $(addprefix $(LLVM_OBJ_ROOT)/,$(FilesToConfig)) all-local:: $(FilesToConfigPATH) diff --git a/Makefile.rules b/Makefile.rules index d9b210841e6b..49ecb1e2d866 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -1565,6 +1565,11 @@ $(ObjDir)/%GenDAGISel.inc.tmp : %.td $(ObjDir)/.dir $(Echo) "Building $( conftest.$ac_ext < conftest.$ac_ext + echo '#line 13267 "configure"' > conftest.$ac_ext if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -14976,11 +14982,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:14979: $lt_compile\"" >&5) + (eval echo "\"\$as_me:14985: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:14983: \$? = $ac_status" >&5 + echo "$as_me:14989: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. 
@@ -15244,11 +15250,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15247: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15253: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:15251: \$? = $ac_status" >&5 + echo "$as_me:15257: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -15348,11 +15354,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15351: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15357: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:15355: \$? = $ac_status" >&5 + echo "$as_me:15361: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -17800,7 +17806,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext < conftest.$ac_ext <&5) + (eval echo "\"\$as_me:20277: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:20275: \$? = $ac_status" >&5 + echo "$as_me:20281: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. 
@@ -20372,11 +20378,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:20375: $lt_compile\"" >&5) + (eval echo "\"\$as_me:20381: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:20379: \$? = $ac_status" >&5 + echo "$as_me:20385: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -21942,11 +21948,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:21945: $lt_compile\"" >&5) + (eval echo "\"\$as_me:21951: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:21949: \$? = $ac_status" >&5 + echo "$as_me:21955: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -22046,11 +22052,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:22049: $lt_compile\"" >&5) + (eval echo "\"\$as_me:22055: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:22053: \$? = $ac_status" >&5 + echo "$as_me:22059: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -24281,11 +24287,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:24284: $lt_compile\"" >&5) + (eval echo "\"\$as_me:24290: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:24288: \$? = $ac_status" >&5 + echo "$as_me:24294: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -24549,11 +24555,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:24552: $lt_compile\"" >&5) + (eval echo "\"\$as_me:24558: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:24556: \$? = $ac_status" >&5 + echo "$as_me:24562: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -24653,11 +24659,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:24656: $lt_compile\"" >&5) + (eval echo "\"\$as_me:24662: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:24660: \$? = $ac_status" >&5 + echo "$as_me:24666: \$? 
= $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -35375,6 +35381,8 @@ ac_config_files="$ac_config_files include/llvm/Config/AsmPrinters.def" ac_config_files="$ac_config_files include/llvm/Config/AsmParsers.def" +ac_config_files="$ac_config_files include/llvm/Config/Disassemblers.def" + ac_config_headers="$ac_config_headers include/llvm/System/DataTypes.h" @@ -36002,6 +36010,7 @@ do "include/llvm/Config/Targets.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Targets.def" ;; "include/llvm/Config/AsmPrinters.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmPrinters.def" ;; "include/llvm/Config/AsmParsers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/AsmParsers.def" ;; + "include/llvm/Config/Disassemblers.def") CONFIG_FILES="$CONFIG_FILES include/llvm/Config/Disassemblers.def" ;; "include/llvm/System/DataTypes.h") CONFIG_HEADERS="$CONFIG_HEADERS include/llvm/System/DataTypes.h" ;; "Makefile.config") CONFIG_FILES="$CONFIG_FILES Makefile.config" ;; "llvm.spec") CONFIG_FILES="$CONFIG_FILES llvm.spec" ;; @@ -36175,12 +36184,12 @@ TARGETS_TO_BUILD!$TARGETS_TO_BUILD$ac_delim LLVM_ENUM_TARGETS!$LLVM_ENUM_TARGETS$ac_delim LLVM_ENUM_ASM_PRINTERS!$LLVM_ENUM_ASM_PRINTERS$ac_delim LLVM_ENUM_ASM_PARSERS!$LLVM_ENUM_ASM_PARSERS$ac_delim +LLVM_ENUM_DISASSEMBLERS!$LLVM_ENUM_DISASSEMBLERS$ac_delim ENABLE_CBE_PRINTF_A!$ENABLE_CBE_PRINTF_A$ac_delim OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim ENABLE_LLVMC_DYNAMIC!$ENABLE_LLVMC_DYNAMIC$ac_delim -ENABLE_LLVMC_DYNAMIC_PLUGINS!$ENABLE_LLVMC_DYNAMIC_PLUGINS$ac_delim _ACEOF if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then @@ -36222,6 +36231,7 @@ _ACEOF ac_delim='%!_!# ' for ac_last_try in false false false false false :; do cat >conf$$subs.sed <<_ACEOF 
+ENABLE_LLVMC_DYNAMIC_PLUGINS!$ENABLE_LLVMC_DYNAMIC_PLUGINS$ac_delim CXX!$CXX$ac_delim CXXFLAGS!$CXXFLAGS$ac_delim ac_ct_CXX!$ac_ct_CXX$ac_delim @@ -36319,7 +36329,7 @@ LIBOBJS!$LIBOBJS$ac_delim LTLIBOBJS!$LTLIBOBJS$ac_delim _ACEOF - if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 95; then + if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 96; then break elif $ac_last_try; then { { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5 @@ -36338,7 +36348,7 @@ fi cat >>$CONFIG_STATUS <<_ACEOF cat >"\$tmp/subs-2.sed" <<\CEOF$ac_eof -/@[a-zA-Z_][a-zA-Z_0-9]*@/!b end +/@[a-zA-Z_][a-zA-Z_0-9]*@/!b _ACEOF sed ' s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g @@ -36351,8 +36361,6 @@ N; s/^.*\n//; s/[,\\&]/\\&/g; s/@/@|#_!!_#|/g; b n ' >>$CONFIG_STATUS >$CONFIG_STATUS <<_ACEOF -:end -s/|#_!!_#|//g CEOF$ac_eof _ACEOF @@ -36600,7 +36608,7 @@ s&@abs_builddir@&$ac_abs_builddir&;t t s&@abs_top_builddir@&$ac_abs_top_builddir&;t t s&@INSTALL@&$ac_INSTALL&;t t $ac_datarootdir_hack -" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" >$tmp/out +" $ac_file_inputs | sed -f "$tmp/subs-1.sed" | sed -f "$tmp/subs-2.sed" | sed 's/|#_!!_#|//g' >$tmp/out test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$tmp/out"`; test -n "$ac_out"; } && diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html index d0869601c626..6903ede08b48 100644 --- a/docs/AliasAnalysis.html +++ b/docs/AliasAnalysis.html @@ -225,12 +225,7 @@ method for testing dependencies between function calls. This method takes two call sites (CS1 & CS2), returns NoModRef if the two calls refer to disjoint memory locations, Ref if CS1 reads memory written by CS2, Mod if CS1 writes to memory read or written by CS2, or ModRef if CS1 might read or write memory -accessed by CS2. Note that this relation is not commutative. 
Clients that use -this method should be predicated on the hasNoModRefInfoForCalls() -method, which indicates whether or not an analysis can provide mod/ref -information for function call pairs (most can not). If this predicate is false, -the client shouldn't waste analysis time querying the getModRefInfo -method many times.

+accessed by CS2. Note that this relation is not commutative.

@@ -249,21 +244,6 @@ analysis implementations and can be put to good use by various clients. - -
- The getMustAliases method -
- -
- -

The getMustAliases method returns all values that are known to -always must alias a pointer. This information can be provided in some cases for -important objects like the null pointer and global values. Knowing that a -pointer always points to a particular function allows indirect calls to be -turned into direct calls, for example.

- -
-
The pointsToConstantMemory method @@ -969,7 +949,7 @@ analysis directly.

Chris Lattner
LLVM Compiler Infrastructure
- Last modified: $Date: 2009-04-25 23:11:37 +0200 (Sat, 25 Apr 2009) $ + Last modified: $Date: 2009-11-22 17:01:44 +0100 (Sun, 22 Nov 2009) $ diff --git a/docs/CMake.html b/docs/CMake.html index d83e3ad54565..40a2cec8e91b 100644 --- a/docs/CMake.html +++ b/docs/CMake.html @@ -251,10 +251,16 @@ -DLLVM_TARGETS_TO_BUILD="X86;PowerPC;Alpha".
LLVM_BUILD_TOOLS:BOOL
-
Build LLVM tools. Defaults to ON.
+
Build LLVM tools. Defaults to ON. Targets for building each tool + are generated in any case. You can build a tool separately by + invoking its target. For example, you can build llvm-as + with a makefile-based system by executing make llvm-as in the + root of your build directory.
LLVM_BUILD_EXAMPLES:BOOL
-
Build LLVM examples. Defaults to ON.
+
Build LLVM examples. Defaults to OFF. Targets for building each + example are generated in any case. See documentation + for LLVM_BUILD_TOOLS above for more details.
LLVM_ENABLE_THREADS:BOOL
Build with threads support, if available. Defaults to ON.
@@ -268,10 +274,21 @@ compiler supports this flag. Some systems, like Windows, do not need this flag. Defaults to ON. +
LLVM_ENABLE_WARNINGS:BOOL
+
Enable all compiler warnings. Defaults to ON.
+ +
LLVM_ENABLE_PEDANTIC:BOOL
+
Enable pedantic mode. This disables compiler-specific extensions, if + possible. Defaults to ON.
+ +
LLVM_ENABLE_WERROR:BOOL
+
Stop and fail the build if a compiler warning is + triggered. Defaults to OFF.
+
LLVM_BUILD_32_BITS:BOOL
Build 32-bits executables and libraries on 64-bits systems. This - option is available only on some 64-bits unix systems. Defaults to - OFF.
+ option is available only on some 64-bits unix systems. Defaults to + OFF.
LLVM_TARGET_ARCH:STRING
LLVM target to use for native code generation. This is required diff --git a/docs/CommandGuide/FileCheck.pod b/docs/CommandGuide/FileCheck.pod index d3f640d64dcc..32516ad87b3e 100644 --- a/docs/CommandGuide/FileCheck.pod +++ b/docs/CommandGuide/FileCheck.pod @@ -224,7 +224,7 @@ The first check line matches a regex (%[a-z]+) and captures it into the variables "REGISTER". The second line verifies that whatever is in REGISTER occurs later in the file after an "andw". FileCheck variable references are always contained in [[ ]] pairs, are named, and their names can be -formed with the regex "[a-zA-Z][a-zA-Z0-9]*". If a colon follows the +formed with the regex "[a-zA-Z_][a-zA-Z0-9_]*". If a colon follows the name, then it is a definition of the variable, if not, it is a use. FileCheck variables can be defined multiple times, and uses always get the diff --git a/docs/CommandGuide/llvmc.pod b/docs/CommandGuide/llvmc.pod index e3031e123d38..e5e065116131 100644 --- a/docs/CommandGuide/llvmc.pod +++ b/docs/CommandGuide/llvmc.pod @@ -126,24 +126,31 @@ use the B<-Wo,> option. =item B<-I> I -Add a directory to the header file search path. This option can be -repeated. +Add a directory to the header file search path. =item B<-L> I -Add I to the library search path. This option can be -repeated. +Add I to the library search path. + +=item B<-F> I + +Add I to the framework search path. =item B<-l>I Link in the library libI.[bc | a | so]. This library should be a bitcode library. +=item B<-framework> I + +Link in the library libI.[bc | a | so]. This library should +be a bitcode library. + =item B<-emit-llvm> -Make the output be LLVM bitcode (with B<-c>) or assembly (with B<-S>) instead -of native object (or assembly). If B<-emit-llvm> is given without either B<-c> -or B<-S> it has no effect. +Output LLVM bitcode (with B<-c>) or assembly (with B<-S>) instead of native +object (or assembly). If B<-emit-llvm> is given without either B<-c> or B<-S> +it has no effect. 
=item B<-Wa> @@ -157,6 +164,10 @@ Pass options to linker. Pass options to opt. +=item B<-Wllc> + +Pass options to llc (code generator). + =back =head1 EXIT STATUS diff --git a/docs/LangRef.html b/docs/LangRef.html index a417db011541..894ad4978043 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -291,6 +291,8 @@ 'llvm.trap' Intrinsic
  • 'llvm.stackprotector' Intrinsic
  • +
  • + 'llvm.objectsize' Intrinsic
  • @@ -1440,11 +1442,6 @@ Classifications
    -

    Note that the code generator does not yet support large integer types to be - used as function return types. The specific limit on how large a return type - the code generator can currently handle is target-dependent; currently it's - often 64 bits for 32-bit targets and 128 bits for 64-bit targets.

    - @@ -1583,11 +1580,6 @@ Classifications length array type. An implementation of 'pascal style arrays' in LLVM could use the type "{ i32, [0 x float]}", for example.

    -

    Note that the code generator does not yet support large aggregate types to be - used as function return types. The specific limit on how large an aggregate - return type the code generator can currently handle is target-dependent, and - also dependent on the aggregate element types.

    - @@ -1680,11 +1672,6 @@ Classifications -

    Note that the code generator does not yet support large aggregate types to be - used as function return types. The specific limit on how large an aggregate - return type the code generator can currently handle is target-dependent, and - also dependent on the aggregate element types.

    - @@ -1775,8 +1762,7 @@ Classifications

    A vector type is a simple derived type that represents a vector of elements. Vector types are used when multiple primitive data are operated in parallel using a single instruction (SIMD). A vector type requires a size (number of - elements) and an underlying primitive data type. Vectors must have a power - of two length (1, 2, 4, 8, 16 ...). Vector types are considered + elements) and an underlying primitive data type. Vector types are considered first class.

    Syntax:
    @@ -1803,11 +1789,6 @@ Classifications -

    Note that the code generator does not yet support large vector types to be - used as function return types. The specific limit on how large a vector - return type codegen can currently handle is target-dependent; currently it's - often a few times longer than a hardware vector register.

    - @@ -2600,14 +2581,6 @@ Instruction ret { i32, i8 } { i32 4, i8 2 } ; Return a struct of values 4 and 2 -

    Note that the code generator does not yet fully support large - return values. The specific sizes that are currently supported are - dependent on the target. For integers, on 32-bit targets the limit - is often 64 bits, and on 64-bit targets the limit is often 128 bits. - For aggregate types, the current limits are dependent on the element - types; for example targets are often limited to 2 total integer - elements and 2 total floating-point elements.

    - @@ -7275,6 +7248,41 @@ LLVM.

    + + + +
    + +
    Syntax:
    +
    +  declare i32 @llvm.objectsize.i32( i8* <ptr>, i32 <type> )
    +  declare i64 @llvm.objectsize.i64( i8* <ptr>, i32 <type> )
    +
    + +
    Overview:
    +

    The llvm.objectsize intrinsic returns the constant number of bytes + from ptr to the end of the object ptr points to if it + can deduce this at compile time. If there are any side-effects in evaluating + the argument or it cannot deduce which objects ptr points to at compile + time the intrinsic returns (size_t) -1 for type 0 + or 1 and (size_t) 0 for type 2 or 3.

    + +
    Arguments:
    +

The llvm.objectsize intrinsic takes two arguments. The first + argument, ptr, is a pointer to the object; the second, type, + is an integer ranging from 0 to 3. The least significant bit corresponds to + a return value based on whole objects; the second bit selects whether we + return the maximum or minimum remaining bytes computed.

    + +
    Semantics:
    +

The llvm.objectsize intrinsic is lowered to either a constant + representing the size of the object concerned or, if the size cannot be determined at compile time, + to (size_t) -1 for type 0 or 1 and (size_t) 0 for type 2 or 3.

    + +
    +
    @@ -7285,7 +7293,7 @@ LLVM.

    Chris Lattner
    The LLVM Compiler Infrastructure
    - Last modified: $Date: 2009-11-09 20:01:53 +0100 (Mon, 09 Nov 2009) $ + Last modified: $Date: 2009-11-30 09:03:53 +0100 (Mon, 30 Nov 2009) $
    diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html index 9b7571ad0d4b..4aca52cc2637 100644 --- a/docs/SourceLevelDebugging.html +++ b/docs/SourceLevelDebugging.html @@ -37,15 +37,10 @@
  • Debugger intrinsic functions
  • -
  • Representing stopping points in the - source program
  • +
  • Object lifetimes and scoping
  • C/C++ front-end specific debug information
    1. C/C++ source file information
    2. @@ -761,92 +756,6 @@ DW_TAG_return_variable = 258 - - - -
      -
      -  void %llvm.dbg.stoppoint( uint, uint, metadata)
      -
      - -

      This intrinsic is used to provide correspondence between the source file and - the generated code. The first argument is the line number (base 1), second - argument is the column number (0 if unknown) and the third argument the - source %llvm.dbg.compile_unit. - Code following a call to this intrinsic will - have been defined in close proximity of the line, column and file. This - information holds until the next call - to %lvm.dbg.stoppoint.

      - -
      - - - - -
      -
      -  void %llvm.dbg.func.start( metadata )
      -
      - -

      This intrinsic is used to link the debug information - in %llvm.dbg.subprogram to the - function. It defines the beginning of the function's declarative region - (scope). It also implies a call to - %llvm.dbg.stoppoint which - defines a source line "stop point". The intrinsic should be called early in - the function after the all the alloca instructions. It should be paired off - with a closing - %llvm.dbg.region.end. - The function's single argument is - the %llvm.dbg.subprogram.type.

      - -
      - - - - -
      -
      -  void %llvm.dbg.region.start( metadata )
      -
      - -

      This intrinsic is used to define the beginning of a declarative scope (ex. - block) for local language elements. It should be paired off with a closing - %llvm.dbg.region.end. The - function's single argument is - the %llvm.dbg.block which is - starting.

      - - -
      - - - - -
      -
      -  void %llvm.dbg.region.end( metadata )
      -
      - -

      This intrinsic is used to define the end of a declarative scope (ex. block) - for local language elements. It should be paired off with an - opening %llvm.dbg.region.start - or %llvm.dbg.func.start. - The function's single argument is either - the %llvm.dbg.block or - the %llvm.dbg.subprogram.type - which is ending.

      - -
      -
      llvm.dbg.declare @@ -865,70 +774,35 @@ DW_TAG_return_variable = 258
      - - - -
      - -

      LLVM debugger "stop points" are a key part of the debugging representation - that allows the LLVM to maintain simple semantics - for debugging optimized code. The basic idea is that - the front-end inserts calls to - the %llvm.dbg.stoppoint - intrinsic function at every point in the program where a debugger should be - able to inspect the program (these correspond to places a debugger stops when - you "step" through it). The front-end can choose to place these as - fine-grained as it would like (for example, before every subexpression - evaluated), but it is recommended to only put them after every source - statement that includes executable code.

      - -

      Using calls to this intrinsic function to demark legal points for the - debugger to inspect the program automatically disables any optimizations that - could potentially confuse debugging information. To - non-debug-information-aware transformations, these calls simply look like - calls to an external function, which they must assume to do anything - (including reading or writing to any part of reachable memory). On the other - hand, it does not impact many optimizations, such as code motion of - non-trapping instructions, nor does it impact optimization of subexpressions, - code duplication transformations, or basic-block reordering - transformations.

      - -
      -
      -

      In many languages, the local variables in functions can have their lifetime - or scope limited to a subset of a function. In the C family of languages, +

      In many languages, the local variables in functions can have their lifetimes + or scopes limited to a subset of a function. In the C family of languages, for example, variables are only live (readable and writable) within the source block that they are defined in. In functional languages, values are only readable after they have been defined. Though this is a very obvious - concept, it is also non-trivial to model in LLVM, because it has no notion of + concept, it is non-trivial to model in LLVM, because it has no notion of scoping in this sense, and does not want to be tied to a language's scoping rules.

      -

      In order to handle this, the LLVM debug format uses the notion of "regions" - of a function, delineated by calls to intrinsic functions. These intrinsic - functions define new regions of the program and indicate when the region - lifetime expires. Consider the following C fragment, for example:

      +

In order to handle this, the LLVM debug format uses the metadata attached to + LLVM instructions to encode line number and scoping information. Consider the + following C fragment, for example:

       1.  void foo() {
      -2.    int X = ...;
      -3.    int Y = ...;
      +2.    int X = 21;
      +3.    int Y = 22;
       4.    {
      -5.      int Z = ...;
      -6.      ...
      +5.      int Z = 23;
      +6.      Z = X;
       7.    }
      -8.    ...
      +8.    X = Y;
       9.  }
       
      @@ -937,98 +811,129 @@ DW_TAG_return_variable = 258
      -void %foo() {
      +define void @foo() nounwind ssp {
       entry:
      -    %X = alloca int
      -    %Y = alloca int
      -    %Z = alloca int
      -    
      -    ...
      -    
      -    call void @llvm.dbg.func.start( metadata !0)
      -    
      -    call void @llvm.dbg.stoppoint( uint 2, uint 2, metadata !1)
      -    
      -    call void @llvm.dbg.declare({}* %X, ...)
      -    call void @llvm.dbg.declare({}* %Y, ...)
      -    
      -    ;; Evaluate expression on line 2, assigning to X.
      -    
      -    call void @llvm.dbg.stoppoint( uint 3, uint 2, metadata !1)
      -    
      -    ;; Evaluate expression on line 3, assigning to Y.
      -    
      -    call void @llvm.region.start()
      -    call void @llvm.dbg.stoppoint( uint 5, uint 4, metadata !1)
      -    call void @llvm.dbg.declare({}* %X, ...)
      -    
      -    ;; Evaluate expression on line 5, assigning to Z.
      -    
      -    call void @llvm.dbg.stoppoint( uint 7, uint 2, metadata !1)
      -    call void @llvm.region.end()
      -    
      -    call void @llvm.dbg.stoppoint( uint 9, uint 2, metadata !1)
      -    
      -    call void @llvm.region.end()
      -    
      -    ret void
      +  %X = alloca i32, align 4                        ; <i32*> [#uses=4]
      +  %Y = alloca i32, align 4                        ; <i32*> [#uses=4]
      +  %Z = alloca i32, align 4                        ; <i32*> [#uses=3]
      +  %0 = bitcast i32* %X to { }*                    ; <{ }*> [#uses=1]
      +  call void @llvm.dbg.declare({ }* %0, metadata !0), !dbg !7
      +  store i32 21, i32* %X, !dbg !8
      +  %1 = bitcast i32* %Y to { }*                    ; <{ }*> [#uses=1]
      +  call void @llvm.dbg.declare({ }* %1, metadata !9), !dbg !10
      +  store i32 22, i32* %Y, !dbg !11
      +  %2 = bitcast i32* %Z to { }*                    ; <{ }*> [#uses=1]
      +  call void @llvm.dbg.declare({ }* %2, metadata !12), !dbg !14
      +  store i32 23, i32* %Z, !dbg !15
      +  %tmp = load i32* %X, !dbg !16                   ; <i32> [#uses=1]
      +  %tmp1 = load i32* %Y, !dbg !16                  ; <i32> [#uses=1]
      +  %add = add nsw i32 %tmp, %tmp1, !dbg !16        ; <i32> [#uses=1]
      +  store i32 %add, i32* %Z, !dbg !16
      +  %tmp2 = load i32* %Y, !dbg !17                  ; <i32> [#uses=1]
      +  store i32 %tmp2, i32* %X, !dbg !17
      +  ret void, !dbg !18
       }
      +
      +declare void @llvm.dbg.declare({ }*, metadata) nounwind readnone
      +
      +!0 = metadata !{i32 459008, metadata !1, metadata !"X", 
      +                metadata !3, i32 2, metadata !6}; [ DW_TAG_auto_variable ]
      +!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
      +!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", metadata !"foo", 
      +               metadata !"foo", metadata !3, i32 1, metadata !4, 
      +               i1 false, i1 true}; [DW_TAG_subprogram ]
      +!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"foo.c", 
      +                metadata !"/private/tmp", metadata !"clang 1.1", i1 true, 
      +                i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ]
      +!4 = metadata !{i32 458773, metadata !3, metadata !"", null, i32 0, i64 0, i64 0, 
      +                i64 0, i32 0, null, metadata !5, i32 0}; [DW_TAG_subroutine_type ]
      +!5 = metadata !{null}
      +!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, 
      +                i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
      +!7 = metadata !{i32 2, i32 7, metadata !1, null}
      +!8 = metadata !{i32 2, i32 3, metadata !1, null}
      +!9 = metadata !{i32 459008, metadata !1, metadata !"Y", metadata !3, i32 3, 
      +                metadata !6}; [ DW_TAG_auto_variable ]
      +!10 = metadata !{i32 3, i32 7, metadata !1, null}
      +!11 = metadata !{i32 3, i32 3, metadata !1, null}
      +!12 = metadata !{i32 459008, metadata !13, metadata !"Z", metadata !3, i32 5, 
      +                 metadata !6}; [ DW_TAG_auto_variable ]
      +!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
      +!14 = metadata !{i32 5, i32 9, metadata !13, null}
      +!15 = metadata !{i32 5, i32 5, metadata !13, null}
      +!16 = metadata !{i32 6, i32 5, metadata !13, null}
      +!17 = metadata !{i32 8, i32 3, metadata !1, null}
      +!18 = metadata !{i32 9, i32 1, metadata !2, null}
       
      -

      This example illustrates a few important details about the LLVM debugging - information. In particular, it shows how the various intrinsics are applied - together to allow a debugger to analyze the relationship between statements, - variable definitions, and the code used to implement the function.

      +

      This example illustrates a few important details about LLVM debugging + information. In particular, it shows how the llvm.dbg.declare + intrinsic and location information, which are attached to an instruction, + are applied together to allow a debugger to analyze the relationship between + statements, variable definitions, and the code used to implement the + function.

      -

      The first - intrinsic %llvm.dbg.func.start - provides a link with the subprogram - descriptor containing the details of this function. This call also - defines the beginning of the function region, bounded by - the %llvm.region.end at the - end of the function. This region is used to bracket the lifetime of - variables declared within. For a function, this outer region defines a new - stack frame whose lifetime ends when the region is ended.

      +
      +
      +call void @llvm.dbg.declare({ }* %0, metadata !0), !dbg !7   
      +
      +
      -

      It is possible to define inner regions for short term variables by using the - %llvm.region.start - and %llvm.region.end to - bound a region. The inner region in this example would be for the block - containing the declaration of Z.

      +

      The first intrinsic + %llvm.dbg.declare + encodes debugging information for the variable X. The metadata + !dbg !7 attached to the intrinsic provides scope information for the + variable X.

      -

      Using regions to represent the boundaries of source-level functions allow - LLVM interprocedural optimizations to arbitrarily modify LLVM functions - without having to worry about breaking mapping information between the LLVM - code and the and source-level program. In particular, the inliner requires - no modification to support inlining with debugging information: there is no - explicit correlation drawn between LLVM functions and their source-level - counterparts (note however, that if the inliner inlines all instances of a - non-strong-linkage function into its caller that it will not be possible for - the user to manually invoke the inlined function from a debugger).

      +
      +
      +!7 = metadata !{i32 2, i32 7, metadata !1, null}
      +!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ]
      +!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"foo", 
      +                metadata !"foo", metadata !"foo", metadata !3, i32 1, 
      +                metadata !4, i1 false, i1 true}; [DW_TAG_subprogram ]   
      +
      +
      -

      Once the function has been defined, - the stopping point - corresponding to line #2 (column #2) of the function is encountered. At this - point in the function, no local variables are live. As lines 2 and 3 - of the example are executed, their variable definitions are introduced into - the program using - %llvm.dbg.declare, without the - need to specify a new region. These variables do not require new regions to - be introduced because they go out of scope at the same point in the program: - line 9.

      +

      Here !7 is metadata providing location information. It has four + fields: line number, column number, scope, and original scope. The original + scope represents the inline location if this instruction is inlined inside a + caller, and is null otherwise. In this example, scope is encoded by + !1. !1 represents a lexical block inside the scope + !2, where !2 is a + subprogram descriptor. This way the + location information attached to the intrinsic indicates that the + variable X is declared at line number 2 at a function-level scope in + function foo.

      -

      In contrast, the Z variable goes out of scope at a different time, - on line 7. For this reason, it is defined within the inner region, which - kills the availability of Z before the code for line 8 is executed. - In this way, regions can support arbitrary source-language scoping rules, as - long as they can only be nested (ie, one scope cannot partially overlap with - a part of another scope).

      +

      Now let's take another example.

      -

      It is worth noting that this scoping mechanism is used to control scoping of - all declarations, not just variable declarations. For example, the scope of - a C++ using declaration is controlled with this and could change how name - lookup is performed.

      +
      +
      +call void @llvm.dbg.declare({ }* %2, metadata !12), !dbg !14
      +
      +
      + +

      The second intrinsic + %llvm.dbg.declare + encodes debugging information for variable Z. The metadata + !dbg !14 attached to the intrinsic provides scope information for + the variable Z.

      + +
      +
      +!13 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ]
      +!14 = metadata !{i32 5, i32 9, metadata !13, null}
      +
      +
      + +

      Here !14 indicates that Z is declared at line number 5 and + column number 9 inside of lexical scope !13. The lexical scope + itself resides inside of lexical scope !1 described above.

      + +

      The scope information attached to each instruction provides a + straightforward way to find instructions covered by a scope.

      @@ -1813,7 +1718,7 @@ enum Trees { Chris Lattner
      LLVM Compiler Infrastructure
      - Last modified: $Date: 2009-11-17 14:13:59 +0100 (Tue, 17 Nov 2009) $ + Last modified: $Date: 2009-12-01 01:59:58 +0100 (Tue, 01 Dec 2009) $ diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html index bfaafe7265f3..250b533f3f8a 100644 --- a/docs/tutorial/index.html +++ b/docs/tutorial/index.html @@ -15,16 +15,6 @@
      LLVM Tutorial: Table of Contents
        -
      1. An Introduction to LLVM: Basic Concepts and Design
      2. -
      3. Simple JIT Tutorials -
          -
        1. A First Function
        2. -
        3. A More Complicated Function
        4. -
        5. Running Optimizations
        6. -
        7. Reading and Writing Bitcode
        8. -
        9. Invoking the JIT
        10. -
        -
      4. Kaleidoscope: Implementing a Language with LLVM
        1. Tutorial Introduction and the Lexer
        2. diff --git a/examples/Fibonacci/fibonacci.cpp b/examples/Fibonacci/fibonacci.cpp index b1a4691a9f6c..077cdd0f5d68 100644 --- a/examples/Fibonacci/fibonacci.cpp +++ b/examples/Fibonacci/fibonacci.cpp @@ -34,6 +34,7 @@ #include "llvm/ExecutionEngine/Interpreter.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSelect.h" using namespace llvm; static Function *CreateFibFunction(Module *M, LLVMContext &Context) { @@ -92,6 +93,7 @@ static Function *CreateFibFunction(Module *M, LLVMContext &Context) { int main(int argc, char **argv) { int n = argc > 1 ? atol(argv[1]) : 24; + InitializeNativeTarget(); LLVMContext Context; // Create some module to put our function into it. @@ -101,7 +103,13 @@ int main(int argc, char **argv) { Function *FibF = CreateFibFunction(M, Context); // Now we going to create JIT - ExecutionEngine *EE = EngineBuilder(M).create(); + std::string errStr; + ExecutionEngine *EE = EngineBuilder(M).setErrorStr(&errStr).setEngineKind(EngineKind::JIT).create(); + + if (!EE) { + errs() << argv[0] << ": Failed to construct ExecutionEngine: " << errStr << "\n"; + return 1; + } errs() << "verifying... 
"; if (verifyModule(*M)) { diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h index 7cb5bc371b7f..c741d1c19a31 100644 --- a/include/llvm-c/Core.h +++ b/include/llvm-c/Core.h @@ -870,7 +870,7 @@ LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); -LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, +LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef, LLVMValueRef Val, /*Signed cast!*/ LLVMTypeRef DestTy, const char *Name); LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name); diff --git a/include/llvm/ADT/STLExtras.h b/include/llvm/ADT/STLExtras.h index a8b613307da1..32cf4590e993 100644 --- a/include/llvm/ADT/STLExtras.h +++ b/include/llvm/ADT/STLExtras.h @@ -18,6 +18,7 @@ #define LLVM_ADT_STLEXTRAS_H #include // for std::size_t +#include // for qsort #include #include #include // for std::pair diff --git a/include/llvm/ADT/SmallString.h b/include/llvm/ADT/SmallString.h index 035462515a83..05bd8a42c67f 100644 --- a/include/llvm/ADT/SmallString.h +++ b/include/llvm/ADT/SmallString.h @@ -38,12 +38,15 @@ public: // Extra methods. StringRef str() const { return StringRef(this->begin(), this->size()); } + // Implicit conversion to StringRef. + operator StringRef() const { return str(); } + const char *c_str() { this->push_back(0); this->pop_back(); return this->data(); } - + // Extra operators. const SmallString &operator=(StringRef RHS) { this->clear(); diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index ed651bf1a2b5..f299f5fd6517 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -10,9 +10,9 @@ #ifndef LLVM_ADT_STRINGREF_H #define LLVM_ADT_STRINGREF_H -#include #include #include +#include #include namespace llvm { @@ -39,6 +39,19 @@ namespace llvm { /// The length of the string. 
size_t Length; + // Workaround PR5482: nearly all gcc 4.x miscompile StringRef and std::min() + // Changing the arg of min to be an integer, instead of a reference to an + // integer works around this bug. + size_t min(size_t a, size_t b) const + { + return a < b ? a : b; + } + + size_t max(size_t a, size_t b) const + { + return a > b ? a : b; + } + public: /// @name Constructors /// @{ @@ -108,7 +121,7 @@ namespace llvm { /// is lexicographically less than, equal to, or greater than the \arg RHS. int compare(StringRef RHS) const { // Check the prefix for a mismatch. - if (int Res = memcmp(Data, RHS.Data, std::min(Length, RHS.Length))) + if (int Res = memcmp(Data, RHS.Data, min(Length, RHS.Length))) return Res < 0 ? -1 : 1; // Otherwise the prefixes match, so we only need to check the lengths. @@ -163,7 +176,7 @@ namespace llvm { /// \return - The index of the first occurence of \arg C, or npos if not /// found. size_t find(char C, size_t From = 0) const { - for (size_t i = std::min(From, Length), e = Length; i != e; ++i) + for (size_t i = min(From, Length), e = Length; i != e; ++i) if (Data[i] == C) return i; return npos; @@ -180,7 +193,7 @@ namespace llvm { /// \return - The index of the last occurence of \arg C, or npos if not /// found. size_t rfind(char C, size_t From = npos) const { - From = std::min(From, Length); + From = min(From, Length); size_t i = From; while (i != 0) { --i; @@ -262,8 +275,8 @@ namespace llvm { /// exceeds the number of characters remaining in the string, the string /// suffix (starting with \arg Start) will be returned. StringRef substr(size_t Start, size_t N = npos) const { - Start = std::min(Start, Length); - return StringRef(Data + Start, std::min(N, Length - Start)); + Start = min(Start, Length); + return StringRef(Data + Start, min(N, Length - Start)); } /// slice - Return a reference to the substring from [Start, End). 
@@ -277,8 +290,8 @@ namespace llvm { /// number of characters remaining in the string, the string suffix /// (starting with \arg Start) will be returned. StringRef slice(size_t Start, size_t End) const { - Start = std::min(Start, Length); - End = std::min(std::max(Start, End), Length); + Start = min(Start, Length); + End = min(max(Start, End), Length); return StringRef(Data + Start, End - Start); } diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h index b415990b2225..6b150c8fffa0 100644 --- a/include/llvm/ADT/Trie.h +++ b/include/llvm/ADT/Trie.h @@ -309,8 +309,7 @@ struct DOTGraphTraits > : public DefaultDOTGraphTraits { return "Trie"; } - static std::string getNodeLabel(NodeType* Node, const Trie& T, - bool ShortNames) { + static std::string getNodeLabel(NodeType* Node, const Trie& T) { if (T.getRoot() == Node) return ""; else diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index a9e3e53d1d7d..fe39324dd5c7 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -64,7 +64,7 @@ public: msp430, // MSP430: msp430 pic16, // PIC16: pic16 ppc, // PPC: powerpc - ppc64, // PPC64: powerpc64 + ppc64, // PPC64: powerpc64, ppu sparc, // Sparc: sparc systemz, // SystemZ: s390x tce, // TCE (http://tce.cs.tut.fi/): tce @@ -90,6 +90,7 @@ public: DragonFly, FreeBSD, Linux, + Lv2, // PS3 MinGW32, MinGW64, NetBSD, diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index be7d5ee37b80..2d43bddf7e0b 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -94,13 +94,12 @@ public: virtual AliasResult alias(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size); - /// getMustAliases - If there are any pointers known that must alias this - /// pointer, return them now. This allows alias-set based alias analyses to - /// perform a form a value numbering (which is exposed by load-vn). 
If an - /// alias analysis supports this, it should ADD any must aliased pointers to - /// the specified vector. - /// - virtual void getMustAliases(Value *P, std::vector &RetVals); + /// isNoAlias - A trivial helper function to check to see if the specified + /// pointers are no-alias. + bool isNoAlias(const Value *V1, unsigned V1Size, + const Value *V2, unsigned V2Size) { + return alias(V1, V1Size, V2, V2Size) == NoAlias; + } /// pointsToConstantMemory - If the specified pointer is known to point into /// constant global memory, return true. This allows disambiguation of store @@ -262,14 +261,6 @@ public: /// virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); - /// hasNoModRefInfoForCalls - Return true if the analysis has no mod/ref - /// information for pairs of function calls (other than "pure" and "const" - /// functions). This can be used by clients to avoid many pointless queries. - /// Remember that if you override this and chain to another analysis, you must - /// make sure that it doesn't have mod/ref info either. - /// - virtual bool hasNoModRefInfoForCalls() const; - public: /// Convenience functions... 
ModRefResult getModRefInfo(LoadInst *L, Value *P, unsigned Size); diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h index 440d18267daf..6ad2e5a5b1f9 100644 --- a/include/llvm/Analysis/CFGPrinter.h +++ b/include/llvm/Analysis/CFGPrinter.h @@ -24,23 +24,29 @@ namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + static std::string getGraphName(const Function *F) { return "CFG for '" + F->getNameStr() + "' function"; } - static std::string getNodeLabel(const BasicBlock *Node, - const Function *Graph, - bool ShortNames) { - if (ShortNames && !Node->getName().empty()) - return Node->getNameStr() + ":"; + static std::string getSimpleNodeLabel(const BasicBlock *Node, + const Function *Graph) { + if (!Node->getName().empty()) + return Node->getNameStr(); std::string Str; raw_string_ostream OS(Str); - if (ShortNames) { - WriteAsOperand(OS, Node, false); - return OS.str(); - } + WriteAsOperand(OS, Node, false); + return OS.str(); + } + + static std::string getCompleteNodeLabel(const BasicBlock *Node, + const Function *Graph) { + std::string Str; + raw_string_ostream OS(Str); if (Node->getName().empty()) { WriteAsOperand(OS, Node, false); @@ -65,6 +71,14 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits { return OutStr; } + std::string getNodeLabel(const BasicBlock *Node, + const Function *Graph) { + if (isSimple()) + return getSimpleNodeLabel(Node, Graph); + else + return getCompleteNodeLabel(Node, Graph); + } + static std::string getEdgeSourceLabel(const BasicBlock *Node, succ_const_iterator I) { // Label source of conditional branches with "T" or "F" diff --git a/include/llvm/Analysis/CaptureTracking.h b/include/llvm/Analysis/CaptureTracking.h index a0ff503a0393..493ecf517141 100644 --- a/include/llvm/Analysis/CaptureTracking.h +++ b/include/llvm/Analysis/CaptureTracking.h @@ -21,8 +21,12 @@ namespace llvm { /// by the 
enclosing function (which is required to exist). This routine can /// be expensive, so consider caching the results. The boolean ReturnCaptures /// specifies whether returning the value (or part of it) from the function + /// counts as capturing it or not. The boolean StoreCaptures specified whether + /// storing the value (or part of it) into memory anywhere automatically /// counts as capturing it or not. - bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures); + bool PointerMayBeCaptured(const Value *V, + bool ReturnCaptures, + bool StoreCaptures); } // end namespace llvm diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h index 3c40d65a485a..866ed8a8757e 100644 --- a/include/llvm/Analysis/DebugInfo.h +++ b/include/llvm/Analysis/DebugInfo.h @@ -55,7 +55,7 @@ namespace llvm { /// not, the debug info is corrupt and we ignore it. DIDescriptor(MDNode *N, unsigned RequiredTag); - const char *getStringField(unsigned Elt) const; + StringRef getStringField(unsigned Elt) const; unsigned getUnsignedField(unsigned Elt) const { return (unsigned)getUInt64Field(Elt); } @@ -137,8 +137,8 @@ namespace llvm { } virtual ~DIScope() {} - const char *getFilename() const; - const char *getDirectory() const; + StringRef getFilename() const; + StringRef getDirectory() const; }; /// DICompileUnit - A wrapper for a compile unit. @@ -150,9 +150,9 @@ namespace llvm { } unsigned getLanguage() const { return getUnsignedField(2); } - const char *getFilename() const { return getStringField(3); } - const char *getDirectory() const { return getStringField(4); } - const char *getProducer() const { return getStringField(5); } + StringRef getFilename() const { return getStringField(3); } + StringRef getDirectory() const { return getStringField(4); } + StringRef getProducer() const { return getStringField(5); } /// isMain - Each input file is encoded as a separate compile unit in LLVM /// debugging information output. 
However, many target specific tool chains @@ -165,7 +165,7 @@ namespace llvm { bool isMain() const { return getUnsignedField(6); } bool isOptimized() const { return getUnsignedField(7); } - const char *getFlags() const { return getStringField(8); } + StringRef getFlags() const { return getStringField(8); } unsigned getRunTimeVersion() const { return getUnsignedField(9); } /// Verify - Verify that a compile unit is well formed. @@ -183,7 +183,7 @@ namespace llvm { explicit DIEnumerator(MDNode *N = 0) : DIDescriptor(N, dwarf::DW_TAG_enumerator) {} - const char *getName() const { return getStringField(1); } + StringRef getName() const { return getStringField(1); } uint64_t getEnumValue() const { return getUInt64Field(2); } }; @@ -217,7 +217,7 @@ namespace llvm { virtual ~DIType() {} DIDescriptor getContext() const { return getDescriptorField(1); } - const char *getName() const { return getStringField(2); } + StringRef getName() const { return getStringField(2); } DICompileUnit getCompileUnit() const{ return getFieldAs(3); } unsigned getLineNumber() const { return getUnsignedField(4); } uint64_t getSizeInBits() const { return getUInt64Field(5); } @@ -317,9 +317,9 @@ namespace llvm { virtual ~DIGlobal() {} DIDescriptor getContext() const { return getDescriptorField(2); } - const char *getName() const { return getStringField(3); } - const char *getDisplayName() const { return getStringField(4); } - const char *getLinkageName() const { return getStringField(5); } + StringRef getName() const { return getStringField(3); } + StringRef getDisplayName() const { return getStringField(4); } + StringRef getLinkageName() const { return getStringField(5); } DICompileUnit getCompileUnit() const{ return getFieldAs(6); } unsigned getLineNumber() const { return getUnsignedField(7); } DIType getType() const { return getFieldAs(8); } @@ -342,16 +342,16 @@ namespace llvm { } DIDescriptor getContext() const { return getDescriptorField(2); } - const char *getName() const { return 
getStringField(3); } - const char *getDisplayName() const { return getStringField(4); } - const char *getLinkageName() const { return getStringField(5); } + StringRef getName() const { return getStringField(3); } + StringRef getDisplayName() const { return getStringField(4); } + StringRef getLinkageName() const { return getStringField(5); } DICompileUnit getCompileUnit() const{ return getFieldAs(6); } unsigned getLineNumber() const { return getUnsignedField(7); } DICompositeType getType() const { return getFieldAs(8); } /// getReturnTypeName - Subprogram return types are encoded either as /// DIType or as DICompositeType. - const char *getReturnTypeName() const { + StringRef getReturnTypeName() const { DICompositeType DCT(getFieldAs(8)); if (!DCT.isNull()) { DIArray A = DCT.getTypeArray(); @@ -366,8 +366,8 @@ namespace llvm { /// compile unit, like 'static' in C. unsigned isLocalToUnit() const { return getUnsignedField(9); } unsigned isDefinition() const { return getUnsignedField(10); } - const char *getFilename() const { return getCompileUnit().getFilename();} - const char *getDirectory() const { return getCompileUnit().getDirectory();} + StringRef getFilename() const { return getCompileUnit().getFilename();} + StringRef getDirectory() const { return getCompileUnit().getDirectory();} /// Verify - Verify that a subprogram descriptor is well formed. 
bool Verify() const; @@ -406,7 +406,7 @@ namespace llvm { } DIDescriptor getContext() const { return getDescriptorField(1); } - const char *getName() const { return getStringField(2); } + StringRef getName() const { return getStringField(2); } DICompileUnit getCompileUnit() const{ return getFieldAs(3); } unsigned getLineNumber() const { return getUnsignedField(4); } DIType getType() const { return getFieldAs(5); } @@ -444,8 +444,8 @@ namespace llvm { DbgNode = 0; } DIScope getContext() const { return getFieldAs(1); } - const char *getDirectory() const { return getContext().getDirectory(); } - const char *getFilename() const { return getContext().getFilename(); } + StringRef getDirectory() const { return getContext().getDirectory(); } + StringRef getFilename() const { return getContext().getFilename(); } }; /// DILocation - This object holds location information. This object @@ -458,8 +458,8 @@ namespace llvm { unsigned getColumnNumber() const { return getUnsignedField(1); } DIScope getScope() const { return getFieldAs(2); } DILocation getOrigLocation() const { return getFieldAs(3); } - const char *getFilename() const { return getScope().getFilename(); } - const char *getDirectory() const { return getScope().getDirectory(); } + StringRef getFilename() const { return getScope().getFilename(); } + StringRef getDirectory() const { return getScope().getDirectory(); } }; /// DIFactory - This object assists with the construction of the various @@ -489,26 +489,26 @@ namespace llvm { /// CreateCompileUnit - Create a new descriptor for the specified compile /// unit. DICompileUnit CreateCompileUnit(unsigned LangID, - const char * Filename, - const char * Directory, - const char * Producer, + StringRef Filename, + StringRef Directory, + StringRef Producer, bool isMain = false, bool isOptimized = false, - const char *Flags = "", + StringRef Flags = "", unsigned RunTimeVer = 0); /// CreateEnumerator - Create a single enumerator value. 
- DIEnumerator CreateEnumerator(const char * Name, uint64_t Val); + DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val); /// CreateBasicType - Create a basic type like int, float, etc. - DIBasicType CreateBasicType(DIDescriptor Context, const char * Name, + DIBasicType CreateBasicType(DIDescriptor Context, StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, unsigned Encoding); /// CreateBasicType - Create a basic type like int, float, etc. - DIBasicType CreateBasicTypeEx(DIDescriptor Context, const char * Name, + DIBasicType CreateBasicTypeEx(DIDescriptor Context, StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, Constant *AlignInBits, Constant *OffsetInBits, unsigned Flags, @@ -517,7 +517,7 @@ namespace llvm { /// CreateDerivedType - Create a derived type like const qualified type, /// pointer, typedef, etc. DIDerivedType CreateDerivedType(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, @@ -527,7 +527,7 @@ namespace llvm { /// CreateDerivedType - Create a derived type like const qualified type, /// pointer, typedef, etc. DIDerivedType CreateDerivedTypeEx(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, Constant *AlignInBits, @@ -536,7 +536,7 @@ namespace llvm { /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType CreateCompositeType(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -548,7 +548,7 @@ namespace llvm { /// CreateCompositeType - Create a composite type like array, struct, etc. 
DICompositeType CreateCompositeTypeEx(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, @@ -560,25 +560,25 @@ namespace llvm { /// CreateSubprogram - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. - DISubprogram CreateSubprogram(DIDescriptor Context, const char * Name, - const char * DisplayName, - const char * LinkageName, + DISubprogram CreateSubprogram(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, bool isDefinition); /// CreateGlobalVariable - Create a new descriptor for the specified global. DIGlobalVariable - CreateGlobalVariable(DIDescriptor Context, const char * Name, - const char * DisplayName, - const char * LinkageName, + CreateGlobalVariable(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, bool isDefinition, llvm::GlobalVariable *GV); /// CreateVariable - Create a new descriptor for the specified variable. DIVariable CreateVariable(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNo, DIType Type); @@ -598,6 +598,10 @@ namespace llvm { DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo, DIScope S, DILocation OrigLoc); + /// CreateLocation - Creates a debug info location. + DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo, + DIScope S, MDNode *OrigLoc = 0); + /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D, BasicBlock *InsertAtEnd); @@ -669,6 +673,12 @@ bool getLocationInfo(const Value *V, std::string &DisplayName, DebugLoc ExtractDebugLocation(DbgFuncStartInst &FSI, DebugLocTracker &DebugLocInfo); + /// getDISubprogram - Find subprogram that is enclosing this scope. + DISubprogram getDISubprogram(MDNode *Scope); + + /// getDICompositeType - Find underlying composite type. + DICompositeType getDICompositeType(DIType T); + class DebugInfoFinder { public: diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index aa5c0f554bc9..13314e6ea0e5 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -20,6 +20,11 @@ namespace llvm { class Instruction; class Value; class TargetData; + + /// SimplifyAddInst - Given operands for an Add, see if we can + /// fold the result. If not, this returns null. + Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW, + const TargetData *TD = 0); /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. @@ -42,6 +47,11 @@ namespace llvm { const TargetData *TD = 0); + /// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can + /// fold the result. If not, this returns null. + Value *SimplifyGEPInst(Value * const *Ops, unsigned NumOps, + const TargetData *TD = 0); + //=== Helper functions for higher up the class hierarchy. diff --git a/include/llvm/Analysis/LibCallAliasAnalysis.h b/include/llvm/Analysis/LibCallAliasAnalysis.h index 7944af3b8a5a..01f108d29042 100644 --- a/include/llvm/Analysis/LibCallAliasAnalysis.h +++ b/include/llvm/Analysis/LibCallAliasAnalysis.h @@ -49,9 +49,6 @@ namespace llvm { return false; } - /// hasNoModRefInfoForCalls - We can provide mod/ref information against - /// non-escaping allocations. 
- virtual bool hasNoModRefInfoForCalls() const { return false; } private: ModRefResult AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, CallSite CS, Value *P, unsigned Size); diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index 6504bdce4283..9969d999e13e 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -269,8 +269,6 @@ public: /// getLoopLatch - If there is a single latch block for this loop, return it. /// A latch block is a block that contains a branch back to the header. - /// A loop header in normal form has two edges into it: one from a preheader - /// and one from a latch block. BlockT *getLoopLatch() const { BlockT *Header = getHeader(); typedef GraphTraits > InvBlockTraits; @@ -278,20 +276,12 @@ public: InvBlockTraits::child_begin(Header); typename InvBlockTraits::ChildIteratorType PE = InvBlockTraits::child_end(Header); - if (PI == PE) return 0; // no preds? - BlockT *Latch = 0; - if (contains(*PI)) - Latch = *PI; - ++PI; - if (PI == PE) return 0; // only one pred? - - if (contains(*PI)) { - if (Latch) return 0; // multiple backedges - Latch = *PI; - } - ++PI; - if (PI != PE) return 0; // more than two preds + for (; PI != PE; ++PI) + if (contains(*PI)) { + if (Latch) return 0; + Latch = *PI; + } return Latch; } diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 205c34ab5c89..6b300fd9503e 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -30,6 +30,7 @@ namespace llvm { class TargetData; class MemoryDependenceAnalysis; class PredIteratorCache; + class DominatorTree; /// MemDepResult - A memory dependence query can return one of three different /// answers, described below. 
@@ -244,6 +245,29 @@ namespace llvm { BasicBlock *BB, SmallVectorImpl &Result); + /// GetPHITranslatedValue - Find an available version of the specified value + /// PHI translated across the specified edge. If MemDep isn't able to + /// satisfy this request, it returns null. + Value *GetPHITranslatedValue(Value *V, + BasicBlock *CurBB, BasicBlock *PredBB, + const TargetData *TD) const; + + /// GetAvailablePHITranslatedValue - Return the value computed by + /// PHITranslatePointer if it dominates PredBB, otherwise return null. + Value *GetAvailablePHITranslatedValue(Value *V, + BasicBlock *CurBB, BasicBlock *PredBB, + const TargetData *TD, + const DominatorTree &DT) const; + + /// InsertPHITranslatedPointer - Insert a computation of the PHI translated + /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB + /// block. All newly created instructions are added to the NewInsts list. + Value *InsertPHITranslatedPointer(Value *V, + BasicBlock *CurBB, BasicBlock *PredBB, + const TargetData *TD, + const DominatorTree &DT, + SmallVectorImpl &NewInsts) const; + /// removeInstruction - Remove an instruction from the dependence analysis, /// updating the dependence of instructions that previously depended on it. 
void removeInstruction(Instruction *InstToRemove); diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h index 42a16e74a247..ea14b2da9ce9 100644 --- a/include/llvm/Analysis/PostDominators.h +++ b/include/llvm/Analysis/PostDominators.h @@ -81,7 +81,10 @@ template <> struct GraphTraits } static nodes_iterator nodes_begin(PostDominatorTree *N) { - return df_begin(getEntryNode(N)); + if (getEntryNode(N)) + return df_begin(getEntryNode(N)); + else + return df_end(getEntryNode(N)); } static nodes_iterator nodes_end(PostDominatorTree *N) { diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index 038d442cc6b9..5f3c671d0f48 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -19,6 +19,7 @@ #include namespace llvm { + template class SmallVectorImpl; class Value; class Instruction; class APInt; @@ -77,6 +78,26 @@ namespace llvm { /// bool CannotBeNegativeZero(const Value *V, unsigned Depth = 0); + /// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose + /// it into a base pointer with a constant offset and a number of scaled + /// symbolic offsets. + /// + /// The scaled symbolic offsets (represented by pairs of a Value* and a scale + /// in the VarIndices vector) are Value*'s that are known to be scaled by the + /// specified amount, but which may have other unrepresented high bits. As + /// such, the gep cannot necessarily be reconstructed from its decomposed + /// form. + /// + /// When TargetData is around, this function is capable of analyzing + /// everything that Value::getUnderlyingObject() can look through. When not, + /// it just looks through pointer casts. 
+ /// + const Value *DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl > &VarIndices, + const TargetData *TD); + + + /// FindScalarValue - Given an aggregrate and an sequence of indices, see if /// the scalar value indexed is already around as a register, for example if /// it were inserted directly into the aggregrate. @@ -86,16 +107,14 @@ namespace llvm { Value *FindInsertedValue(Value *V, const unsigned *idx_begin, const unsigned *idx_end, - LLVMContext &Context, Instruction *InsertBefore = 0); /// This is a convenience wrapper for finding values indexed by a single index /// only. inline Value *FindInsertedValue(Value *V, const unsigned Idx, - LLVMContext &Context, Instruction *InsertBefore = 0) { const unsigned Idxs[1] = { Idx }; - return FindInsertedValue(V, &Idxs[0], &Idxs[1], Context, InsertBefore); + return FindInsertedValue(V, &Idxs[0], &Idxs[1], InsertBefore); } /// GetConstantStringInfo - This function computes the length of a diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 109ff74c1b6d..9a07e31ac70b 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -297,7 +297,7 @@ namespace llvm { /// EmitString - Emit a string with quotes and a null terminator. /// Special characters are emitted properly. /// @verbatim (Eg. '\t') @endverbatim - void EmitString(const std::string &String) const; + void EmitString(const StringRef String) const; void EmitString(const char *String, unsigned Size) const; /// EmitFile - Emit a .file directive. @@ -345,9 +345,11 @@ namespace llvm { /// GetBlockAddressSymbol - Return the MCSymbol used to satisfy BlockAddress /// uses of the specified basic block. 
- MCSymbol *GetBlockAddressSymbol(const BlockAddress *BA) const; + MCSymbol *GetBlockAddressSymbol(const BlockAddress *BA, + const char *Suffix = "") const; MCSymbol *GetBlockAddressSymbol(const Function *F, - const BasicBlock *BB) const; + const BasicBlock *BB, + const char *Suffix = "") const; /// EmitBasicBlockStart - This method prints the label for the specified /// MachineBasicBlock, an alignment (if present) and a comment describing diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h index 792fb5992336..ea3e59beab03 100644 --- a/include/llvm/CodeGen/JITCodeEmitter.h +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -68,23 +68,29 @@ public: /// virtual bool finishFunction(MachineFunction &F) = 0; - /// startGVStub - This callback is invoked when the JIT needs the - /// address of a GV (e.g. function) that has not been code generated yet. - /// The StubSize specifies the total size required by the stub. + /// startGVStub - This callback is invoked when the JIT needs the address of a + /// GV (e.g. function) that has not been code generated yet. The StubSize + /// specifies the total size required by the stub. The BufferState must be + /// passed to finishGVStub, and start/finish pairs with the same BufferState + /// must be properly nested. /// - virtual void startGVStub(const GlobalValue* GV, unsigned StubSize, - unsigned Alignment = 1) = 0; + virtual void startGVStub(BufferState &BS, const GlobalValue* GV, + unsigned StubSize, unsigned Alignment = 1) = 0; - /// startGVStub - This callback is invoked when the JIT needs the address of a + /// startGVStub - This callback is invoked when the JIT needs the address of a /// GV (e.g. function) that has not been code generated yet. Buffer points to - /// memory already allocated for this stub. + /// memory already allocated for this stub. The BufferState must be passed to + /// finishGVStub, and start/finish pairs with the same BufferState must be + /// properly nested. 
/// - virtual void startGVStub(const GlobalValue* GV, void *Buffer, + virtual void startGVStub(BufferState &BS, void *Buffer, unsigned StubSize) = 0; - - /// finishGVStub - This callback is invoked to terminate a GV stub. + + /// finishGVStub - This callback is invoked to terminate a GV stub and returns + /// the start address of the stub. The BufferState must first have been + /// passed to startGVStub. /// - virtual void *finishGVStub(const GlobalValue* F) = 0; + virtual void *finishGVStub(BufferState &BS) = 0; /// emitByte - This callback is invoked when a byte needs to be written to the /// output stream. diff --git a/include/llvm/CodeGen/LatencyPriorityQueue.h b/include/llvm/CodeGen/LatencyPriorityQueue.h index a7cebee6064e..7ac0418c9500 100644 --- a/include/llvm/CodeGen/LatencyPriorityQueue.h +++ b/include/llvm/CodeGen/LatencyPriorityQueue.h @@ -40,18 +40,11 @@ namespace llvm { /// mobility. std::vector NumNodesSolelyBlocking; - /// IgnoreAntiDep - Ignore anti-dependencies - bool IgnoreAntiDep; - /// Queue - The queue. PriorityQueue, latency_sort> Queue; public: - LatencyPriorityQueue() : IgnoreAntiDep(false), Queue(latency_sort(this)) { - } - - void setIgnoreAntiDep(bool ignore) { - IgnoreAntiDep = ignore; + LatencyPriorityQueue() : Queue(latency_sort(this)) { } void initNodes(std::vector &sunits) { @@ -72,7 +65,7 @@ public: unsigned getLatency(unsigned NodeNum) const { assert(NodeNum < (*SUnits).size()); - return (*SUnits)[NodeNum].getHeight(IgnoreAntiDep); + return (*SUnits)[NodeNum].getHeight(); } unsigned getNumSolelyBlockNodes(unsigned NodeNum) const { diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h index b2be569bc10a..39a4b89f54db 100644 --- a/include/llvm/CodeGen/LiveVariables.h +++ b/include/llvm/CodeGen/LiveVariables.h @@ -107,6 +107,13 @@ public: /// findKill - Find a kill instruction in MBB. Return NULL if none is found. 
MachineInstr *findKill(const MachineBasicBlock *MBB) const; + /// isLiveIn - Is Reg live in to MBB? This means that Reg is live through + /// MBB, or it is killed in MBB. If Reg is only used by PHI instructions in + /// MBB, it is not considered live in. + bool isLiveIn(const MachineBasicBlock &MBB, + unsigned Reg, + MachineRegisterInfo &MRI); + void dump() const; }; @@ -156,8 +163,13 @@ private: // Intermediate data structures SmallVector &Defs); void UpdatePhysRegDefs(MachineInstr *MI, SmallVector &Defs); - /// FindLastPartialDef - Return the last partial def of the specified register. - /// Also returns the sub-registers that're defined by the instruction. + /// FindLastRefOrPartRef - Return the last reference or partial reference of + /// the specified register. + MachineInstr *FindLastRefOrPartRef(unsigned Reg); + + /// FindLastPartialDef - Return the last partial def of the specified + /// register. Also returns the sub-registers that're defined by the + /// instruction. MachineInstr *FindLastPartialDef(unsigned Reg, SmallSet &PartDefRegs); @@ -267,11 +279,17 @@ public: void HandleVirtRegUse(unsigned reg, MachineBasicBlock *MBB, MachineInstr *MI); - /// addNewBlock - Add a new basic block BB as an empty succcessor to - /// DomBB. All variables that are live out of DomBB will be marked as passing - /// live through BB. This method assumes that the machine code is still in SSA - /// form. - void addNewBlock(MachineBasicBlock *BB, MachineBasicBlock *DomBB); + bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB) { + return getVarInfo(Reg).isLiveIn(MBB, Reg, *MRI); + } + + /// addNewBlock - Add a new basic block BB between DomBB and SuccBB. All + /// variables that are live out of DomBB and live into SuccBB will be marked + /// as passing live through BB. This method assumes that the machine code is + /// still in SSA form. 
+ void addNewBlock(MachineBasicBlock *BB, + MachineBasicBlock *DomBB, + MachineBasicBlock *SuccBB); }; } // End llvm namespace diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index bb50b5df4ca8..6b4c64055bf3 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -92,10 +92,15 @@ class MachineBasicBlock : public ilist_node { public: /// getBasicBlock - Return the LLVM basic block that this instance - /// corresponded to originally. + /// corresponded to originally. Note that this may be NULL if this instance + /// does not correspond directly to an LLVM basic block. /// const BasicBlock *getBasicBlock() const { return BB; } + /// getName - Return the name of the corresponding LLVM basic block, or + /// "(null)". + StringRef getName() const; + /// hasAddressTaken - Test whether this block is potentially the target /// of an indirect branch. bool hasAddressTaken() const { return AddressTaken; } @@ -266,6 +271,12 @@ public: /// ends with an unconditional branch to some other block. bool isLayoutSuccessor(const MachineBasicBlock *MBB) const; + /// canFallThrough - Return true if the block can implicitly transfer + /// control to the block after it by falling off the end of it. This should + /// return false if it can reach the block after it, but it uses an explicit + /// branch to do so (e.g., a table jump). True is a conservative answer. + bool canFallThrough(); + /// getFirstTerminator - returns an iterator to the first terminator /// instruction of this basic block. If a terminator does not exist, /// it returns end() diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h index c55a9e65e453..791db003ead1 100644 --- a/include/llvm/CodeGen/MachineCodeEmitter.h +++ b/include/llvm/CodeGen/MachineCodeEmitter.h @@ -48,17 +48,41 @@ class Function; /// occurred, more memory is allocated, and we reemit the code into it. 
/// class MachineCodeEmitter { +public: + class BufferState { + friend class MachineCodeEmitter; + /// BufferBegin/BufferEnd - Pointers to the start and end of the memory + /// allocated for this code buffer. + uint8_t *BufferBegin, *BufferEnd; + + /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting + /// code. This is guaranteed to be in the range [BufferBegin,BufferEnd]. If + /// this pointer is at BufferEnd, it will never move due to code emission, + /// and all code emission requests will be ignored (this is the buffer + /// overflow condition). + uint8_t *CurBufferPtr; + public: + BufferState() : BufferBegin(NULL), BufferEnd(NULL), CurBufferPtr(NULL) {} + }; + protected: - /// BufferBegin/BufferEnd - Pointers to the start and end of the memory - /// allocated for this code buffer. - uint8_t *BufferBegin, *BufferEnd; - - /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting - /// code. This is guranteed to be in the range [BufferBegin,BufferEnd]. If - /// this pointer is at BufferEnd, it will never move due to code emission, and - /// all code emission requests will be ignored (this is the buffer overflow - /// condition). - uint8_t *CurBufferPtr; + /// These have the same meanings as the fields in BufferState + uint8_t *BufferBegin, *BufferEnd, *CurBufferPtr; + + /// Save or restore the current buffer state. The BufferState objects must be + /// used as a stack. 
+ void SaveStateTo(BufferState &BS) { + assert(BS.BufferBegin == NULL && + "Can't save state into the same BufferState twice."); + BS.BufferBegin = BufferBegin; + BS.BufferEnd = BufferEnd; + BS.CurBufferPtr = CurBufferPtr; + } + void RestoreStateFrom(BufferState &BS) { + BufferBegin = BS.BufferBegin; + BufferEnd = BS.BufferEnd; + CurBufferPtr = BS.CurBufferPtr; + } public: virtual ~MachineCodeEmitter() {} diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 47616ce3443d..bac9fce467e8 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -135,9 +135,6 @@ class MachineModuleInfo : public ImmutablePass { /// llvm.compiler.used. SmallPtrSet UsedFunctions; - /// UsedDbgLabels - labels are used by debug info entries. - SmallSet UsedDbgLabels; - bool CallsEHReturn; bool CallsUnwindInit; @@ -232,19 +229,6 @@ public: return LabelID ? LabelIDList[LabelID - 1] : 0; } - /// isDbgLabelUsed - Return true if label with LabelID is used by - /// DwarfWriter. - bool isDbgLabelUsed(unsigned LabelID) { - return UsedDbgLabels.count(LabelID); - } - - /// RecordUsedDbgLabel - Mark label with LabelID as used. This is used - /// by DwarfWriter to inform DebugLabelFolder that certain labels are - /// not to be deleted. - void RecordUsedDbgLabel(unsigned LabelID) { - UsedDbgLabels.insert(LabelID); - } - /// getFrameMoves - Returns a reference to a list of moves done in the current /// function's prologue. Used to construct frame maps for debug and exception /// handling comsumers. 
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h index eede2cc50ced..8748afcba92e 100644 --- a/include/llvm/CodeGen/MachineOperand.h +++ b/include/llvm/CodeGen/MachineOperand.h @@ -435,10 +435,12 @@ public: Op.setTargetFlags(TargetFlags); return Op; } - static MachineOperand CreateBA(BlockAddress *BA) { + static MachineOperand CreateBA(BlockAddress *BA, + unsigned char TargetFlags = 0) { MachineOperand Op(MachineOperand::MO_BlockAddress); Op.Contents.OffsetedInfo.Val.BA = BA; Op.setOffset(0); // Offset is always 0. + Op.setTargetFlags(TargetFlags); return Op; } diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h index d0d610370bd5..8e8970269e87 100644 --- a/include/llvm/CodeGen/Passes.h +++ b/include/llvm/CodeGen/Passes.h @@ -129,6 +129,10 @@ namespace llvm { /// branches. FunctionPass *createBranchFoldingPass(bool DefaultEnableTailMerge); + /// TailDuplicate Pass - Duplicate blocks with unconditional branches + /// into tails of their predecessors. + FunctionPass *createTailDuplicatePass(); + /// IfConverter Pass - This pass performs machine code if conversion. FunctionPass *createIfConverterPass(); @@ -136,11 +140,6 @@ namespace llvm { /// headers to target specific alignment boundary. FunctionPass *createCodePlacementOptPass(); - /// DebugLabelFoldingPass - This pass prunes out redundant debug labels. This - /// allows a debug emitter to determine if the range of two labels is empty, - /// by seeing if the labels map to the same reduced label. - FunctionPass *createDebugLabelFoldingPass(); - /// getRegisterAllocator - This creates an instance of the register allocator /// for the Sparc. 
FunctionPass *getRegisterAllocator(TargetMachine &T); diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index d5e702031223..955965bccfa9 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -340,34 +340,30 @@ namespace llvm { void removePred(const SDep &D); /// getDepth - Return the depth of this node, which is the length of the - /// maximum path up to any node with has no predecessors. If IgnoreAntiDep - /// is true, ignore anti-dependence edges. - unsigned getDepth(bool IgnoreAntiDep=false) const { + /// maximum path up to any node with has no predecessors. + unsigned getDepth() const { if (!isDepthCurrent) - const_cast(this)->ComputeDepth(IgnoreAntiDep); + const_cast(this)->ComputeDepth(); return Depth; } /// getHeight - Return the height of this node, which is the length of the - /// maximum path down to any node with has no successors. If IgnoreAntiDep - /// is true, ignore anti-dependence edges. - unsigned getHeight(bool IgnoreAntiDep=false) const { + /// maximum path down to any node with has no successors. + unsigned getHeight() const { if (!isHeightCurrent) - const_cast(this)->ComputeHeight(IgnoreAntiDep); + const_cast(this)->ComputeHeight(); return Height; } /// setDepthToAtLeast - If NewDepth is greater than this node's /// depth value, set it to be the new depth value. This also - /// recursively marks successor nodes dirty. If IgnoreAntiDep is - /// true, ignore anti-dependence edges. - void setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep=false); + /// recursively marks successor nodes dirty. + void setDepthToAtLeast(unsigned NewDepth); /// setDepthToAtLeast - If NewDepth is greater than this node's /// depth value, set it to be the new height value. This also - /// recursively marks predecessor nodes dirty. If IgnoreAntiDep is - /// true, ignore anti-dependence edges. 
- void setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep=false); + /// recursively marks predecessor nodes dirty. + void setHeightToAtLeast(unsigned NewHeight); /// setDepthDirty - Set a flag in this node to indicate that its /// stored Depth value will require recomputation the next time @@ -400,8 +396,8 @@ namespace llvm { void print(raw_ostream &O, const ScheduleDAG *G) const; private: - void ComputeDepth(bool IgnoreAntiDep); - void ComputeHeight(bool IgnoreAntiDep); + void ComputeDepth(); + void ComputeHeight(); }; //===--------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 8400e86e7eaf..e5868079cda2 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -322,12 +322,10 @@ public: unsigned char TargetFlags = 0); SDValue getValueType(EVT); SDValue getRegister(unsigned Reg, EVT VT); - SDValue getDbgStopPoint(DebugLoc DL, SDValue Root, - unsigned Line, unsigned Col, MDNode *CU); SDValue getLabel(unsigned Opcode, DebugLoc dl, SDValue Root, unsigned LabelID); - SDValue getBlockAddress(BlockAddress *BA, DebugLoc dl, - bool isTarget = false); + SDValue getBlockAddress(BlockAddress *BA, EVT VT, + bool isTarget = false, unsigned char TargetFlags = 0); SDValue getCopyToReg(SDValue Chain, DebugLoc dl, unsigned Reg, SDValue N) { return getNode(ISD::CopyToReg, dl, MVT::Other, Chain, @@ -884,6 +882,14 @@ public: /// element of the result of the vector shuffle. SDValue getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned Idx); + /// UnrollVectorOp - Utility function used by legalize and lowering to + /// "unroll" a vector operation by splitting out the scalars and operating + /// on each element individually. If the ResNE is 0, fully unroll the vector + /// op. If ResNE is less than the width of the vector op, unroll up to ResNE. 
+ /// If the ResNE is greater than the width of the vector op, unroll the + /// vector op and fill the end of the resulting vector with UNDEFS. + SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0); + private: bool RemoveNodeFromCSEMaps(SDNode *N); void AddModifiedNodeToCSEMaps(SDNode *N, DAGUpdateListener *UpdateListener); diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 5d33224cbe2a..4130d2c0a25a 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -23,7 +23,7 @@ namespace llvm { class FastISel; - class SelectionDAGLowering; + class SelectionDAGBuilder; class SDValue; class MachineRegisterInfo; class MachineBasicBlock; @@ -48,7 +48,7 @@ public: MachineFunction *MF; MachineRegisterInfo *RegInfo; SelectionDAG *CurDAG; - SelectionDAGLowering *SDL; + SelectionDAGBuilder *SDB; MachineBasicBlock *BB; AliasAnalysis *AA; GCFunctionInfo *GFI; @@ -127,7 +127,8 @@ private: void SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator Begin, - BasicBlock::iterator End); + BasicBlock::iterator End, + bool &HadTailCall); void CodeGenAndEmitDAG(); void LowerArguments(BasicBlock *BB); diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index d4d40b13e45a..950fd322da2e 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -494,10 +494,9 @@ namespace ISD { // Operand #last: Optional, an incoming flag. INLINEASM, - // DBG_LABEL, EH_LABEL - Represents a label in mid basic block used to track + // EH_LABEL - Represents a label in mid basic block used to track // locations needed for debug and exception handling tables. These nodes // take a chain as input and return a chain. - DBG_LABEL, EH_LABEL, // STACKSAVE - STACKSAVE has one operand, an input chain. It produces a @@ -546,18 +545,6 @@ namespace ISD { // HANDLENODE node - Used as a handle for various purposes. 
HANDLENODE, - // DBG_STOPPOINT - This node is used to represent a source location for - // debug info. It takes token chain as input, and carries a line number, - // column number, and a pointer to a CompileUnit object identifying - // the containing compilation unit. It produces a token chain as output. - DBG_STOPPOINT, - - // DEBUG_LOC - This node is used to represent source line information - // embedded in the code. It takes a token chain as input, then a line - // number, then a column then a file id (provided by MachineModuleInfo.) It - // produces a token chain as output. - DEBUG_LOC, - // TRAMPOLINE - This corresponds to the init_trampoline intrinsic. // It takes as input a token chain, the pointer to the trampoline, // the pointer to the nested function, the pointer to pass for the @@ -636,10 +623,6 @@ namespace ISD { /// element is not an undef. bool isScalarToVector(const SDNode *N); - /// isDebugLabel - Return true if the specified node represents a debug - /// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node). - bool isDebugLabel(const SDNode *N); - //===--------------------------------------------------------------------===// /// MemIndexedMode enum - This enum defines the load / store indexed /// addressing modes. 
@@ -2004,37 +1987,18 @@ public: } }; -class DbgStopPointSDNode : public SDNode { - SDUse Chain; - unsigned Line; - unsigned Column; - MDNode *CU; - friend class SelectionDAG; - DbgStopPointSDNode(SDValue ch, unsigned l, unsigned c, - MDNode *cu) - : SDNode(ISD::DBG_STOPPOINT, DebugLoc::getUnknownLoc(), - getSDVTList(MVT::Other)), Line(l), Column(c), CU(cu) { - InitOperands(&Chain, ch); - } -public: - unsigned getLine() const { return Line; } - unsigned getColumn() const { return Column; } - MDNode *getCompileUnit() const { return CU; } - - static bool classof(const DbgStopPointSDNode *) { return true; } - static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::DBG_STOPPOINT; - } -}; - class BlockAddressSDNode : public SDNode { BlockAddress *BA; + unsigned char TargetFlags; friend class SelectionDAG; - BlockAddressSDNode(unsigned NodeTy, DebugLoc dl, EVT VT, BlockAddress *ba) - : SDNode(NodeTy, dl, getSDVTList(VT)), BA(ba) { + BlockAddressSDNode(unsigned NodeTy, EVT VT, BlockAddress *ba, + unsigned char Flags) + : SDNode(NodeTy, DebugLoc::getUnknownLoc(), getSDVTList(VT)), + BA(ba), TargetFlags(Flags) { } public: BlockAddress *getBlockAddress() const { return BA; } + unsigned char getTargetFlags() const { return TargetFlags; } static bool classof(const BlockAddressSDNode *) { return true; } static bool classof(const SDNode *N) { @@ -2056,8 +2020,7 @@ public: static bool classof(const LabelSDNode *) { return true; } static bool classof(const SDNode *N) { - return N->getOpcode() == ISD::DBG_LABEL || - N->getOpcode() == ISD::EH_LABEL; + return N->getOpcode() == ISD::EH_LABEL; } }; diff --git a/include/llvm/Config/Disassemblers.def.in b/include/llvm/Config/Disassemblers.def.in new file mode 100644 index 000000000000..1b136570933b --- /dev/null +++ b/include/llvm/Config/Disassemblers.def.in @@ -0,0 +1,29 @@ +//===- llvm/Config/Disassemblers.def - LLVM Assembly Parsers ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is 
distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file enumerates all of the disassemblers +// supported by this build of LLVM. Clients of this file should define +// the LLVM_DISASSEMBLER macro to be a function-like macro with a +// single parameter (the name of the target whose machine code can be +// disassembled); including this file will then enumerate all of the +// targets with disassemblers. +// +// The set of targets supported by LLVM is generated at configuration +// time, at which point this header is generated. Do not modify this +// header directly. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DISASSEMBLER +# error Please define the macro LLVM_DISASSEMBLER(TargetName) +#endif + +@LLVM_ENUM_DISASSEMBLERS@ + +#undef LLVM_DISASSEMBLER diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index c0cf00e8eeda..b3b0678a24ec 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -341,19 +341,25 @@ def int_init_trampoline : Intrinsic<[llvm_ptr_ty], // Expose the carry flag from add operations on two integrals. 
def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>]>; + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>]>; + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>]>; + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>]>; + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>]>; + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty], - [LLVMMatchType<0>, LLVMMatchType<0>]>; + [LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; //===------------------------- Atomic Intrinsics --------------------------===// // diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 794f4bfe816f..50ee3582b721 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -671,12 +671,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // Align ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_ssse3_palign_r : GCCBuiltin<"__builtin_ia32_palignr">, + def int_x86_ssse3_palign_r : Intrinsic<[llvm_v1i64_ty], [llvm_v1i64_ty, - llvm_v1i64_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_ssse3_palign_r_128 : GCCBuiltin<"__builtin_ia32_palignr128">, + llvm_v1i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_ssse3_palign_r_128 : Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, - llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; } //===----------------------------------------------------------------------===// diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h index 1d18eba32be1..c7f2b445ee8e 100644 --- a/include/llvm/Metadata.h +++ b/include/llvm/Metadata.h @@ -91,7 +91,7 @@ class MDNode : public MetadataBase, public FoldingSetNode { MDNode(const MDNode &); // DO NOT IMPLEMENT friend class ElementVH; - // Use CallbackVH to hold MDNOde elements. + // Use CallbackVH to hold MDNode elements. struct ElementVH : public CallbackVH { MDNode *Parent; ElementVH() {} @@ -264,7 +264,7 @@ public: /// the same metadata to In2. void copyMD(Instruction *In1, Instruction *In2); - /// getHandlerNames - Populate client supplied smallvector using custome + /// getHandlerNames - Populate client supplied smallvector using custom /// metadata name and ID. void getHandlerNames(SmallVectorImpl >&) const; diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h index 080297f8297e..54ced15321d1 100644 --- a/include/llvm/Support/DOTGraphTraits.h +++ b/include/llvm/Support/DOTGraphTraits.h @@ -27,6 +27,17 @@ namespace llvm { /// implementations. /// struct DefaultDOTGraphTraits { +private: + bool IsSimple; + +protected: + bool isSimple() { + return IsSimple; + } + +public: + DefaultDOTGraphTraits (bool simple=false) : IsSimple (simple) {} + /// getGraphName - Return the label for the graph as a whole. Printed at the /// top of the graph. 
/// @@ -51,8 +62,7 @@ struct DefaultDOTGraphTraits { /// getNodeLabel - Given a node and a pointer to the top level graph, return /// the label to print in the node. template - static std::string getNodeLabel(const void *Node, - const GraphType& Graph, bool ShortNames) { + std::string getNodeLabel(const void *Node, const GraphType& Graph) { return ""; } @@ -135,7 +145,9 @@ struct DefaultDOTGraphTraits { /// from DefaultDOTGraphTraits if you don't need to override everything. /// template -struct DOTGraphTraits : public DefaultDOTGraphTraits {}; +struct DOTGraphTraits : public DefaultDOTGraphTraits { + DOTGraphTraits (bool simple=false) : DefaultDOTGraphTraits (simple) {} +}; } // End llvm namespace diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h index bd3fcea11025..28fa92f99e08 100644 --- a/include/llvm/Support/GraphWriter.h +++ b/include/llvm/Support/GraphWriter.h @@ -52,19 +52,48 @@ template class GraphWriter { raw_ostream &O; const GraphType &G; - bool ShortNames; typedef DOTGraphTraits DOTTraits; typedef GraphTraits GTraits; typedef typename GTraits::NodeType NodeType; typedef typename GTraits::nodes_iterator node_iterator; typedef typename GTraits::ChildIteratorType child_iterator; + DOTTraits DTraits; + + // Writes the edge labels of the node to O and returns true if there are any + // edge labels not equal to the empty string "". 
+ bool getEdgeSourceLabels(raw_ostream &O, NodeType *Node) { + child_iterator EI = GTraits::child_begin(Node); + child_iterator EE = GTraits::child_end(Node); + bool hasEdgeSourceLabels = false; + + for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) { + std::string label = DTraits.getEdgeSourceLabel(Node, EI); + + if (label == "") + continue; + + hasEdgeSourceLabels = true; + + if (i) + O << "|"; + + O << "" << DTraits.getEdgeSourceLabel(Node, EI); + } + + if (EI != EE && hasEdgeSourceLabels) + O << "|truncated..."; + + return hasEdgeSourceLabels; + } + public: - GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : - O(o), G(g), ShortNames(SN) {} + GraphWriter(raw_ostream &o, const GraphType &g, bool SN) : O(o), G(g) { + DTraits = DOTTraits(SN); +} void writeHeader(const std::string &Name) { - std::string GraphName = DOTTraits::getGraphName(G); + std::string GraphName = DTraits.getGraphName(G); if (!Name.empty()) O << "digraph \"" << DOT::EscapeString(Name) << "\" {\n"; @@ -73,14 +102,14 @@ public: else O << "digraph unnamed {\n"; - if (DOTTraits::renderGraphFromBottomUp()) + if (DTraits.renderGraphFromBottomUp()) O << "\trankdir=\"BT\";\n"; if (!Name.empty()) O << "\tlabel=\"" << DOT::EscapeString(Name) << "\";\n"; else if (!GraphName.empty()) O << "\tlabel=\"" << DOT::EscapeString(GraphName) << "\";\n"; - O << DOTTraits::getGraphProperties(G); + O << DTraits.getGraphProperties(G); O << "\n"; } @@ -105,53 +134,47 @@ public: } void writeNode(NodeType *Node) { - std::string NodeAttributes = DOTTraits::getNodeAttributes(Node, G); + std::string NodeAttributes = DTraits.getNodeAttributes(Node, G); O << "\tNode" << static_cast(Node) << " [shape=record,"; if (!NodeAttributes.empty()) O << NodeAttributes << ","; O << "label=\"{"; - if (!DOTTraits::renderGraphFromBottomUp()) { - O << DOT::EscapeString(DOTTraits::getNodeLabel(Node, G, ShortNames)); + if (!DTraits.renderGraphFromBottomUp()) { + O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should 
include the address of the node in the label, do so now. - if (DOTTraits::hasNodeAddressLabel(Node, G)) + if (DTraits.hasNodeAddressLabel(Node, G)) O << "|" << (void*)Node; } - // Print out the fields of the current node... - child_iterator EI = GTraits::child_begin(Node); - child_iterator EE = GTraits::child_end(Node); - if (EI != EE) { - if (!DOTTraits::renderGraphFromBottomUp()) O << "|"; - O << "{"; + std::string edgeSourceLabels; + raw_string_ostream EdgeSourceLabels(edgeSourceLabels); + bool hasEdgeSourceLabels = getEdgeSourceLabels(EdgeSourceLabels, Node); - for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) { - if (i) O << "|"; - O << "" << DOTTraits::getEdgeSourceLabel(Node, EI); - } + if (hasEdgeSourceLabels) { + if (!DTraits.renderGraphFromBottomUp()) O << "|"; - if (EI != EE) - O << "|truncated..."; - O << "}"; - if (DOTTraits::renderGraphFromBottomUp()) O << "|"; + O << "{" << EdgeSourceLabels.str() << "}"; + + if (DTraits.renderGraphFromBottomUp()) O << "|"; } - if (DOTTraits::renderGraphFromBottomUp()) { - O << DOT::EscapeString(DOTTraits::getNodeLabel(Node, G, ShortNames)); + if (DTraits.renderGraphFromBottomUp()) { + O << DOT::EscapeString(DTraits.getNodeLabel(Node, G)); // If we should include the address of the node in the label, do so now. 
- if (DOTTraits::hasNodeAddressLabel(Node, G)) + if (DTraits.hasNodeAddressLabel(Node, G)) O << "|" << (void*)Node; } - if (DOTTraits::hasEdgeDestLabels()) { + if (DTraits.hasEdgeDestLabels()) { O << "|{"; - unsigned i = 0, e = DOTTraits::numEdgeDestLabels(Node); + unsigned i = 0, e = DTraits.numEdgeDestLabels(Node); for (; i != e && i != 64; ++i) { if (i) O << "|"; - O << "" << DOTTraits::getEdgeDestLabel(Node, i); + O << "" << DTraits.getEdgeDestLabel(Node, i); } if (i != e) @@ -162,7 +185,8 @@ public: O << "}\"];\n"; // Finish printing the "node" line // Output all of the edges now - EI = GTraits::child_begin(Node); + child_iterator EI = GTraits::child_begin(Node); + child_iterator EE = GTraits::child_end(Node); for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) writeEdge(Node, i, EI); for (; EI != EE; ++EI) @@ -172,8 +196,8 @@ public: void writeEdge(NodeType *Node, unsigned edgeidx, child_iterator EI) { if (NodeType *TargetNode = *EI) { int DestPort = -1; - if (DOTTraits::edgeTargetsEdgeSource(Node, EI)) { - child_iterator TargetIt = DOTTraits::getEdgeTarget(Node, EI); + if (DTraits.edgeTargetsEdgeSource(Node, EI)) { + child_iterator TargetIt = DTraits.getEdgeTarget(Node, EI); // Figure out which edge this targets... 
unsigned Offset = @@ -181,9 +205,12 @@ public: DestPort = static_cast(Offset); } + if (DTraits.getEdgeSourceLabel(Node, EI) == "") + edgeidx = -1; + emitEdge(static_cast(Node), edgeidx, static_cast(TargetNode), DestPort, - DOTTraits::getEdgeAttributes(Node, EI)); + DTraits.getEdgeAttributes(Node, EI)); } } @@ -221,12 +248,8 @@ public: if (SrcNodePort >= 0) O << ":s" << SrcNodePort; O << " -> Node" << DestNodeID; - if (DestNodePort >= 0) { - if (DOTTraits::hasEdgeDestLabels()) - O << ":d" << DestNodePort; - else - O << ":s" << DestNodePort; - } + if (DestNodePort >= 0 && DTraits.hasEdgeDestLabels()) + O << ":d" << DestNodePort; if (!Attrs.empty()) O << "[" << Attrs << "]"; diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h index 1f671c19250d..7f2f1497f39f 100644 --- a/include/llvm/Support/NoFolder.h +++ b/include/llvm/Support/NoFolder.h @@ -174,7 +174,7 @@ public: } Value *CreateExtractElement(Constant *Vec, Constant *Idx) const { - return new ExtractElementInst(Vec, Idx); + return ExtractElementInst::Create(Vec, Idx); } Value *CreateInsertElement(Constant *Vec, Constant *NewElt, diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h index 5b6f56b43628..b695ff10fdfd 100644 --- a/include/llvm/Support/SourceMgr.h +++ b/include/llvm/Support/SourceMgr.h @@ -120,7 +120,9 @@ public: /// /// @param Type - If non-null, the kind of message (e.g., "error") which is /// prefixed to the message. - void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type) const; + /// @param ShowLine - Should the diagnostic show the source line. + void PrintMessage(SMLoc Loc, const std::string &Msg, const char *Type, + bool ShowLine = true) const; /// GetMessage - Return an SMDiagnostic at the specified location with the @@ -128,8 +130,10 @@ public: /// /// @param Type - If non-null, the kind of message (e.g., "error") which is /// prefixed to the message. + /// @param ShowLine - Should the diagnostic show the source line. 
SMDiagnostic GetMessage(SMLoc Loc, - const std::string &Msg, const char *Type) const; + const std::string &Msg, const char *Type, + bool ShowLine = true) const; private: @@ -143,12 +147,15 @@ class SMDiagnostic { std::string Filename; int LineNo, ColumnNo; std::string Message, LineContents; + unsigned ShowLine : 1; + public: SMDiagnostic() : LineNo(0), ColumnNo(0) {} SMDiagnostic(const std::string &FN, int Line, int Col, - const std::string &Msg, const std::string &LineStr) + const std::string &Msg, const std::string &LineStr, + bool showline = true) : Filename(FN), LineNo(Line), ColumnNo(Col), Message(Msg), - LineContents(LineStr) {} + LineContents(LineStr), ShowLine(showline) {} void Print(const char *ProgName, raw_ostream &S); }; diff --git a/include/llvm/System/Path.h b/include/llvm/System/Path.h index 3b73a128fbd1..b8554c8297ce 100644 --- a/include/llvm/System/Path.h +++ b/include/llvm/System/Path.h @@ -380,6 +380,13 @@ namespace sys { /// in the file system. bool canWrite() const; + /// This function checks that what we're trying to work on is a regular file. + /// Check for things like /dev/null, any block special file, + /// or other things that aren't "regular" regular files. + /// @returns true if the file is S_ISREG. + /// @brief Determines if the file is a regular file + bool isRegularFile() const; + /// This function determines if the path name references an executable /// file in the file system. This function checks for the existence and /// executability (by the current program) of the file. diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 43fd54e183a9..1ba6b2f29992 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -514,6 +514,13 @@ public: return false; } + /// isPredicable - Return true if the specified instruction can be predicated. + /// By default, this returns true for every instruction with a + /// PredicateOperand.
+ virtual bool isPredicable(MachineInstr *MI) const { + return MI->getDesc().isPredicable(); + } + /// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine /// instruction that defines the specified register class. virtual bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { @@ -536,13 +543,6 @@ public: /// length. virtual unsigned getInlineAsmLength(const char *Str, const MCAsmInfo &MAI) const; - - /// TailDuplicationLimit - Returns the limit on the number of instructions - /// in basic block MBB beyond which it will not be tail-duplicated. - virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB, - unsigned DefaultLimit) const { - return DefaultLimit; - } }; /// TargetInstrInfoImpl - This is the default implementation of diff --git a/include/llvm/Target/TargetJITInfo.h b/include/llvm/Target/TargetJITInfo.h index 809f1830abf6..7208a8dc4464 100644 --- a/include/llvm/Target/TargetJITInfo.h +++ b/include/llvm/Target/TargetJITInfo.h @@ -18,6 +18,7 @@ #define LLVM_TARGET_TARGETJITINFO_H #include +#include "llvm/Support/ErrorHandling.h" #include "llvm/System/DataTypes.h" namespace llvm { @@ -48,22 +49,28 @@ namespace llvm { return 0; } + /// Records the required size and alignment for a call stub in bytes. + struct StubLayout { + size_t Size; + size_t Alignment; + }; + /// Returns the maximum size and alignment for a call stub on this target. + virtual StubLayout getStubLayout() { + llvm_unreachable("This target doesn't implement getStubLayout!"); + StubLayout Result = {0, 0}; + return Result; + } + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified - /// address. Return the address of the resultant function. - virtual void *emitFunctionStub(const Function* F, void *Fn, + /// address. The JITCodeEmitter must already have storage allocated for the + /// stub. 
Return the address of the resultant function, which may have been + /// aligned from the address the JCE was set up to emit at. + virtual void *emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE) { assert(0 && "This target doesn't implement emitFunctionStub!"); return 0; } - - /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to - /// emit a small native function that simply calls Fn. Emit the stub into - /// the supplied buffer. - virtual void emitFunctionStubAtAddr(const Function* F, void *Fn, - void *Buffer, JITCodeEmitter &JCE) { - assert(0 && "This target doesn't implement emitFunctionStubAtAddr!"); - } /// getPICJumpTableEntry - Returns the value of the jumptable entry for the /// specific basic block. diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index cd6fd286fb0a..cb29c7306eae 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -465,7 +465,7 @@ public: virtual unsigned getSubReg(unsigned RegNo, unsigned Index) const = 0; /// getSubRegIndex - For a given register pair, return the sub-register index - /// if they are second register is a sub-register of the second. Return zero + /// if the second register is a sub-register of the first. Return zero /// otherwise. virtual unsigned getSubRegIndex(unsigned RegNo, unsigned SubRegNo) const = 0; @@ -656,7 +656,9 @@ public: MachineBasicBlock::iterator I, MachineBasicBlock::iterator &UseMI, const TargetRegisterClass *RC, - unsigned Reg) const {return false;} + unsigned Reg) const { + return false; + } /// eliminateFrameIndex - This method must be overriden to eliminate abstract /// frame indices from instructions which may use them. The instruction @@ -696,6 +698,18 @@ public: /// the stack frame of the specified index.
virtual int getFrameIndexOffset(MachineFunction &MF, int FI) const; + /// getFrameIndexReference - This method should return the base register + /// and offset used to reference a frame index location. The offset is + /// returned directly, and the base register is returned via FrameReg. + virtual int getFrameIndexReference(MachineFunction &MF, int FI, + unsigned &FrameReg) const { + // By default, assume all frame indices are referenced via whatever + // getFrameRegister() says. The target can override this if it's doing + // something different. + FrameReg = getFrameRegister(MF); + return getFrameIndexOffset(MF, FI); + } + /// getRARegister - This method should return the register where the return /// address can be found. virtual unsigned getRARegister() const = 0; diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h index e79f651e5da5..951e7fa86b5d 100644 --- a/include/llvm/Target/TargetSelect.h +++ b/include/llvm/Target/TargetSelect.h @@ -33,6 +33,10 @@ extern "C" { // Declare all of the available assembly parser initialization functions. #define LLVM_ASM_PARSER(TargetName) void LLVMInitialize##TargetName##AsmParser(); #include "llvm/Config/AsmParsers.def" + + // Declare all of the available disassembler initialization functions. +#define LLVM_DISASSEMBLER(TargetName) void LLVMInitialize##TargetName##Disassembler(); +#include "llvm/Config/Disassemblers.def" } namespace llvm { @@ -79,6 +83,16 @@ namespace llvm { #include "llvm/Config/AsmParsers.def" } + /// InitializeAllDisassemblers - The main program should call this function if + /// it wants all disassemblers that LLVM is configured to support, to make + /// them available via the TargetRegistry. + /// + /// It is legal for a client to make multiple calls to this function. 
+ inline void InitializeAllDisassemblers() { +#define LLVM_DISASSEMBLER(TargetName) LLVMInitialize##TargetName##Disassembler(); +#include "llvm/Config/Disassemblers.def" + } + /// InitializeNativeTarget - The main program should call this function to /// initialize the native target corresponding to the host. This is useful /// for JIT applications to ensure that the target gets linked in correctly. diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index f123d66b5af8..7f54f819af23 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -864,10 +864,3 @@ class ComplexPattern Properties = props; list Attributes = attrs; } - -//===----------------------------------------------------------------------===// -// Dwarf support. -// -def SDT_dwarf_loc : SDTypeProfile<0, 3, - [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>; -def dwarf_loc : SDNode<"ISD::DEBUG_LOC", SDT_dwarf_loc,[SDNPHasChain]>; diff --git a/include/llvm/Transforms/Utils/PromoteMemToReg.h b/include/llvm/Transforms/Utils/PromoteMemToReg.h index 71a077e8625e..35cfaddb7379 100644 --- a/include/llvm/Transforms/Utils/PromoteMemToReg.h +++ b/include/llvm/Transforms/Utils/PromoteMemToReg.h @@ -23,7 +23,6 @@ class AllocaInst; class DominatorTree; class DominanceFrontier; class AliasSetTracker; -class LLVMContext; /// isAllocaPromotable - Return true if this alloca is legal for promotion. /// This is true if there are only loads and stores to the alloca... 
@@ -40,7 +39,6 @@ bool isAllocaPromotable(const AllocaInst *AI); /// void PromoteMemToReg(const std::vector &Allocas, DominatorTree &DT, DominanceFrontier &DF, - LLVMContext &Context, AliasSetTracker *AST = 0); } // End llvm namespace diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 0234965a0065..dee9b535871a 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -49,21 +49,11 @@ AliasAnalysis::alias(const Value *V1, unsigned V1Size, return AA->alias(V1, V1Size, V2, V2Size); } -void AliasAnalysis::getMustAliases(Value *P, std::vector &RetVals) { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->getMustAliases(P, RetVals); -} - bool AliasAnalysis::pointsToConstantMemory(const Value *P) { assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); return AA->pointsToConstantMemory(P); } -bool AliasAnalysis::hasNoModRefInfoForCalls() const { - assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - return AA->hasNoModRefInfoForCalls(); -} - void AliasAnalysis::deleteValue(Value *V) { assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); AA->deleteValue(V); @@ -137,17 +127,18 @@ AliasAnalysis::getModRefBehavior(Function *F, AliasAnalysis::ModRefResult AliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { - ModRefResult Mask = ModRef; ModRefBehavior MRB = getModRefBehavior(CS); if (MRB == DoesNotAccessMemory) return NoModRef; - else if (MRB == OnlyReadsMemory) + + ModRefResult Mask = ModRef; + if (MRB == OnlyReadsMemory) Mask = Ref; else if (MRB == AliasAnalysis::AccessesArguments) { bool doesAlias = false; for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); AI != AE; ++AI) - if (alias(*AI, ~0U, P, Size) != NoAlias) { + if (!isNoAlias(*AI, ~0U, P, Size)) { doesAlias = true; break; } diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index 
cf4727f1ebee..6868e3f2cef2 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -90,11 +90,6 @@ namespace { return AliasAnalysis::getModRefInfo(CS1,CS2); } - void getMustAliases(Value *P, std::vector &RetVals) { - assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); - return AliasAnalysis::getMustAliases(P, RetVals); - } - bool pointsToConstantMemory(const Value *P) { assert(Vals.find(P) != Vals.end() && "Never seen value in AA before"); return AliasAnalysis::pointsToConstantMemory(P); diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index c037c8d63afb..663460037392 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -153,9 +153,6 @@ bool AliasSet::aliasesPointer(const Value *Ptr, unsigned Size, // Check the call sites list and invoke list... if (!CallSites.empty()) { - if (AA.hasNoModRefInfoForCalls()) - return true; - for (unsigned i = 0, e = CallSites.size(); i != e; ++i) if (AA.getModRefInfo(CallSites[i], const_cast(Ptr), Size) != AliasAnalysis::NoModRef) @@ -169,9 +166,6 @@ bool AliasSet::aliasesCallSite(CallSite CS, AliasAnalysis &AA) const { if (AA.doesNotAccessMemory(CS)) return false; - if (AA.hasNoModRefInfoForCalls()) - return true; - for (unsigned i = 0, e = CallSites.size(); i != e; ++i) if (AA.getModRefInfo(CallSites[i], CS) != AliasAnalysis::NoModRef || AA.getModRefInfo(CS, CallSites[i]) != AliasAnalysis::NoModRef) diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index b8d69f41c2a5..b2983c722e22 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -14,8 +14,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/Passes.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" 
@@ -25,12 +23,13 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Operator.h" #include "llvm/Pass.h" +#include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include using namespace llvm; @@ -38,26 +37,6 @@ using namespace llvm; // Useful predicates //===----------------------------------------------------------------------===// -static const Value *GetGEPOperands(const Value *V, - SmallVector &GEPOps) { - assert(GEPOps.empty() && "Expect empty list to populate!"); - GEPOps.insert(GEPOps.end(), cast(V)->op_begin()+1, - cast(V)->op_end()); - - // Accumulate all of the chained indexes into the operand array - V = cast(V)->getOperand(0); - - while (const GEPOperator *G = dyn_cast(V)) { - if (!isa(GEPOps[0]) || isa(GEPOps[0]) || - !cast(GEPOps[0])->isNullValue()) - break; // Don't handle folding arbitrary pointer offsets yet... - GEPOps.erase(GEPOps.begin()); // Drop the zero index - GEPOps.insert(GEPOps.begin(), G->op_begin()+1, G->op_end()); - V = G->getOperand(0); - } - return V; -} - /// isKnownNonNull - Return true if we know that the specified value is never /// null. static bool isKnownNonNull(const Value *V) { @@ -79,7 +58,12 @@ static bool isKnownNonNull(const Value *V) { static bool isNonEscapingLocalObject(const Value *V) { // If this is a local allocation, check to see if it escapes. if (isa(V) || isNoAliasCall(V)) - return !PointerMayBeCaptured(V, false); + // Set StoreCaptures to True so that we can assume in our callers that the + // pointer is not the result of a load instruction. 
Currently + // PointerMayBeCaptured doesn't have any special analysis for the + // StoreCaptures=false case; if it did, our callers could be refined to be + // more precise. + return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); // If this is an argument that corresponds to a byval or noalias argument, // then it has not escaped before entering the function. Check if it escapes @@ -89,7 +73,7 @@ static bool isNonEscapingLocalObject(const Value *V) { // Don't bother analyzing arguments already known not to escape. if (A->hasNoCaptureAttr()) return true; - return !PointerMayBeCaptured(V, false); + return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); } return false; } @@ -159,7 +143,6 @@ namespace { llvm_unreachable("This method may not be called on this function!"); } - virtual void getMustAliases(Value *P, std::vector &RetVals) { } virtual bool pointsToConstantMemory(const Value *P) { return false; } virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size) { return ModRef; @@ -167,7 +150,6 @@ namespace { virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { return ModRef; } - virtual bool hasNoModRefInfoForCalls() const { return true; } virtual void deleteValue(Value *V) {} virtual void copyValue(Value *From, Value *To) {} @@ -206,10 +188,6 @@ namespace { ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); - /// hasNoModRefInfoForCalls - We can provide mod/ref information against - /// non-escaping allocations. - virtual bool hasNoModRefInfoForCalls() const { return false; } - /// pointsToConstantMemory - Chase pointers until we find a (constant /// global) or not. bool pointsToConstantMemory(const Value *P); @@ -218,13 +196,14 @@ namespace { // VisitedPHIs - Track PHI nodes visited by a aliasCheck() call. SmallPtrSet VisitedPHIs; - // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction - // against another. 
- AliasResult aliasGEP(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size); + // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP + // instruction against another. + AliasResult aliasGEP(const GEPOperator *V1, unsigned V1Size, + const Value *V2, unsigned V2Size, + const Value *UnderlyingV1, const Value *UnderlyingV2); - // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction - // against another. + // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI + // instruction against another. AliasResult aliasPHI(const PHINode *PN, unsigned PNSize, const Value *V2, unsigned V2Size); @@ -234,15 +213,6 @@ namespace { AliasResult aliasCheck(const Value *V1, unsigned V1Size, const Value *V2, unsigned V2Size); - - // CheckGEPInstructions - Check two GEP instructions with known - // must-aliasing base pointers. This checks to see if the index expressions - // preclude the pointers from aliasing... - AliasResult - CheckGEPInstructions(const Type* BasePtr1Ty, - Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1Size, - const Type *BasePtr2Ty, - Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2Size); }; } // End of anonymous namespace @@ -264,107 +234,124 @@ ImmutablePass *llvm::createBasicAliasAnalysisPass() { bool BasicAliasAnalysis::pointsToConstantMemory(const Value *P) { if (const GlobalVariable *GV = dyn_cast(P->getUnderlyingObject())) + // Note: this doesn't require GV to be "ODR" because it isn't legal for a + // global to be marked constant in some modules and non-constant in others. + // GV may even be a declaration, not a definition. return GV->isConstant(); return false; } -// getModRefInfo - Check to see if the specified callsite can clobber the -// specified memory object. Since we only look at local properties of this -// function, we really can't say much about this query. We do, however, use -// simple "address taken" analysis on local objects. 
-// +/// getModRefInfo - Check to see if the specified callsite can clobber the +/// specified memory object. Since we only look at local properties of this +/// function, we really can't say much about this query. We do, however, use +/// simple "address taken" analysis on local objects. AliasAnalysis::ModRefResult BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) { - if (!isa(P)) { - const Value *Object = P->getUnderlyingObject(); - - // If this is a tail call and P points to a stack location, we know that - // the tail call cannot access or modify the local stack. - // We cannot exclude byval arguments here; these belong to the caller of - // the current function not to the current function, and a tail callee - // may reference them. - if (isa(Object)) - if (CallInst *CI = dyn_cast(CS.getInstruction())) - if (CI->isTailCall()) - return NoModRef; - - // If the pointer is to a locally allocated object that does not escape, - // then the call can not mod/ref the pointer unless the call takes the - // argument without capturing it. - if (isNonEscapingLocalObject(Object) && CS.getInstruction() != Object) { - bool passedAsArg = false; - // TODO: Eventually only check 'nocapture' arguments. - for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); - CI != CE; ++CI) - if (isa((*CI)->getType()) && - alias(cast(CI), ~0U, P, ~0U) != NoAlias) - passedAsArg = true; + const Value *Object = P->getUnderlyingObject(); + + // If this is a tail call and P points to a stack location, we know that + // the tail call cannot access or modify the local stack. + // We cannot exclude byval arguments here; these belong to the caller of + // the current function not to the current function, and a tail callee + // may reference them. 
+ if (isa(Object)) + if (CallInst *CI = dyn_cast(CS.getInstruction())) + if (CI->isTailCall()) + return NoModRef; + + // If the pointer is to a locally allocated object that does not escape, + // then the call can not mod/ref the pointer unless the call takes the pointer + // as an argument, and itself doesn't capture it. + if (!isa(Object) && CS.getInstruction() != Object && + isNonEscapingLocalObject(Object)) { + bool PassedAsArg = false; + unsigned ArgNo = 0; + for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture pointer arguments. + if (!isa((*CI)->getType()) || + !CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)) + continue; - if (!passedAsArg) + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!isNoAlias(cast(CI), ~0U, P, ~0U)) { + PassedAsArg = true; + break; + } + } + + if (!PassedAsArg) + return NoModRef; + } + + // Finally, handle specific knowledge of intrinsics. + IntrinsicInst *II = dyn_cast(CS.getInstruction()); + if (II == 0) + return AliasAnalysis::getModRefInfo(CS, P, Size); + + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + unsigned Len = ~0U; + if (ConstantInt *LenCI = dyn_cast(II->getOperand(3))) + Len = LenCI->getZExtValue(); + Value *Dest = II->getOperand(1); + Value *Src = II->getOperand(2); + if (isNoAlias(Dest, Len, P, Size)) { + if (isNoAlias(Src, Len, P, Size)) + return NoModRef; + return Ref; + } + break; + } + case Intrinsic::memset: + // Since memset is 'accesses arguments' only, the AliasAnalysis base class + // will handle it for the variable length case. 
+ if (ConstantInt *LenCI = dyn_cast(II->getOperand(3))) { + unsigned Len = LenCI->getZExtValue(); + Value *Dest = II->getOperand(1); + if (isNoAlias(Dest, Len, P, Size)) return NoModRef; } - - if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { - switch (II->getIntrinsicID()) { - default: break; - case Intrinsic::memcpy: - case Intrinsic::memmove: { - unsigned Len = ~0U; - if (ConstantInt *LenCI = dyn_cast(II->getOperand(3))) - Len = LenCI->getZExtValue(); - Value *Dest = II->getOperand(1); - Value *Src = II->getOperand(2); - if (alias(Dest, Len, P, Size) == NoAlias) { - if (alias(Src, Len, P, Size) == NoAlias) - return NoModRef; - return Ref; - } - } - break; - case Intrinsic::memset: - if (ConstantInt *LenCI = dyn_cast(II->getOperand(3))) { - unsigned Len = LenCI->getZExtValue(); - Value *Dest = II->getOperand(1); - if (alias(Dest, Len, P, Size) == NoAlias) - return NoModRef; - } - break; - case Intrinsic::atomic_cmp_swap: - case Intrinsic::atomic_swap: - case Intrinsic::atomic_load_add: - case Intrinsic::atomic_load_sub: - case Intrinsic::atomic_load_and: - case Intrinsic::atomic_load_nand: - case Intrinsic::atomic_load_or: - case Intrinsic::atomic_load_xor: - case Intrinsic::atomic_load_max: - case Intrinsic::atomic_load_min: - case Intrinsic::atomic_load_umax: - case Intrinsic::atomic_load_umin: - if (TD) { - Value *Op1 = II->getOperand(1); - unsigned Op1Size = TD->getTypeStoreSize(Op1->getType()); - if (alias(Op1, Op1Size, P, Size) == NoAlias) - return NoModRef; - } - break; - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::invariant_start: { - unsigned PtrSize = cast(II->getOperand(1))->getZExtValue(); - if (alias(II->getOperand(2), PtrSize, P, Size) == NoAlias) - return NoModRef; - } - break; - case Intrinsic::invariant_end: { - unsigned PtrSize = cast(II->getOperand(2))->getZExtValue(); - if (alias(II->getOperand(3), PtrSize, P, Size) == NoAlias) - return NoModRef; - } - break; - } + break; + case 
Intrinsic::atomic_cmp_swap: + case Intrinsic::atomic_swap: + case Intrinsic::atomic_load_add: + case Intrinsic::atomic_load_sub: + case Intrinsic::atomic_load_and: + case Intrinsic::atomic_load_nand: + case Intrinsic::atomic_load_or: + case Intrinsic::atomic_load_xor: + case Intrinsic::atomic_load_max: + case Intrinsic::atomic_load_min: + case Intrinsic::atomic_load_umax: + case Intrinsic::atomic_load_umin: + if (TD) { + Value *Op1 = II->getOperand(1); + unsigned Op1Size = TD->getTypeStoreSize(Op1->getType()); + if (isNoAlias(Op1, Op1Size, P, Size)) + return NoModRef; } + break; + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: { + unsigned PtrSize = cast(II->getOperand(1))->getZExtValue(); + if (isNoAlias(II->getOperand(2), PtrSize, P, Size)) + return NoModRef; + break; + } + case Intrinsic::invariant_end: { + unsigned PtrSize = cast(II->getOperand(2))->getZExtValue(); + if (isNoAlias(II->getOperand(3), PtrSize, P, Size)) + return NoModRef; + break; + } } // The AliasAnalysis base class has some smarts, lets use them. @@ -389,141 +376,157 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS1, CallSite CS2) { return NoAA::getModRefInfo(CS1, CS2); } -// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction -// against another. -// +/// GetIndiceDifference - Dest and Src are the variable indices from two +/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base +/// pointers. Subtract the GEP2 indices from GEP1 to find the symbolic +/// difference between the two pointers. +static void GetIndiceDifference( + SmallVectorImpl > &Dest, + const SmallVectorImpl > &Src) { + if (Src.empty()) return; + + for (unsigned i = 0, e = Src.size(); i != e; ++i) { + const Value *V = Src[i].first; + int64_t Scale = Src[i].second; + + // Find V in Dest. This is N^2, but pointer indices almost never have more + // than a few variable indexes. 
+ for (unsigned j = 0, e = Dest.size(); j != e; ++j) { + if (Dest[j].first != V) continue; + + // If we found it, subtract off Scale V's from the entry in Dest. If it + // goes to zero, remove the entry. + if (Dest[j].second != Scale) + Dest[j].second -= Scale; + else + Dest.erase(Dest.begin()+j); + Scale = 0; + break; + } + + // If we didn't consume this entry, add it to the end of the Dest list. + if (Scale) + Dest.push_back(std::make_pair(V, -Scale)); + } +} + +/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction +/// against another pointer. We know that V1 is a GEP, but we don't know +/// anything about V2. UnderlyingV1 is GEP1->getUnderlyingObject(), +/// UnderlyingV2 is the same for V2. +/// AliasAnalysis::AliasResult -BasicAliasAnalysis::aliasGEP(const Value *V1, unsigned V1Size, - const Value *V2, unsigned V2Size) { +BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, unsigned V1Size, + const Value *V2, unsigned V2Size, + const Value *UnderlyingV1, + const Value *UnderlyingV2) { + int64_t GEP1BaseOffset; + SmallVector, 4> GEP1VariableIndices; + // If we have two gep instructions with must-alias'ing base pointers, figure // out if the indexes to the GEP tell us anything about the derived pointer. - // Note that we also handle chains of getelementptr instructions as well as - // constant expression getelementptrs here. - // - if (isa(V1) && isa(V2)) { - const User *GEP1 = cast(V1); - const User *GEP2 = cast(V2); - - // If V1 and V2 are identical GEPs, just recurse down on both of them. - // This allows us to analyze things like: - // P = gep A, 0, i, 1 - // Q = gep B, 0, i, 1 - // by just analyzing A and B. This is even safe for variable indices. - if (GEP1->getType() == GEP2->getType() && - GEP1->getNumOperands() == GEP2->getNumOperands() && - GEP1->getOperand(0)->getType() == GEP2->getOperand(0)->getType() && - // All operands are the same, ignoring the base. 
- std::equal(GEP1->op_begin()+1, GEP1->op_end(), GEP2->op_begin()+1)) - return aliasCheck(GEP1->getOperand(0), V1Size, - GEP2->getOperand(0), V2Size); - - // Drill down into the first non-gep value, to test for must-aliasing of - // the base pointers. - while (isa(GEP1->getOperand(0)) && - GEP1->getOperand(1) == - Constant::getNullValue(GEP1->getOperand(1)->getType())) - GEP1 = cast(GEP1->getOperand(0)); - const Value *BasePtr1 = GEP1->getOperand(0); - - while (isa(GEP2->getOperand(0)) && - GEP2->getOperand(1) == - Constant::getNullValue(GEP2->getOperand(1)->getType())) - GEP2 = cast(GEP2->getOperand(0)); - const Value *BasePtr2 = GEP2->getOperand(0); - + if (const GEPOperator *GEP2 = dyn_cast(V2)) { // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(BasePtr1, ~0U, BasePtr2, ~0U); - if (BaseAlias == NoAlias) return NoAlias; - if (BaseAlias == MustAlias) { - // If the base pointers alias each other exactly, check to see if we can - // figure out anything about the resultant pointers, to try to prove - // non-aliasing. + AliasResult BaseAlias = aliasCheck(UnderlyingV1, ~0U, UnderlyingV2, ~0U); + + // If we get a No or May, then return it immediately, no amount of analysis + // will improve this situation. + if (BaseAlias != MustAlias) return BaseAlias; + + // Otherwise, we have a MustAlias. Since the base pointers alias each other + // exactly, see if the computed offset from the common pointer tells us + // about the relation of the resulting pointer. + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + int64_t GEP2BaseOffset; + SmallVector, 4> GEP2VariableIndices; + const Value *GEP2BasePtr = + DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD); + + // If DecomposeGEPExpression isn't able to look all the way through the + // addressing operation, we must not have TD and this is too complex for us + // to handle without it. 
+ if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { + assert(TD == 0 && + "DecomposeGEPExpression and getUnderlyingObject disagree!"); + return MayAlias; + } + + // Subtract the GEP2 pointer from the GEP1 pointer to find out their + // symbolic difference. + GEP1BaseOffset -= GEP2BaseOffset; + GetIndiceDifference(GEP1VariableIndices, GEP2VariableIndices); + + } else { + // Check to see if these two pointers are related by the getelementptr + // instruction. If one pointer is a GEP with a non-zero index of the other + // pointer, we know they cannot alias. - // Collect all of the chained GEP operands together into one simple place - SmallVector GEP1Ops, GEP2Ops; - BasePtr1 = GetGEPOperands(V1, GEP1Ops); - BasePtr2 = GetGEPOperands(V2, GEP2Ops); + // If both accesses are unknown size, we can't do anything useful here. + if (V1Size == ~0U && V2Size == ~0U) + return MayAlias; - // If GetGEPOperands were able to fold to the same must-aliased pointer, - // do the comparison. - if (BasePtr1 == BasePtr2) { - AliasResult GAlias = - CheckGEPInstructions(BasePtr1->getType(), - &GEP1Ops[0], GEP1Ops.size(), V1Size, - BasePtr2->getType(), - &GEP2Ops[0], GEP2Ops.size(), V2Size); - if (GAlias != MayAlias) - return GAlias; - } + AliasResult R = aliasCheck(UnderlyingV1, ~0U, V2, V2Size); + if (R != MustAlias) + // If V2 may alias GEP base pointer, conservatively returns MayAlias. + // If V2 is known not to alias GEP base pointer, then the two values + // cannot alias per GEP semantics: "A pointer value formed from a + // getelementptr instruction is associated with the addresses associated + // with the first operand of the getelementptr". + return R; + + const Value *GEP1BasePtr = + DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD); + + // If DecomposeGEPExpression isn't able to look all the way through the + // addressing operation, we must not have TD and this is too complex for us + // to handle without it. 
+ if (GEP1BasePtr != UnderlyingV1) { + assert(TD == 0 && + "DecomposeGEPExpression and getUnderlyingObject disagree!"); + return MayAlias; } } - - // Check to see if these two pointers are related by a getelementptr - // instruction. If one pointer is a GEP with a non-zero index of the other - // pointer, we know they cannot alias. + + // In the two GEP Case, if there is no difference in the offsets of the + // computed pointers, the resultant pointers are a must alias. This + // happens when we have two lexically identical GEP's (for example). // - if (V1Size == ~0U || V2Size == ~0U) - return MayAlias; - - SmallVector GEPOperands; - const Value *BasePtr = GetGEPOperands(V1, GEPOperands); - - AliasResult R = aliasCheck(BasePtr, ~0U, V2, V2Size); - if (R != MustAlias) - // If V2 may alias GEP base pointer, conservatively returns MayAlias. - // If V2 is known not to alias GEP base pointer, then the two values - // cannot alias per GEP semantics: "A pointer value formed from a - // getelementptr instruction is associated with the addresses associated - // with the first operand of the getelementptr". - return R; - - // If there is at least one non-zero constant index, we know they cannot - // alias. - bool ConstantFound = false; - bool AllZerosFound = true; - for (unsigned i = 0, e = GEPOperands.size(); i != e; ++i) - if (const Constant *C = dyn_cast(GEPOperands[i])) { - if (!C->isNullValue()) { - ConstantFound = true; - AllZerosFound = false; - break; - } - } else { - AllZerosFound = false; - } - - // If we have getelementptr , 0, 0, 0, 0, ... and V2 must aliases - // the ptr, the end result is a must alias also. - if (AllZerosFound) + // In the other case, if we have getelementptr , 0, 0, 0, 0, ... and V2 + // must aliases the GEP, the end result is a must alias also. + if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) return MustAlias; - if (ConstantFound) { - if (V2Size <= 1 && V1Size <= 1) // Just pointer check? 
+ // If we have a known constant offset, see if this offset is larger than the + // access size being queried. If so, and if no variable indices can remove + // pieces of this constant, then we know we have a no-alias. For example, + // &A[100] != &A. + + // In order to handle cases like &A[100][i] where i is an out of range + // subscript, we have to ignore all constant offset pieces that are a multiple + // of a scaled index. Do this by removing constant offsets that are a + // multiple of any of our variable indices. This allows us to transform + // things like &A[i][1] because i has a stride of (e.g.) 8 bytes but the 1 + // provides an offset of 4 bytes (assuming a <= 4 byte access). + for (unsigned i = 0, e = GEP1VariableIndices.size(); + i != e && GEP1BaseOffset;++i) + if (int64_t RemovedOffset = GEP1BaseOffset/GEP1VariableIndices[i].second) + GEP1BaseOffset -= RemovedOffset*GEP1VariableIndices[i].second; + + // If our known offset is bigger than the access size, we know we don't have + // an alias. + if (GEP1BaseOffset) { + if (GEP1BaseOffset >= (int64_t)V2Size || + GEP1BaseOffset <= -(int64_t)V1Size) return NoAlias; - - // Otherwise we have to check to see that the distance is more than - // the size of the argument... build an index vector that is equal to - // the arguments provided, except substitute 0's for any variable - // indexes we find... - if (TD && - cast(BasePtr->getType())->getElementType()->isSized()) { - for (unsigned i = 0; i != GEPOperands.size(); ++i) - if (!isa(GEPOperands[i])) - GEPOperands[i] = Constant::getNullValue(GEPOperands[i]->getType()); - int64_t Offset = TD->getIndexedOffset(BasePtr->getType(), - &GEPOperands[0], - GEPOperands.size()); - - if (Offset >= (int64_t)V2Size || Offset <= -(int64_t)V1Size) - return NoAlias; - } } - + return MayAlias; } -// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select instruction -// against another. 
+/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select +/// instruction against another. AliasAnalysis::AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI, unsigned SISize, const Value *V2, unsigned V2Size) { @@ -683,22 +686,31 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD))) return NoAlias; - // If one pointer is the result of a call/invoke and the other is a + // If one pointer is the result of a call/invoke or load and the other is a // non-escaping local object, then we know the object couldn't escape to a - // point where the call could return it. - if ((isa(O1) || isa(O1)) && - isNonEscapingLocalObject(O2) && O1 != O2) - return NoAlias; - if ((isa(O2) || isa(O2)) && - isNonEscapingLocalObject(O1) && O1 != O2) - return NoAlias; + // point where the call could return it. The load case works because + // isNonEscapingLocalObject considers all stores to be escapes (it + // passes true for the StoreCaptures argument to PointerMayBeCaptured). + if (O1 != O2) { + if ((isa(O1) || isa(O1) || isa(O1) || + isa(O1)) && + isNonEscapingLocalObject(O2)) + return NoAlias; + if ((isa(O2) || isa(O2) || isa(O2) || + isa(O2)) && + isNonEscapingLocalObject(O1)) + return NoAlias; + } + // FIXME: This isn't aggressively handling alias(GEP, PHI) for example: if the + // GEP can't simplify, we don't even look at the PHI cases. if (!isa(V1) && isa(V2)) { std::swap(V1, V2); std::swap(V1Size, V2Size); + std::swap(O1, O2); } - if (isa(V1)) - return aliasGEP(V1, V1Size, V2, V2Size); + if (const GEPOperator *GV1 = dyn_cast(V1)) + return aliasGEP(GV1, V1Size, V2, V2Size, O1, O2); if (isa(V2) && !isa(V1)) { std::swap(V1, V2); @@ -717,351 +729,5 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size, return MayAlias; } -// This function is used to determine if the indices of two GEP instructions are -// equal. V1 and V2 are the indices. 
-static bool IndexOperandsEqual(Value *V1, Value *V2) { - if (V1->getType() == V2->getType()) - return V1 == V2; - if (Constant *C1 = dyn_cast(V1)) - if (Constant *C2 = dyn_cast(V2)) { - // Sign extend the constants to long types, if necessary - if (C1->getType() != Type::getInt64Ty(C1->getContext())) - C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext())); - if (C2->getType() != Type::getInt64Ty(C1->getContext())) - C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext())); - return C1 == C2; - } - return false; -} - -/// CheckGEPInstructions - Check two GEP instructions with known must-aliasing -/// base pointers. This checks to see if the index expressions preclude the -/// pointers from aliasing... -AliasAnalysis::AliasResult -BasicAliasAnalysis::CheckGEPInstructions( - const Type* BasePtr1Ty, Value **GEP1Ops, unsigned NumGEP1Ops, unsigned G1S, - const Type *BasePtr2Ty, Value **GEP2Ops, unsigned NumGEP2Ops, unsigned G2S) { - // We currently can't handle the case when the base pointers have different - // primitive types. Since this is uncommon anyway, we are happy being - // extremely conservative. - if (BasePtr1Ty != BasePtr2Ty) - return MayAlias; - - const PointerType *GEPPointerTy = cast(BasePtr1Ty); - - // Find the (possibly empty) initial sequence of equal values... which are not - // necessarily constants. - unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops; - unsigned MinOperands = std::min(NumGEP1Operands, NumGEP2Operands); - unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands); - unsigned UnequalOper = 0; - while (UnequalOper != MinOperands && - IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) { - // Advance through the type as we go... - ++UnequalOper; - if (const CompositeType *CT = dyn_cast(BasePtr1Ty)) - BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[UnequalOper-1]); - else { - // If all operands equal each other, then the derived pointers must - // alias each other... 
- BasePtr1Ty = 0; - assert(UnequalOper == NumGEP1Operands && UnequalOper == NumGEP2Operands && - "Ran out of type nesting, but not out of operands?"); - return MustAlias; - } - } - - // If we have seen all constant operands, and run out of indexes on one of the - // getelementptrs, check to see if the tail of the leftover one is all zeros. - // If so, return mustalias. - if (UnequalOper == MinOperands) { - if (NumGEP1Ops < NumGEP2Ops) { - std::swap(GEP1Ops, GEP2Ops); - std::swap(NumGEP1Ops, NumGEP2Ops); - } - - bool AllAreZeros = true; - for (unsigned i = UnequalOper; i != MaxOperands; ++i) - if (!isa(GEP1Ops[i]) || - !cast(GEP1Ops[i])->isNullValue()) { - AllAreZeros = false; - break; - } - if (AllAreZeros) return MustAlias; - } - - - // So now we know that the indexes derived from the base pointers, - // which are known to alias, are different. We can still determine a - // no-alias result if there are differing constant pairs in the index - // chain. For example: - // A[i][0] != A[j][1] iff (&A[0][1]-&A[0][0] >= std::max(G1S, G2S)) - // - // We have to be careful here about array accesses. In particular, consider: - // A[1][0] vs A[0][i] - // In this case, we don't *know* that the array will be accessed in bounds: - // the index could even be negative. Because of this, we have to - // conservatively *give up* and return may alias. We disregard differing - // array subscripts that are followed by a variable index without going - // through a struct. - // - unsigned SizeMax = std::max(G1S, G2S); - if (SizeMax == ~0U) return MayAlias; // Avoid frivolous work. - - // Scan for the first operand that is constant and unequal in the - // two getelementptrs... - unsigned FirstConstantOper = UnequalOper; - for (; FirstConstantOper != MinOperands; ++FirstConstantOper) { - const Value *G1Oper = GEP1Ops[FirstConstantOper]; - const Value *G2Oper = GEP2Ops[FirstConstantOper]; - - if (G1Oper != G2Oper) // Found non-equal constant indexes... 
- if (Constant *G1OC = dyn_cast(const_cast(G1Oper))) - if (Constant *G2OC = dyn_cast(const_cast(G2Oper))){ - if (G1OC->getType() != G2OC->getType()) { - // Sign extend both operands to long. - const Type *Int64Ty = Type::getInt64Ty(G1OC->getContext()); - if (G1OC->getType() != Int64Ty) - G1OC = ConstantExpr::getSExt(G1OC, Int64Ty); - if (G2OC->getType() != Int64Ty) - G2OC = ConstantExpr::getSExt(G2OC, Int64Ty); - GEP1Ops[FirstConstantOper] = G1OC; - GEP2Ops[FirstConstantOper] = G2OC; - } - - if (G1OC != G2OC) { - // Handle the "be careful" case above: if this is an array/vector - // subscript, scan for a subsequent variable array index. - if (const SequentialType *STy = - dyn_cast(BasePtr1Ty)) { - const Type *NextTy = STy; - bool isBadCase = false; - - for (unsigned Idx = FirstConstantOper; - Idx != MinOperands && isa(NextTy); ++Idx) { - const Value *V1 = GEP1Ops[Idx], *V2 = GEP2Ops[Idx]; - if (!isa(V1) || !isa(V2)) { - isBadCase = true; - break; - } - // If the array is indexed beyond the bounds of the static type - // at this level, it will also fall into the "be careful" case. - // It would theoretically be possible to analyze these cases, - // but for now just be conservatively correct. - if (const ArrayType *ATy = dyn_cast(STy)) - if (cast(G1OC)->getZExtValue() >= - ATy->getNumElements() || - cast(G2OC)->getZExtValue() >= - ATy->getNumElements()) { - isBadCase = true; - break; - } - if (const VectorType *VTy = dyn_cast(STy)) - if (cast(G1OC)->getZExtValue() >= - VTy->getNumElements() || - cast(G2OC)->getZExtValue() >= - VTy->getNumElements()) { - isBadCase = true; - break; - } - STy = cast(NextTy); - NextTy = cast(NextTy)->getElementType(); - } - - if (isBadCase) G1OC = 0; - } - - // Make sure they are comparable (ie, not constant expressions), and - // make sure the GEP with the smaller leading constant is GEP1. 
- if (G1OC) { - Constant *Compare = ConstantExpr::getICmp(ICmpInst::ICMP_SGT, - G1OC, G2OC); - if (ConstantInt *CV = dyn_cast(Compare)) { - if (CV->getZExtValue()) { // If they are comparable and G2 > G1 - std::swap(GEP1Ops, GEP2Ops); // Make GEP1 < GEP2 - std::swap(NumGEP1Ops, NumGEP2Ops); - } - break; - } - } - } - } - BasePtr1Ty = cast(BasePtr1Ty)->getTypeAtIndex(G1Oper); - } - - // No shared constant operands, and we ran out of common operands. At this - // point, the GEP instructions have run through all of their operands, and we - // haven't found evidence that there are any deltas between the GEP's. - // However, one GEP may have more operands than the other. If this is the - // case, there may still be hope. Check this now. - if (FirstConstantOper == MinOperands) { - // Without TargetData, we won't know what the offsets are. - if (!TD) - return MayAlias; - - // Make GEP1Ops be the longer one if there is a longer one. - if (NumGEP1Ops < NumGEP2Ops) { - std::swap(GEP1Ops, GEP2Ops); - std::swap(NumGEP1Ops, NumGEP2Ops); - } - - // Is there anything to check? - if (NumGEP1Ops > MinOperands) { - for (unsigned i = FirstConstantOper; i != MaxOperands; ++i) - if (isa(GEP1Ops[i]) && - !cast(GEP1Ops[i])->isZero()) { - // Yup, there's a constant in the tail. Set all variables to - // constants in the GEP instruction to make it suitable for - // TargetData::getIndexedOffset. - for (i = 0; i != MaxOperands; ++i) - if (!isa(GEP1Ops[i])) - GEP1Ops[i] = Constant::getNullValue(GEP1Ops[i]->getType()); - // Okay, now get the offset. This is the relative offset for the full - // instruction. - int64_t Offset1 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops, - NumGEP1Ops); - - // Now check without any constants at the end. - int64_t Offset2 = TD->getIndexedOffset(GEPPointerTy, GEP1Ops, - MinOperands); - - // Make sure we compare the absolute difference. - if (Offset1 > Offset2) - std::swap(Offset1, Offset2); - - // If the tail provided a bit enough offset, return noalias! 
- if ((uint64_t)(Offset2-Offset1) >= SizeMax) - return NoAlias; - // Otherwise break - we don't look for another constant in the tail. - break; - } - } - - // Couldn't find anything useful. - return MayAlias; - } - - // If there are non-equal constants arguments, then we can figure - // out a minimum known delta between the two index expressions... at - // this point we know that the first constant index of GEP1 is less - // than the first constant index of GEP2. - - // Advance BasePtr[12]Ty over this first differing constant operand. - BasePtr2Ty = cast(BasePtr1Ty)-> - getTypeAtIndex(GEP2Ops[FirstConstantOper]); - BasePtr1Ty = cast(BasePtr1Ty)-> - getTypeAtIndex(GEP1Ops[FirstConstantOper]); - - // We are going to be using TargetData::getIndexedOffset to determine the - // offset that each of the GEP's is reaching. To do this, we have to convert - // all variable references to constant references. To do this, we convert the - // initial sequence of array subscripts into constant zeros to start with. - const Type *ZeroIdxTy = GEPPointerTy; - for (unsigned i = 0; i != FirstConstantOper; ++i) { - if (!isa(ZeroIdxTy)) - GEP1Ops[i] = GEP2Ops[i] = - Constant::getNullValue(Type::getInt32Ty(ZeroIdxTy->getContext())); - - if (const CompositeType *CT = dyn_cast(ZeroIdxTy)) - ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]); - } - - // We know that GEP1Ops[FirstConstantOper] & GEP2Ops[FirstConstantOper] are ok - - // Loop over the rest of the operands... - for (unsigned i = FirstConstantOper+1; i != MaxOperands; ++i) { - const Value *Op1 = i < NumGEP1Ops ? GEP1Ops[i] : 0; - const Value *Op2 = i < NumGEP2Ops ? GEP2Ops[i] : 0; - // If they are equal, use a zero index... - if (Op1 == Op2 && BasePtr1Ty == BasePtr2Ty) { - if (!isa(Op1)) - GEP1Ops[i] = GEP2Ops[i] = Constant::getNullValue(Op1->getType()); - // Otherwise, just keep the constants we have. 
- } else { - if (Op1) { - if (const ConstantInt *Op1C = dyn_cast(Op1)) { - // If this is an array index, make sure the array element is in range. - if (const ArrayType *AT = dyn_cast(BasePtr1Ty)) { - if (Op1C->getZExtValue() >= AT->getNumElements()) - return MayAlias; // Be conservative with out-of-range accesses - } else if (const VectorType *VT = dyn_cast(BasePtr1Ty)) { - if (Op1C->getZExtValue() >= VT->getNumElements()) - return MayAlias; // Be conservative with out-of-range accesses - } - - } else { - // GEP1 is known to produce a value less than GEP2. To be - // conservatively correct, we must assume the largest possible - // constant is used in this position. This cannot be the initial - // index to the GEP instructions (because we know we have at least one - // element before this one with the different constant arguments), so - // we know that the current index must be into either a struct or - // array. Because we know it's not constant, this cannot be a - // structure index. Because of this, we can calculate the maximum - // value possible. - // - if (const ArrayType *AT = dyn_cast(BasePtr1Ty)) - GEP1Ops[i] = - ConstantInt::get(Type::getInt64Ty(AT->getContext()), - AT->getNumElements()-1); - else if (const VectorType *VT = dyn_cast(BasePtr1Ty)) - GEP1Ops[i] = - ConstantInt::get(Type::getInt64Ty(VT->getContext()), - VT->getNumElements()-1); - } - } - - if (Op2) { - if (const ConstantInt *Op2C = dyn_cast(Op2)) { - // If this is an array index, make sure the array element is in range. 
- if (const ArrayType *AT = dyn_cast(BasePtr2Ty)) { - if (Op2C->getZExtValue() >= AT->getNumElements()) - return MayAlias; // Be conservative with out-of-range accesses - } else if (const VectorType *VT = dyn_cast(BasePtr2Ty)) { - if (Op2C->getZExtValue() >= VT->getNumElements()) - return MayAlias; // Be conservative with out-of-range accesses - } - } else { // Conservatively assume the minimum value for this index - GEP2Ops[i] = Constant::getNullValue(Op2->getType()); - } - } - } - - if (BasePtr1Ty && Op1) { - if (const CompositeType *CT = dyn_cast(BasePtr1Ty)) - BasePtr1Ty = CT->getTypeAtIndex(GEP1Ops[i]); - else - BasePtr1Ty = 0; - } - - if (BasePtr2Ty && Op2) { - if (const CompositeType *CT = dyn_cast(BasePtr2Ty)) - BasePtr2Ty = CT->getTypeAtIndex(GEP2Ops[i]); - else - BasePtr2Ty = 0; - } - } - - if (TD && GEPPointerTy->getElementType()->isSized()) { - int64_t Offset1 = - TD->getIndexedOffset(GEPPointerTy, GEP1Ops, NumGEP1Ops); - int64_t Offset2 = - TD->getIndexedOffset(GEPPointerTy, GEP2Ops, NumGEP2Ops); - assert(Offset1 != Offset2 && - "There is at least one different constant here!"); - - // Make sure we compare the absolute difference. - if (Offset1 > Offset2) - std::swap(Offset1, Offset2); - - if ((uint64_t)(Offset2-Offset1) >= SizeMax) { - //cerr << "Determined that these two GEP's don't alias [" - // << SizeMax << " bytes]: \n" << *GEP1 << *GEP2; - return NoAlias; - } - } - return MayAlias; -} - -// Make sure that anything that uses AliasAnalysis pulls in this file... +// Make sure that anything that uses AliasAnalysis pulls in this file. 
DEFINING_FILE_FOR(BasicAliasAnalysis) diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index f615881829c6..a276c64c9a44 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -19,6 +19,7 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Instructions.h" #include "llvm/Value.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CallSite.h" @@ -28,8 +29,11 @@ using namespace llvm; /// by the enclosing function (which is required to exist). This routine can /// be expensive, so consider caching the results. The boolean ReturnCaptures /// specifies whether returning the value (or part of it) from the function +/// counts as capturing it or not. The boolean StoreCaptures specified whether +/// storing the value (or part of it) into memory anywhere automatically /// counts as capturing it or not. -bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) { +bool llvm::PointerMayBeCaptured(const Value *V, + bool ReturnCaptures, bool StoreCaptures) { assert(isa(V->getType()) && "Capture is for pointers only!"); SmallVector Worklist; SmallSet Visited; @@ -53,8 +57,7 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) { // Not captured if the callee is readonly, doesn't return a copy through // its return value and doesn't unwind (a readonly function can leak bits // by throwing an exception or not depending on the input value). - if (CS.onlyReadsMemory() && CS.doesNotThrow() && - I->getType() == Type::getVoidTy(V->getContext())) + if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) break; // Not captured if only passed via 'nocapture' arguments. Note that @@ -82,7 +85,11 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) { break; case Instruction::Store: if (V == I->getOperand(0)) - // Stored the pointer - it may be captured. 
+ // Stored the pointer - conservatively assume it may be captured. + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. return true; // Storing to the pointee does not cause the pointer to be captured. break; @@ -98,6 +105,18 @@ bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures) { Worklist.push_back(U); } break; + case Instruction::ICmp: + // Don't count comparisons of a no-alias return value against null as + // captures. This allows us to ignore comparisons of malloc results + // with null, for example. + if (isNoAliasCall(V->stripPointerCasts())) + if (ConstantPointerNull *CPN = + dyn_cast(I->getOperand(1))) + if (CPN->getType()->getAddressSpace() == 0) + break; + // Otherwise, be conservative. There are crazy ways to capture pointers + // using comparisons. + return true; default: // Something else - be conservative and say it is captured. return true; diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 1cdadbfcea41..96f738edad4e 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -564,6 +564,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // we eliminate over-indexing of the notional static type array bounds. // This makes it easy to determine if the getelementptr is "inbounds". // Also, this helps GlobalOpt do SROA on GlobalVariables. 
+ Ptr = cast(Ptr->stripPointerCasts()); const Type *Ty = Ptr->getType(); SmallVector NewIdxs; do { @@ -671,8 +672,13 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) { Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, const TargetData *TD) { SmallVector Ops; - for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) - Ops.push_back(cast(*i)); + for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i) { + Constant *NewC = cast(*i); + // Recursively fold the ConstantExpr's operands. + if (ConstantExpr *NewCE = dyn_cast(NewC)) + NewC = ConstantFoldConstantExpression(NewCE, TD); + Ops.push_back(NewC); + } if (CE->isCompare()) return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1], @@ -687,6 +693,10 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE, /// attempting to fold instructions like loads and stores, which have no /// constant expression form. /// +/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc +/// information, due to only being passed an opcode and operands. Constant +/// folding using this function strips this information. 
+/// Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy, Constant* const* Ops, unsigned NumOps, const TargetData *TD) { diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 8f62245296f5..41d803c699d5 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -78,19 +78,16 @@ DIDescriptor::DIDescriptor(MDNode *N, unsigned RequiredTag) { } } -const char * +StringRef DIDescriptor::getStringField(unsigned Elt) const { if (DbgNode == 0) - return NULL; + return StringRef(); if (Elt < DbgNode->getNumElements()) - if (MDString *MDS = dyn_cast_or_null(DbgNode->getElement(Elt))) { - if (MDS->getLength() == 0) - return NULL; - return MDS->getString().data(); - } + if (MDString *MDS = dyn_cast_or_null(DbgNode->getElement(Elt))) + return MDS->getString(); - return NULL; + return StringRef(); } uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { @@ -310,8 +307,8 @@ void DIDerivedType::replaceAllUsesWith(DIDescriptor &D) { bool DICompileUnit::Verify() const { if (isNull()) return false; - const char *N = getFilename(); - if (!N) + StringRef N = getFilename(); + if (N.empty()) return false; // It is possible that directory and produce string is empty. return true; @@ -366,7 +363,7 @@ bool DIGlobalVariable::Verify() const { if (isNull()) return false; - if (!getDisplayName()) + if (getDisplayName().empty()) return false; if (getContext().isNull()) @@ -426,15 +423,15 @@ uint64_t DIDerivedType::getOriginalTypeSize() const { /// information for the function F. 
bool DISubprogram::describes(const Function *F) { assert (F && "Invalid function"); - const char *Name = getLinkageName(); - if (!Name) + StringRef Name = getLinkageName(); + if (Name.empty()) Name = getName(); - if (strcmp(F->getName().data(), Name) == 0) + if (F->getName() == Name) return true; return false; } -const char *DIScope::getFilename() const { +StringRef DIScope::getFilename() const { if (isLexicalBlock()) return DILexicalBlock(DbgNode).getFilename(); else if (isSubprogram()) @@ -443,10 +440,10 @@ const char *DIScope::getFilename() const { return DICompileUnit(DbgNode).getFilename(); else assert (0 && "Invalid DIScope!"); - return NULL; + return StringRef(); } -const char *DIScope::getDirectory() const { +StringRef DIScope::getDirectory() const { if (isLexicalBlock()) return DILexicalBlock(DbgNode).getDirectory(); else if (isSubprogram()) @@ -455,7 +452,7 @@ const char *DIScope::getDirectory() const { return DICompileUnit(DbgNode).getDirectory(); else assert (0 && "Invalid DIScope!"); - return NULL; + return StringRef(); } //===----------------------------------------------------------------------===// @@ -481,7 +478,8 @@ void DICompileUnit::dump() const { void DIType::dump() const { if (isNull()) return; - if (const char *Res = getName()) + StringRef Res = getName(); + if (!Res.empty()) errs() << " [" << Res << "] "; unsigned Tag = getTag(); @@ -538,7 +536,8 @@ void DICompositeType::dump() const { /// dump - Print global. void DIGlobal::dump() const { - if (const char *Res = getName()) + StringRef Res = getName(); + if (!Res.empty()) errs() << " [" << Res << "] "; unsigned Tag = getTag(); @@ -562,7 +561,8 @@ void DIGlobal::dump() const { /// dump - Print subprogram. void DISubprogram::dump() const { - if (const char *Res = getName()) + StringRef Res = getName(); + if (!Res.empty()) errs() << " [" << Res << "] "; unsigned Tag = getTag(); @@ -590,7 +590,8 @@ void DIGlobalVariable::dump() const { /// dump - Print variable. 
void DIVariable::dump() const { - if (const char *Res = getName()) + StringRef Res = getName(); + if (!Res.empty()) errs() << " [" << Res << "] "; getCompileUnit().dump(); @@ -651,12 +652,12 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { /// CreateCompileUnit - Create a new descriptor for the specified compile /// unit. Note that this does not unique compile units within the module. DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, - const char * Filename, - const char * Directory, - const char * Producer, + StringRef Filename, + StringRef Directory, + StringRef Producer, bool isMain, bool isOptimized, - const char *Flags, + StringRef Flags, unsigned RunTimeVer) { Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_compile_unit), @@ -675,7 +676,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, } /// CreateEnumerator - Create a single enumerator value. -DIEnumerator DIFactory::CreateEnumerator(const char * Name, uint64_t Val){ +DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){ Value *Elts[] = { GetTagConstant(dwarf::DW_TAG_enumerator), MDString::get(VMContext, Name), @@ -687,7 +688,7 @@ DIEnumerator DIFactory::CreateEnumerator(const char * Name, uint64_t Val){ /// CreateBasicType - Create a basic type like int, float, etc. DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -712,7 +713,7 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, /// CreateBasicType - Create a basic type like int, float, etc. DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, @@ -739,7 +740,7 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context, /// pointer, typedef, etc. 
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -767,7 +768,7 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, /// pointer, typedef, etc. DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, @@ -794,7 +795,7 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag, /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType DIFactory::CreateCompositeType(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, uint64_t SizeInBits, @@ -826,7 +827,7 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag, /// CreateCompositeType - Create a composite type like array, struct, etc. DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNumber, Constant *SizeInBits, @@ -859,9 +860,9 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, /// See comments in DISubprogram for descriptions of these fields. This /// method does not unique the generated descriptors. DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, - const char * Name, - const char * DisplayName, - const char * LinkageName, + StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type, bool isLocalToUnit, @@ -885,9 +886,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, /// CreateGlobalVariable - Create a new descriptor for the specified global. 
DIGlobalVariable -DIFactory::CreateGlobalVariable(DIDescriptor Context, const char * Name, - const char * DisplayName, - const char * LinkageName, +DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, + StringRef DisplayName, + StringRef LinkageName, DICompileUnit CompileUnit, unsigned LineNo, DIType Type,bool isLocalToUnit, bool isDefinition, llvm::GlobalVariable *Val) { @@ -919,7 +920,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, const char * Name, /// CreateVariable - Create a new descriptor for the specified variable. DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, - const char * Name, + StringRef Name, DICompileUnit CompileUnit, unsigned LineNo, DIType Type) { Value *Elts[] = { @@ -976,6 +977,17 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, return DILocation(MDNode::get(VMContext, &Elts[0], 4)); } +/// CreateLocation - Creates a debug info location. +DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, + DIScope S, MDNode *OrigLoc) { + Value *Elts[] = { + ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo), + S.getNode(), + OrigLoc + }; + return DILocation(MDNode::get(VMContext, &Elts[0], 4)); +} //===----------------------------------------------------------------------===// // DIFactory: Routines for inserting code into a function @@ -1263,7 +1275,8 @@ bool getLocationInfo(const Value *V, std::string &DisplayName, if (!DIGV) return false; DIGlobalVariable Var(cast(DIGV)); - if (const char *D = Var.getDisplayName()) + StringRef D = Var.getDisplayName(); + if (!D.empty()) DisplayName = D; LineNo = Var.getLineNumber(); Unit = Var.getCompileUnit(); @@ -1273,18 +1286,22 @@ bool getLocationInfo(const Value *V, std::string &DisplayName, if (!DDI) return false; DIVariable Var(cast(DDI->getVariable())); - if (const char *D = Var.getName()) + StringRef D = Var.getName(); + if (!D.empty()) DisplayName = D; 
LineNo = Var.getLineNumber(); Unit = Var.getCompileUnit(); TypeD = Var.getType(); } - if (const char *T = TypeD.getName()) + StringRef T = TypeD.getName(); + if (!T.empty()) Type = T; - if (const char *F = Unit.getFilename()) + StringRef F = Unit.getFilename(); + if (!F.empty()) File = F; - if (const char *D = Unit.getDirectory()) + StringRef D = Unit.getDirectory(); + if (!D.empty()) Dir = D; return true; } @@ -1398,4 +1415,36 @@ bool getLocationInfo(const Value *V, std::string &DisplayName, return DebugLoc::get(Id); } + + /// getDISubprogram - Find subprogram that is enclosing this scope. + DISubprogram getDISubprogram(MDNode *Scope) { + DIDescriptor D(Scope); + if (D.isNull()) + return DISubprogram(); + + if (D.isCompileUnit()) + return DISubprogram(); + + if (D.isSubprogram()) + return DISubprogram(Scope); + + if (D.isLexicalBlock()) + return getDISubprogram(DILexicalBlock(Scope).getContext().getNode()); + + return DISubprogram(); + } + + /// getDICompositeType - Find underlying composite type. 
+ DICompositeType getDICompositeType(DIType T) { + if (T.isNull()) + return DICompositeType(); + + if (T.isCompositeType()) + return DICompositeType(T.getNode()); + + if (T.isDerivedType()) + return getDICompositeType(DIDerivedType(T.getNode()).getTypeDerivedFrom()); + + return DICompositeType(); + } } diff --git a/lib/Analysis/DomPrinter.cpp b/lib/Analysis/DomPrinter.cpp index f1b44d0356ea..32b8994f0289 100644 --- a/lib/Analysis/DomPrinter.cpp +++ b/lib/Analysis/DomPrinter.cpp @@ -30,46 +30,55 @@ using namespace llvm; namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { - static std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph, - bool ShortNames) { + + DOTGraphTraits (bool isSimple=false) + : DefaultDOTGraphTraits(isSimple) {} + + std::string getNodeLabel(DomTreeNode *Node, DomTreeNode *Graph) { BasicBlock *BB = Node->getBlock(); if (!BB) return "Post dominance root node"; - return DOTGraphTraits::getNodeLabel(BB, BB->getParent(), - ShortNames); + + if (isSimple()) + return DOTGraphTraits + ::getSimpleNodeLabel(BB, BB->getParent()); + else + return DOTGraphTraits + ::getCompleteNodeLabel(BB, BB->getParent()); } }; template<> struct DOTGraphTraits : public DOTGraphTraits { + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits(isSimple) {} + static std::string getGraphName(DominatorTree *DT) { return "Dominator tree"; } - static std::string getNodeLabel(DomTreeNode *Node, - DominatorTree *G, - bool ShortNames) { - return DOTGraphTraits::getNodeLabel(Node, G->getRootNode(), - ShortNames); + std::string getNodeLabel(DomTreeNode *Node, DominatorTree *G) { + return DOTGraphTraits::getNodeLabel(Node, G->getRootNode()); } }; template<> struct DOTGraphTraits : public DOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) + : DOTGraphTraits(isSimple) {} + static std::string getGraphName(PostDominatorTree *DT) { return "Post dominator tree"; } - static std::string getNodeLabel(DomTreeNode *Node, - PostDominatorTree *G, - 
bool ShortNames) { - return DOTGraphTraits::getNodeLabel(Node, - G->getRootNode(), - ShortNames); + + std::string getNodeLabel(DomTreeNode *Node, PostDominatorTree *G ) { + return DOTGraphTraits::getNodeLabel(Node, G->getRootNode()); } }; } @@ -85,9 +94,11 @@ struct GenericGraphViewer : public FunctionPass { virtual bool runOnFunction(Function &F) { Analysis *Graph; - + std::string Title, GraphName; Graph = &getAnalysis(); - ViewGraph(Graph, Name, OnlyBBS); + GraphName = DOTGraphTraits::getGraphName(Graph); + Title = GraphName + " for '" + F.getNameStr() + "' function"; + ViewGraph(Graph, Name, OnlyBBS, Title); return false; } @@ -163,8 +174,12 @@ struct GenericGraphPrinter : public FunctionPass { raw_fd_ostream File(Filename.c_str(), ErrorInfo); Graph = &getAnalysis(); + std::string Title, GraphName; + GraphName = DOTGraphTraits::getGraphName(Graph); + Title = GraphName + " for '" + F.getNameStr() + "' function"; + if (ErrorInfo.empty()) - WriteGraph(File, Graph, OnlyBBS); + WriteGraph(File, Graph, OnlyBBS, Name, Title); else errs() << " error opening file for writing!"; errs() << "\n"; diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp index 40a8cd5b23e4..e12db817440c 100644 --- a/lib/Analysis/IPA/Andersens.cpp +++ b/lib/Analysis/IPA/Andersens.cpp @@ -484,7 +484,6 @@ namespace { const Value *V2, unsigned V2Size); virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size); virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2); - void getMustAliases(Value *P, std::vector &RetVals); bool pointsToConstantMemory(const Value *P); virtual void deleteValue(Value *V) { @@ -680,32 +679,6 @@ Andersens::getModRefInfo(CallSite CS1, CallSite CS2) { return AliasAnalysis::getModRefInfo(CS1,CS2); } -/// getMustAlias - We can provide must alias information if we know that a -/// pointer can only point to a specific function or the null pointer. 
-/// Unfortunately we cannot determine must-alias information for global -/// variables or any other memory memory objects because we do not track whether -/// a pointer points to the beginning of an object or a field of it. -void Andersens::getMustAliases(Value *P, std::vector &RetVals) { - Node *N = &GraphNodes[FindNode(getNode(P))]; - if (N->PointsTo->count() == 1) { - Node *Pointee = &GraphNodes[N->PointsTo->find_first()]; - // If a function is the only object in the points-to set, then it must be - // the destination. Note that we can't handle global variables here, - // because we don't know if the pointer is actually pointing to a field of - // the global or to the beginning of it. - if (Value *V = Pointee->getValue()) { - if (Function *F = dyn_cast(V)) - RetVals.push_back(F); - } else { - // If the object in the points-to set is the null object, then the null - // pointer is a must alias. - if (Pointee == &GraphNodes[NullObject]) - RetVals.push_back(Constant::getNullValue(P->getType())); - } - } - AliasAnalysis::getMustAliases(P, RetVals); -} - /// pointsToConstantMemory - If we can determine that this pointer only points /// to constant memory, return true. In practice, this means that if the /// pointer can only point to constant globals, functions, or the null pointer, diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index ddd6ff9bd825..a979a99a4de8 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -111,7 +111,6 @@ namespace { ModRefResult getModRefInfo(CallSite CS1, CallSite CS2) { return AliasAnalysis::getModRefInfo(CS1,CS2); } - bool hasNoModRefInfoForCalls() const { return false; } /// getModRefBehavior - Return the behavior of the specified function if /// called from the specified call site. 
The call site may be null in which diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index efe40e4c6d1f..37747b65174c 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/CommandLine.h" #include using namespace llvm; @@ -32,10 +31,6 @@ char IVUsers::ID = 0; static RegisterPass X("iv-users", "Induction Variable Users", false, true); -static cl::opt -SimplifyIVUsers("simplify-iv-users", cl::Hidden, cl::init(false), - cl::desc("Restrict IV Users to loop-invariant strides")); - Pass *llvm::createIVUsersPass() { return new IVUsers(); } @@ -214,8 +209,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return false; // Non-reducible symbolic expression, bail out. // Keep things simple. Don't touch loop-variant strides. - if (SimplifyIVUsers && !Stride->isLoopInvariant(L) - && L->contains(I->getParent())) + if (!Stride->isLoopInvariant(L) && L->contains(I->getParent())) return false; SmallPtrSet UniqueUsers; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index f9953e3c98be..b53ac13925b1 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -21,10 +21,38 @@ using namespace llvm; using namespace llvm::PatternMatch; +/// SimplifyAddInst - Given operands for an Add, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, + const TargetData *TD) { + if (Constant *CLHS = dyn_cast(Op0)) { + if (Constant *CRHS = dyn_cast(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::Add, CLHS->getType(), + Ops, 2, TD); + } + + // Canonicalize the constant to the RHS. 
+ std::swap(Op0, Op1); + } + + if (Constant *Op1C = dyn_cast(Op1)) { + // X + undef -> undef + if (isa(Op1C)) + return Op1C; + + // X + 0 --> X + if (Op1C->isNullValue()) + return Op0; + } + + // FIXME: Could pull several more out of instcombine. + return 0; +} + /// SimplifyAndInst - Given operands for an And, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, - const TargetData *TD) { +Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD) { if (Constant *CLHS = dyn_cast(Op0)) { if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; @@ -83,8 +111,7 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, /// SimplifyOrInst - Given operands for an Or, see if we can /// fold the result. If not, this returns null. -Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, - const TargetData *TD) { +Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD) { if (Constant *CLHS = dyn_cast(Op0)) { if (Constant *CRHS = dyn_cast(Op1)) { Constant *Ops[] = { CLHS, CRHS }; @@ -142,8 +169,6 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, } - - static const Type *GetCompareTy(Value *Op) { return CmpInst::makeCmpResultType(Op->getType()); } @@ -264,6 +289,34 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return 0; } +/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can +/// fold the result. If not, this returns null. +Value *llvm::SimplifyGEPInst(Value *const *Ops, unsigned NumOps, + const TargetData *TD) { + // getelementptr P -> P. + if (NumOps == 1) + return Ops[0]; + + // TODO. + //if (isa(Ops[0])) + // return UndefValue::get(GEP.getType()); + + // getelementptr P, 0 -> P. + if (NumOps == 2) + if (ConstantInt *C = dyn_cast(Ops[1])) + if (C->isZero()) + return Ops[0]; + + // Check to see if this is constant foldable. 
+ for (unsigned i = 0; i != NumOps; ++i) + if (!isa(Ops[i])) + return 0; + + return ConstantExpr::getGetElementPtr(cast(Ops[0]), + (Constant *const*)Ops+1, NumOps-1); +} + + //=== Helper functions for higher up the class hierarchy. /// SimplifyBinOp - Given operands for a BinaryOperator, see if we can @@ -299,6 +352,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { switch (I->getOpcode()) { default: return ConstantFoldInstruction(I, TD); + case Instruction::Add: + return SimplifyAddInst(I->getOperand(0), I->getOperand(1), + cast(I)->hasNoSignedWrap(), + cast(I)->hasNoUnsignedWrap(), TD); case Instruction::And: return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD); case Instruction::Or: @@ -309,6 +366,10 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) { case Instruction::FCmp: return SimplifyFCmpInst(cast(I)->getPredicate(), I->getOperand(0), I->getOperand(1), TD); + case Instruction::GetElementPtr: { + SmallVector Ops(I->op_begin(), I->op_end()); + return SimplifyGEPInst(&Ops[0], Ops.size(), TD); + } } } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 1c614b0e06ba..4de756c41b0e 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -243,6 +243,11 @@ unsigned Loop::getSmallConstantTripMultiple() const { case BinaryOperator::Mul: Result = dyn_cast(BO->getOperand(1)); break; + case BinaryOperator::Shl: + if (ConstantInt *CI = dyn_cast(BO->getOperand(1))) + if (CI->getValue().getActiveBits() <= 5) + return 1u << CI->getZExtValue(); + break; default: break; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 0ec0e74233b3..ae6f970eff4c 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -20,6 +20,8 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Dominators.h" +#include 
"llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -117,10 +119,6 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, Pointer = Inst->getOperand(1); // calls to free() erase the entire structure PointerSize = ~0ULL; - } else if (isFreeCall(Inst)) { - Pointer = Inst->getOperand(0); - // calls to free() erase the entire structure - PointerSize = ~0ULL; } else if (isa(Inst) || isa(Inst)) { // Debug intrinsics don't cause dependences. if (isa(Inst)) continue; @@ -174,7 +172,7 @@ MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { - Value* invariantTag = 0; + Value *invariantTag = 0; // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -185,12 +183,12 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, if (invariantTag == Inst) { invariantTag = 0; continue; - } else if (IntrinsicInst* II = dyn_cast(Inst)) { + } else if (IntrinsicInst *II = dyn_cast(Inst)) { // If we pass an invariant-end marker, then we've just entered an // invariant region and can start ignoring dependencies. if (II->getIntrinsicID() == Intrinsic::invariant_end) { uint64_t invariantSize = ~0ULL; - if (ConstantInt* CI = dyn_cast(II->getOperand(2))) + if (ConstantInt *CI = dyn_cast(II->getOperand(2))) invariantSize = CI->getZExtValue(); AliasAnalysis::AliasResult R = @@ -203,9 +201,9 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. 
} else if (II->getIntrinsicID() == Intrinsic::lifetime_start || - II->getIntrinsicID() == Intrinsic::lifetime_end) { + II->getIntrinsicID() == Intrinsic::lifetime_end) { uint64_t invariantSize = ~0ULL; - if (ConstantInt* CI = dyn_cast(II->getOperand(1))) + if (ConstantInt *CI = dyn_cast(II->getOperand(1))) invariantSize = CI->getZExtValue(); AliasAnalysis::AliasResult R = @@ -371,20 +369,41 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // calls to free() erase the entire structure, not just a field. MemSize = ~0UL; } else if (isa(QueryInst) || isa(QueryInst)) { - CallSite QueryCS = CallSite::get(QueryInst); - bool isReadOnly = AA->onlyReadsMemory(QueryCS); - LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, - QueryParent); + int IntrinsicID = 0; // Intrinsic IDs start at 1. + if (IntrinsicInst *II = dyn_cast(QueryInst)) + IntrinsicID = II->getIntrinsicID(); + + switch (IntrinsicID) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::invariant_start: + MemPtr = QueryInst->getOperand(2); + MemSize = cast(QueryInst->getOperand(1))->getZExtValue(); + break; + case Intrinsic::invariant_end: + MemPtr = QueryInst->getOperand(3); + MemSize = cast(QueryInst->getOperand(2))->getZExtValue(); + break; + default: + CallSite QueryCS = CallSite::get(QueryInst); + bool isReadOnly = AA->onlyReadsMemory(QueryCS); + LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, + QueryParent); + } } else { // Non-memory instruction. LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos)); } // If we need to do a pointer scan, make it happen. 
- if (MemPtr) - LocalCache = getPointerDependencyFrom(MemPtr, MemSize, - isa(QueryInst), - ScanPos, QueryParent); + if (MemPtr) { + bool isLoad = !QueryInst->mayWriteToMemory(); + if (IntrinsicInst *II = dyn_cast(QueryInst)) { + isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_end; + } + LocalCache = getPointerDependencyFrom(MemPtr, MemSize, isLoad, ScanPos, + QueryParent); + } // Remember the result! if (Instruction *I = LocalCache.getInst()) @@ -688,6 +707,274 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, } } +/// isPHITranslatable - Return true if the specified computation is derived from +/// a PHI node in the current block and if it is simple enough for us to handle. +static bool isPHITranslatable(Instruction *Inst) { + if (isa(Inst)) + return true; + + // We can handle bitcast of a PHI, but the PHI needs to be in the same block + // as the bitcast. + if (BitCastInst *BC = dyn_cast(Inst)) { + Instruction *OpI = dyn_cast(BC->getOperand(0)); + if (OpI == 0 || OpI->getParent() != Inst->getParent()) + return true; + return isPHITranslatable(OpI); + } + + // We can translate a GEP if all of its operands defined in this block are phi + // translatable. 
+ if (GetElementPtrInst *GEP = dyn_cast(Inst)) { + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Instruction *OpI = dyn_cast(GEP->getOperand(i)); + if (OpI == 0 || OpI->getParent() != Inst->getParent()) + continue; + + if (!isPHITranslatable(OpI)) + return false; + } + return true; + } + + if (Inst->getOpcode() == Instruction::Add && + isa(Inst->getOperand(1))) { + Instruction *OpI = dyn_cast(Inst->getOperand(0)); + if (OpI == 0 || OpI->getParent() != Inst->getParent()) + return true; + return isPHITranslatable(OpI); + } + + // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; + // if (isa(PtrInst) || isa(PtrInst)) + // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + + return false; +} + +/// GetPHITranslatedValue - Given a computation that satisfied the +/// isPHITranslatable predicate, see if we can translate the computation into +/// the specified predecessor block. If so, return that value. +Value *MemoryDependenceAnalysis:: +GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred, + const TargetData *TD) const { + // If the input value is not an instruction, or if it is not defined in CurBB, + // then we don't need to phi translate it. + Instruction *Inst = dyn_cast(InVal); + if (Inst == 0 || Inst->getParent() != CurBB) + return InVal; + + if (PHINode *PN = dyn_cast(Inst)) + return PN->getIncomingValueForBlock(Pred); + + // Handle bitcast of PHI. + if (BitCastInst *BC = dyn_cast(Inst)) { + // PHI translate the input operand. + Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD); + if (PHIIn == 0) return 0; + + // Constants are trivial to phi translate. + if (Constant *C = dyn_cast(PHIIn)) + return ConstantExpr::getBitCast(C, BC->getType()); + + // Otherwise we have to see if a bitcasted version of the incoming pointer + // is available. If so, we can use it, otherwise we have to fail. 
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end(); + UI != E; ++UI) { + if (BitCastInst *BCI = dyn_cast(*UI)) + if (BCI->getType() == BC->getType()) + return BCI; + } + return 0; + } + + // Handle getelementptr with at least one PHI translatable operand. + if (GetElementPtrInst *GEP = dyn_cast(Inst)) { + SmallVector GEPOps; + BasicBlock *CurBB = GEP->getParent(); + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *GEPOp = GEP->getOperand(i); + // No PHI translation is needed of operands whose values are live in to + // the predecessor block. + if (!isa(GEPOp) || + cast(GEPOp)->getParent() != CurBB) { + GEPOps.push_back(GEPOp); + continue; + } + + // If the operand is a phi node, do phi translation. + Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD); + if (InOp == 0) return 0; + + GEPOps.push_back(InOp); + } + + // Simplify the GEP to handle 'gep x, 0' -> x etc. + if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) + return V; + + // Scan to see if we have this GEP available. + Value *APHIOp = GEPOps[0]; + for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end(); + UI != E; ++UI) { + if (GetElementPtrInst *GEPI = dyn_cast(*UI)) + if (GEPI->getType() == GEP->getType() && + GEPI->getNumOperands() == GEPOps.size() && + GEPI->getParent()->getParent() == CurBB->getParent()) { + bool Mismatch = false; + for (unsigned i = 0, e = GEPOps.size(); i != e; ++i) + if (GEPI->getOperand(i) != GEPOps[i]) { + Mismatch = true; + break; + } + if (!Mismatch) + return GEPI; + } + } + return 0; + } + + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa(Inst->getOperand(1))) { + // PHI translate the LHS. 
+ Value *LHS; + Constant *RHS = cast(Inst->getOperand(1)); + Instruction *OpI = dyn_cast(Inst->getOperand(0)); + bool isNSW = cast(Inst)->hasNoSignedWrap(); + bool isNUW = cast(Inst)->hasNoUnsignedWrap(); + + if (OpI == 0 || OpI->getParent() != Inst->getParent()) + LHS = Inst->getOperand(0); + else { + LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD); + if (LHS == 0) + return 0; + } + + // If the PHI translated LHS is an add of a constant, fold the immediates. + if (BinaryOperator *BOp = dyn_cast(LHS)) + if (BOp->getOpcode() == Instruction::Add) + if (ConstantInt *CI = dyn_cast(BOp->getOperand(1))) { + LHS = BOp->getOperand(0); + RHS = ConstantExpr::getAdd(RHS, CI); + isNSW = isNUW = false; + } + + // See if the add simplifies away. + if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) + return Res; + + // Otherwise, see if we have this add available somewhere. + for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end(); + UI != E; ++UI) { + if (BinaryOperator *BO = dyn_cast(*UI)) + if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS && + BO->getParent()->getParent() == CurBB->getParent()) + return BO; + } + + return 0; + } + + return 0; +} + +/// GetAvailablePHITranslatePointer - Return the value computed by +/// PHITranslatePointer if it dominates PredBB, otherwise return null. +Value *MemoryDependenceAnalysis:: +GetAvailablePHITranslatedValue(Value *V, + BasicBlock *CurBB, BasicBlock *PredBB, + const TargetData *TD, + const DominatorTree &DT) const { + // See if PHI translation succeeds. + V = GetPHITranslatedValue(V, CurBB, PredBB, TD); + if (V == 0) return 0; + + // Make sure the value is live in the predecessor. + if (Instruction *Inst = dyn_cast_or_null(V)) + if (!DT.dominates(Inst->getParent(), PredBB)) + return 0; + return V; +} + + +/// InsertPHITranslatedPointer - Insert a computation of the PHI translated +/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB +/// block. 
All newly created instructions are added to the NewInsts list. +/// +Value *MemoryDependenceAnalysis:: +InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB, + BasicBlock *PredBB, const TargetData *TD, + const DominatorTree &DT, + SmallVectorImpl &NewInsts) const { + // See if we have a version of this value already available and dominating + // PredBB. If so, there is no need to insert a new copy. + if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT)) + return Res; + + // If we don't have an available version of this value, it must be an + // instruction. + Instruction *Inst = cast(InVal); + + // Handle bitcast of PHI translatable value. + if (BitCastInst *BC = dyn_cast(Inst)) { + Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0), + CurBB, PredBB, TD, DT, NewInsts); + if (OpVal == 0) return 0; + + // Otherwise insert a bitcast at the end of PredBB. + BitCastInst *New = new BitCastInst(OpVal, InVal->getType(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + NewInsts.push_back(New); + return New; + } + + // Handle getelementptr with at least one PHI operand. + if (GetElementPtrInst *GEP = dyn_cast(Inst)) { + SmallVector GEPOps; + BasicBlock *CurBB = GEP->getParent(); + for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) { + Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i), + CurBB, PredBB, TD, DT, NewInsts); + if (OpVal == 0) return 0; + GEPOps.push_back(OpVal); + } + + GetElementPtrInst *Result = + GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Result->setIsInBounds(GEP->isInBounds()); + NewInsts.push_back(Result); + return Result; + } + +#if 0 + // FIXME: This code works, but it is unclear that we actually want to insert + // a big chain of computation in order to make a value available in a block. + // This needs to be evaluated carefully to consider its cost trade offs. 
+ + // Handle add with a constant RHS. + if (Inst->getOpcode() == Instruction::Add && + isa(Inst->getOperand(1))) { + // PHI translate the LHS. + Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0), + CurBB, PredBB, TD, DT, NewInsts); + if (OpVal == 0) return 0; + + BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1), + InVal->getName()+".phi.trans.insert", + PredBB->getTerminator()); + Res->setHasNoSignedWrap(cast(Inst)->hasNoSignedWrap()); + Res->setHasNoUnsignedWrap(cast(Inst)->hasNoUnsignedWrap()); + NewInsts.push_back(Res); + return Res; + } +#endif + + return 0; +} /// getNonLocalPointerDepFromBB - Perform a dependency query based on /// pointer/pointeesize starting at the end of StartBB. Add any clobber/def @@ -831,66 +1118,107 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize, NumSortedEntries = Cache->size(); } - // If this is directly a PHI node, just use the incoming values for each - // pred as the phi translated version. - if (PHINode *PtrPHI = dyn_cast(PtrInst)) { - Cache = 0; + // If this is a computation derived from a PHI node, use the suitably + // translated incoming values for each pred as the phi translated version. + if (!isPHITranslatable(PtrInst)) + goto PredTranslationFailure; + + Cache = 0; - for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { - BasicBlock *Pred = *PI; - Value *PredPtr = PtrPHI->getIncomingValueForBlock(Pred); - - // Check to see if we have already visited this pred block with another - // pointer. If so, we can't do this lookup. This failure can occur - // with PHI translation when a critical edge exists and the PHI node in - // the successor translates to a pointer value different than the - // pointer the block was first analyzed with. 
- std::pair::iterator, bool> - InsertRes = Visited.insert(std::make_pair(Pred, PredPtr)); + for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) { + BasicBlock *Pred = *PI; + // Get the PHI translated pointer in this predecessor. This can fail and + // return null if not translatable. + Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD); + + // Check to see if we have already visited this pred block with another + // pointer. If so, we can't do this lookup. This failure can occur + // with PHI translation when a critical edge exists and the PHI node in + // the successor translates to a pointer value different than the + // pointer the block was first analyzed with. + std::pair::iterator, bool> + InsertRes = Visited.insert(std::make_pair(Pred, PredPtr)); - if (!InsertRes.second) { - // If the predecessor was visited with PredPtr, then we already did - // the analysis and can ignore it. - if (InsertRes.first->second == PredPtr) - continue; - - // Otherwise, the block was previously analyzed with a different - // pointer. We can't represent the result of this case, so we just - // treat this as a phi translation failure. - goto PredTranslationFailure; - } - - // FIXME: it is entirely possible that PHI translating will end up with - // the same value. Consider PHI translating something like: - // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* - // to recurse here, pedantically speaking. + if (!InsertRes.second) { + // If the predecessor was visited with PredPtr, then we already did + // the analysis and can ignore it. + if (InsertRes.first->second == PredPtr) + continue; - // If we have a problem phi translating, fall through to the code below - // to handle the failure condition. - if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred, - Result, Visited)) - goto PredTranslationFailure; + // Otherwise, the block was previously analyzed with a different + // pointer. 
We can't represent the result of this case, so we just + // treat this as a phi translation failure. + goto PredTranslationFailure; } - // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. - CacheInfo = &NonLocalPointerDeps[CacheKey]; - Cache = &CacheInfo->second; - NumSortedEntries = Cache->size(); + // If PHI translation was unable to find an available pointer in this + // predecessor, then we have to assume that the pointer is clobbered in + // that predecessor. We can still do PRE of the load, which would insert + // a computation of the pointer in this predecessor. + if (PredPtr == 0) { + // Add the entry to the Result list. + NonLocalDepEntry Entry(Pred, + MemDepResult::getClobber(Pred->getTerminator())); + Result.push_back(Entry); + + // Add it to the cache for this CacheKey so that subsequent queries get + // this result. + Cache = &NonLocalPointerDeps[CacheKey].second; + MemoryDependenceAnalysis::NonLocalDepInfo::iterator It = + std::upper_bound(Cache->begin(), Cache->end(), Entry); + + if (It != Cache->begin() && prior(It)->first == Pred) + --It; + + if (It == Cache->end() || It->first != Pred) { + Cache->insert(It, Entry); + // Add it to the reverse map. + ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); + } else if (!It->second.isDirty()) { + // noop + } else if (It->second.getInst() == Pred->getTerminator()) { + // Same instruction, clear the dirty marker. + It->second = Entry.second; + } else if (It->second.getInst() == 0) { + // Dirty, with no instruction, just add this. + It->second = Entry.second; + ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); + } else { + // Otherwise, dirty with a different instruction. 
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps, It->second.getInst(), + CacheKey); + It->second = Entry.second; + ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey); + } + Cache = 0; + continue; + } + + // FIXME: it is entirely possible that PHI translating will end up with + // the same value. Consider PHI translating something like: + // X = phi [x, bb1], [y, bb2]. PHI translating for bb1 doesn't *need* + // to recurse here, pedantically speaking. - // Since we did phi translation, the "Cache" set won't contain all of the - // results for the query. This is ok (we can still use it to accelerate - // specific block queries) but we can't do the fastpath "return all - // results from the set" Clear out the indicator for this. - CacheInfo->first = BBSkipFirstBlockPair(); - SkipFirstBlock = false; - continue; + // If we have a problem phi translating, fall through to the code below + // to handle the failure condition. + if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred, + Result, Visited)) + goto PredTranslationFailure; } - // TODO: BITCAST, GEP. + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. + CacheInfo = &NonLocalPointerDeps[CacheKey]; + Cache = &CacheInfo->second; + NumSortedEntries = Cache->size(); - // cerr << "MEMDEP: Could not PHI translate: " << *Pointer; - // if (isa(PtrInst) || isa(PtrInst)) - // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0); + // Since we did phi translation, the "Cache" set won't contain all of the + // results for the query. This is ok (we can still use it to accelerate + // specific block queries) but we can't do the fastpath "return all + // results from the set" Clear out the indicator for this. 
+ CacheInfo->first = BBSkipFirstBlockPair(); + SkipFirstBlock = false; + continue; + PredTranslationFailure: if (Cache == 0) { diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index ea4af40ea746..c6835ef08559 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -3644,7 +3644,7 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, /// the addressed element of the initializer or null if the index expression is /// invalid. static Constant * -GetAddressedElementFromGlobal(LLVMContext &Context, GlobalVariable *GV, +GetAddressedElementFromGlobal(GlobalVariable *GV, const std::vector &Indices) { Constant *Init = GV->getInitializer(); for (unsigned i = 0, e = Indices.size(); i != e; ++i) { @@ -3732,7 +3732,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount( // Form the GEP offset. Indexes[VarIdxNum] = Val; - Constant *Result = GetAddressedElementFromGlobal(getContext(), GV, Indexes); + Constant *Result = GetAddressedElementFromGlobal(GV, Indexes); if (Result == 0) break; // Cannot compute! // Evaluate the condition for this iteration. diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index b0e6900b30d6..31d3ccca36ad 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -325,7 +325,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt Mask2(Mask.shl(ShiftAmt)); ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero,KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. 
@@ -343,7 +343,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, APInt Mask2(Mask.shl(ShiftAmt)); ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); @@ -380,7 +380,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, } // fall through case Instruction::Add: { - // If one of the operands has trailing zeros, than the bits that the + // If one of the operands has trailing zeros, then the bits that the // other operand has in those bit positions will be preserved in the // result. For an add, this works with either operand. For a subtract, // this only works if the known zeros are in the right operand. @@ -436,7 +436,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero |= KnownZero2 & Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } break; @@ -449,7 +449,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero |= ~LowBits & Mask; ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); break; } } @@ -833,14 +833,12 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, switch (I->getOpcode()) { default: break; - case Instruction::SExt: { + case Instruction::SExt: if (!LookThroughSExt) return false; // otherwise fall through to ZExt - } - case Instruction::ZExt: { + case Instruction::ZExt: return ComputeMultiple(I->getOperand(0), Base, Multiple, LookThroughSExt, Depth+1); - } case Instruction::Shl: case Instruction::Mul: { Value *Op0 = I->getOperand(0); @@ 
-950,6 +948,195 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return false; } + +/// GetLinearExpression - Analyze the specified value as a linear expression: +/// "A*V + B", where A and B are constant integers. Return the scale and offset +/// values as APInts and return V as a Value*. The incoming Value is known to +/// have IntegerType. Note that this looks through extends, so the high bits +/// may not be represented in the result. +static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, + const TargetData *TD, unsigned Depth) { + assert(isa(V->getType()) && "Not an integer value"); + + // Limit our recursion depth. + if (Depth == 6) { + Scale = 1; + Offset = 0; + return V; + } + + if (BinaryOperator *BOp = dyn_cast(V)) { + if (ConstantInt *RHSC = dyn_cast(BOp->getOperand(1))) { + switch (BOp->getOpcode()) { + default: break; + case Instruction::Or: + // X|C == X+C if all the bits in C are unset in X. Otherwise we can't + // analyze it. + if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), TD)) + break; + // FALL THROUGH. + case Instruction::Add: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1); + Offset += RHSC->getValue(); + return V; + case Instruction::Mul: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1); + Offset *= RHSC->getValue(); + Scale *= RHSC->getValue(); + return V; + case Instruction::Shl: + V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, TD, Depth+1); + Offset <<= RHSC->getValue().getLimitedValue(); + Scale <<= RHSC->getValue().getLimitedValue(); + return V; + } + } + } + + // Since clients don't care about the high bits of the value, just scales and + // offsets, we can look through extensions. 
+ if (isa(V) || isa(V)) { + Value *CastOp = cast(V)->getOperand(0); + unsigned OldWidth = Scale.getBitWidth(); + unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); + Scale.trunc(SmallWidth); + Offset.trunc(SmallWidth); + Value *Result = GetLinearExpression(CastOp, Scale, Offset, TD, Depth+1); + Scale.zext(OldWidth); + Offset.zext(OldWidth); + return Result; + } + + Scale = 1; + Offset = 0; + return V; +} + +/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it +/// into a base pointer with a constant offset and a number of scaled symbolic +/// offsets. +/// +/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in +/// the VarIndices vector) are Value*'s that are known to be scaled by the +/// specified amount, but which may have other unrepresented high bits. As such, +/// the gep cannot necessarily be reconstructed from its decomposed form. +/// +/// When TargetData is around, this function is capable of analyzing everything +/// that Value::getUnderlyingObject() can look through. When not, it just looks +/// through pointer casts. +/// +const Value *llvm::DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, + SmallVectorImpl > &VarIndices, + const TargetData *TD) { + // Limit recursion depth to limit compile time in crazy cases. + unsigned MaxLookup = 6; + + BaseOffs = 0; + do { + // See if this is a bitcast or GEP. + const Operator *Op = dyn_cast(V); + if (Op == 0) { + // The only non-operator case we can handle are GlobalAliases. + if (const GlobalAlias *GA = dyn_cast(V)) { + if (!GA->mayBeOverridden()) { + V = GA->getAliasee(); + continue; + } + } + return V; + } + + if (Op->getOpcode() == Instruction::BitCast) { + V = Op->getOperand(0); + continue; + } + + const GEPOperator *GEPOp = dyn_cast(Op); + if (GEPOp == 0) + return V; + + // Don't attempt to analyze GEPs over unsized objects. 
+ if (!cast(GEPOp->getOperand(0)->getType()) + ->getElementType()->isSized()) + return V; + + // If we are lacking TargetData information, we can't compute the offets of + // elements computed by GEPs. However, we can handle bitcast equivalent + // GEPs. + if (!TD) { + if (!GEPOp->hasAllZeroIndices()) + return V; + V = GEPOp->getOperand(0); + continue; + } + + // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices. + gep_type_iterator GTI = gep_type_begin(GEPOp); + for (User::const_op_iterator I = GEPOp->op_begin()+1, + E = GEPOp->op_end(); I != E; ++I) { + Value *Index = *I; + // Compute the (potentially symbolic) offset in bytes for this index. + if (const StructType *STy = dyn_cast(*GTI++)) { + // For a struct, add the member offset. + unsigned FieldNo = cast(Index)->getZExtValue(); + if (FieldNo == 0) continue; + + BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo); + continue; + } + + // For an array/pointer, add the element offset, explicitly scaled. + if (ConstantInt *CIdx = dyn_cast(Index)) { + if (CIdx->isZero()) continue; + BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue(); + continue; + } + + uint64_t Scale = TD->getTypeAllocSize(*GTI); + + // Use GetLinearExpression to decompose the index into a C1*V+C2 form. + unsigned Width = cast(Index->getType())->getBitWidth(); + APInt IndexScale(Width, 0), IndexOffset(Width, 0); + Index = GetLinearExpression(Index, IndexScale, IndexOffset, TD, 0); + + // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale. + // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. + BaseOffs += IndexOffset.getZExtValue()*Scale; + Scale *= IndexScale.getZExtValue(); + + + // If we already had an occurrance of this index variable, merge this + // scale into it. For example, we want to handle: + // A[x][x] -> x*16 + x*4 -> x*20 + // This also ensures that 'x' only appears in the index list once. 
+ for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { + if (VarIndices[i].first == Index) { + Scale += VarIndices[i].second; + VarIndices.erase(VarIndices.begin()+i); + break; + } + } + + // Make sure that we have a scale that makes sense for this target's + // pointer size. + if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) { + Scale <<= ShiftBits; + Scale >>= ShiftBits; + } + + if (Scale) + VarIndices.push_back(std::make_pair(Index, Scale)); + } + + // Analyze the base pointer next. + V = GEPOp->getOperand(0); + } while (--MaxLookup); + + // If the chain of expressions is too deep, just return early. + return V; +} + + // This is the recursive version of BuildSubAggregate. It takes a few different // arguments. Idxs is the index within the nested struct From that we are // looking at now (which is of type IndexedType). IdxSkip is the number of @@ -959,7 +1146,6 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, SmallVector &Idxs, unsigned IdxSkip, - LLVMContext &Context, Instruction *InsertBefore) { const llvm::StructType *STy = llvm::dyn_cast(IndexedType); if (STy) { @@ -971,7 +1157,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, Idxs.push_back(i); Value *PrevTo = To; To = BuildSubAggregate(From, To, STy->getElementType(i), Idxs, IdxSkip, - Context, InsertBefore); + InsertBefore); Idxs.pop_back(); if (!To) { // Couldn't find any inserted value for this index? Cleanup @@ -994,7 +1180,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, // we might be able to find the complete struct somewhere. 
// Find the value that is at that particular spot - Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end(), Context); + Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end()); if (!V) return NULL; @@ -1017,7 +1203,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, // // All inserted insertvalue instructions are inserted before InsertBefore static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin, - const unsigned *idx_end, LLVMContext &Context, + const unsigned *idx_end, Instruction *InsertBefore) { assert(InsertBefore && "Must have someplace to insert!"); const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(), @@ -1027,8 +1213,7 @@ static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin, SmallVector Idxs(idx_begin, idx_end); unsigned IdxSkip = Idxs.size(); - return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, - Context, InsertBefore); + return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore); } /// FindInsertedValue - Given an aggregrate and an sequence of indices, see if @@ -1038,8 +1223,7 @@ static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin, /// If InsertBefore is not null, this function will duplicate (modified) /// insertvalues when a part of a nested struct is extracted. Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, - const unsigned *idx_end, LLVMContext &Context, - Instruction *InsertBefore) { + const unsigned *idx_end, Instruction *InsertBefore) { // Nothing to index? 
Just return V then (this is useful at the end of our // recursion) if (idx_begin == idx_end) @@ -1063,7 +1247,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, if (isa(C) || isa(C)) // Recursively process this constant return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1, - idx_end, Context, InsertBefore); + idx_end, InsertBefore); } else if (InsertValueInst *I = dyn_cast(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices @@ -1082,8 +1266,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, // %C = insertvalue {i32, i32 } %A, i32 11, 1 // which allows the unused 0,0 element from the nested struct to be // removed. - return BuildSubAggregate(V, idx_begin, req_idx, - Context, InsertBefore); + return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore); else // We can't handle this without inserting insertvalues return 0; @@ -1094,13 +1277,13 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, // looking for, then. if (*req_idx != *i) return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end, - Context, InsertBefore); + InsertBefore); } // If we end up here, the indices of the insertvalue match with those // requested (though possibly only partially). Now we recursively look at // the inserted value, passing any remaining indices. return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end, - Context, InsertBefore); + InsertBefore); } else if (ExtractValueInst *I = dyn_cast(V)) { // If we're extracting a value from an aggregrate that was extracted from // something else, we can extract from that something else directly instead. 
@@ -1124,7 +1307,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin, && "Number of indices added not correct?"); return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(), - Context, InsertBefore); + InsertBefore); } // Otherwise, we don't know (such as, extracting from a function return value // or load instruction) diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 26b6a09ab195..a92dbf82a0b9 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -2701,6 +2701,10 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { // Add all of the arguments we parsed to the function. Function::arg_iterator ArgIt = Fn->arg_begin(); for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) { + // If we run out of arguments in the Function prototype, exit early. + // FIXME: REMOVE THIS IN LLVM 3.0, this is just for the mismatch case above. + if (ArgIt == Fn->arg_end()) break; + // If the argument has a name, insert it into the argument symbol table. 
if (ArgList[i].Name.empty()) continue; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index c37c793b56d0..8e3f8e770486 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -28,10 +28,15 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +// If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod static cl::opt -AntiDepTrials("agg-antidep-trials", - cl::desc("Maximum number of anti-dependency breaking passes"), - cl::init(1), cl::Hidden); +DebugDiv("agg-antidep-debugdiv", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); +static cl::opt +DebugMod("agg-antidep-debugmod", + cl::desc("Debug control for aggressive anti-dep breaker"), + cl::init(0), cl::Hidden); AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) : GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) { @@ -108,7 +113,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, MRI(MF.getRegInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)), - State(NULL), SavedState(NULL) { + State(NULL) { /* Collect a bitset of all registers that are only broken if they are on the critical path. 
*/ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) { @@ -128,13 +133,6 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() { delete State; - delete SavedState; -} - -unsigned AggressiveAntiDepBreaker::GetMaxTrials() { - if (AntiDepTrials <= 0) - return 1; - return AntiDepTrials; } void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { @@ -206,8 +204,6 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { void AggressiveAntiDepBreaker::FinishBlock() { delete State; State = NULL; - delete SavedState; - SavedState = NULL; } void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, @@ -241,10 +237,6 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count, } } DEBUG(errs() << '\n'); - - // We're starting a new schedule region so forget any saved state. - delete SavedState; - SavedState = NULL; } bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr *MI, @@ -283,27 +275,20 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, } } -/// AntiDepEdges - Return in Edges the anti- and output- -/// dependencies on Regs in SU that we want to consider for breaking. -static void AntiDepEdges(SUnit *SU, - const AntiDepBreaker::AntiDepRegVector& Regs, - std::vector& Edges) { - AntiDepBreaker::AntiDepRegSet RegSet; - for (unsigned i = 0, e = Regs.size(); i < e; ++i) - RegSet.insert(Regs[i]); - +/// AntiDepEdges - Return in Edges the anti- and output- dependencies +/// in SU that we want to consider for breaking. 
+static void AntiDepEdges(SUnit *SU, std::vector& Edges) { + SmallSet RegSet; for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end(); P != PE; ++P) { if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) { unsigned Reg = P->getReg(); - if (RegSet.count(Reg) != 0) { + if (RegSet.count(Reg) == 0) { Edges.push_back(&*P); - RegSet.erase(Reg); + RegSet.insert(Reg); } } } - - assert(RegSet.empty() && "Expected all antidep registers to be found"); } /// CriticalPathStep - Return the next SUnit after SU on the bottom-up @@ -332,7 +317,8 @@ static SUnit *CriticalPathStep(SUnit *SU) { } void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, - const char *tag) { + const char *tag, const char *header, + const char *footer) { unsigned *KillIndices = State->GetKillIndices(); unsigned *DefIndices = State->GetDefIndices(); std::multimap& @@ -343,6 +329,8 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[Reg] = ~0u; RegRefs.erase(Reg); State->LeaveGroup(Reg); + DEBUG(if (header != NULL) { + errs() << header << TRI->getName(Reg); header = NULL; }); DEBUG(errs() << "->g" << State->GetGroup(Reg) << tag); } // Repeat for subregisters. 
@@ -354,10 +342,14 @@ void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx, DefIndices[SubregReg] = ~0u; RegRefs.erase(SubregReg); State->LeaveGroup(SubregReg); + DEBUG(if (header != NULL) { + errs() << header << TRI->getName(Reg); header = NULL; }); DEBUG(errs() << " " << TRI->getName(SubregReg) << "->g" << State->GetGroup(SubregReg) << tag); } } + + DEBUG(if ((header == NULL) && (footer != NULL)) errs() << footer); } void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Count, @@ -377,9 +369,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou unsigned Reg = MO.getReg(); if (Reg == 0) continue; - DEBUG(errs() << "\tDead Def: " << TRI->getName(Reg)); - HandleLastUse(Reg, Count + 1, ""); - DEBUG(errs() << '\n'); + HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n"); } DEBUG(errs() << "\tDef Groups:"); @@ -427,15 +417,17 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, unsigned Cou if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - // Ignore passthru registers for liveness... - if (PassthruRegs.count(Reg) != 0) continue; + // Ignore KILLs and passthru registers for liveness... + if ((MI->getOpcode() == TargetInstrInfo::KILL) || + (PassthruRegs.count(Reg) != 0)) + continue; - // Update def for Reg and subregs. + // Update def for Reg and aliases. DefIndices[Reg] = Count; - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; - DefIndices[SubregReg] = Count; + for (const unsigned *Alias = TRI->getAliasSet(Reg); + *Alias; ++Alias) { + unsigned AliasReg = *Alias; + DefIndices[AliasReg] = Count; } } } @@ -589,72 +581,108 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( return false; } - // FIXME: for now just handle single register in group case... 
- if (Regs.size() > 1) { - DEBUG(errs() << "\tMultiple rename registers in group\n"); - return false; +#ifndef NDEBUG + // If DebugDiv > 0 then only rename (renamecnt % DebugDiv) == DebugMod + if (DebugDiv > 0) { + static int renamecnt = 0; + if (renamecnt++ % DebugDiv != DebugMod) + return false; + + errs() << "*** Performing rename " << TRI->getName(SuperReg) << + " for debug ***\n"; } +#endif // Check each possible rename register for SuperReg in round-robin // order. If that register is available, and the corresponding // registers are available for the other group subregisters, then we // can use those registers to rename. - BitVector SuperBV = RenameRegisterMap[SuperReg]; const TargetRegisterClass *SuperRC = TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other); const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); if (RB == RE) { - DEBUG(errs() << "\tEmpty Regclass!!\n"); + DEBUG(errs() << "\tEmpty Super Regclass!!\n"); return false; } + DEBUG(errs() << "\tFind Registers:"); + if (RenameOrder.count(SuperRC) == 0) RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE)); - DEBUG(errs() << "\tFind Register:"); - const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC]; const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR); TargetRegisterClass::iterator R = OrigR; do { if (R == RB) R = RE; --R; - const unsigned Reg = *R; + const unsigned NewSuperReg = *R; // Don't replace a register with itself. - if (Reg == SuperReg) continue; + if (NewSuperReg == SuperReg) continue; - DEBUG(errs() << " " << TRI->getName(Reg)); - - // If Reg is dead and Reg's most recent def is not before - // SuperRegs's kill, it's safe to replace SuperReg with Reg. We - // must also check all subregisters of Reg. 
- if (State->IsLive(Reg) || (KillIndices[SuperReg] > DefIndices[Reg])) { - DEBUG(errs() << "(live)"); - continue; - } else { - bool found = false; - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; - if (State->IsLive(SubregReg) || (KillIndices[SuperReg] > DefIndices[SubregReg])) { - DEBUG(errs() << "(subreg " << TRI->getName(SubregReg) << " live)"); - found = true; - break; - } + DEBUG(errs() << " [" << TRI->getName(NewSuperReg) << ':'); + RenameMap.clear(); + + // For each referenced group register (which must be a SuperReg or + // a subregister of SuperReg), find the corresponding subregister + // of NewSuperReg and make sure it is free to be renamed. + for (unsigned i = 0, e = Regs.size(); i != e; ++i) { + unsigned Reg = Regs[i]; + unsigned NewReg = 0; + if (Reg == SuperReg) { + NewReg = NewSuperReg; + } else { + unsigned NewSubRegIdx = TRI->getSubRegIndex(SuperReg, Reg); + if (NewSubRegIdx != 0) + NewReg = TRI->getSubReg(NewSuperReg, NewSubRegIdx); } - if (found) - continue; + + DEBUG(errs() << " " << TRI->getName(NewReg)); + + // Check if Reg can be renamed to NewReg. + BitVector BV = RenameRegisterMap[Reg]; + if (!BV.test(NewReg)) { + DEBUG(errs() << "(no rename)"); + goto next_super_reg; + } + + // If NewReg is dead and NewReg's most recent def is not before + // Regs's kill, it's safe to replace Reg with NewReg. We + // must also check all aliases of NewReg, because we can't define a + // register when any sub or super is already live. 
+ if (State->IsLive(NewReg) || (KillIndices[Reg] > DefIndices[NewReg])) { + DEBUG(errs() << "(live)"); + goto next_super_reg; + } else { + bool found = false; + for (const unsigned *Alias = TRI->getAliasSet(NewReg); + *Alias; ++Alias) { + unsigned AliasReg = *Alias; + if (State->IsLive(AliasReg) || (KillIndices[Reg] > DefIndices[AliasReg])) { + DEBUG(errs() << "(alias " << TRI->getName(AliasReg) << " live)"); + found = true; + break; + } + } + if (found) + goto next_super_reg; + } + + // Record that 'Reg' can be renamed to 'NewReg'. + RenameMap.insert(std::pair(Reg, NewReg)); } - if (Reg != 0) { - DEBUG(errs() << '\n'); - RenameOrder.erase(SuperRC); - RenameOrder.insert(RenameOrderType::value_type(SuperRC, R)); - RenameMap.insert(std::pair(SuperReg, Reg)); - return true; - } + // If we fall-out here, then every register in the group can be + // renamed, as recorded in RenameMap. + RenameOrder.erase(SuperRC); + RenameOrder.insert(RenameOrderType::value_type(SuperRC, R)); + DEBUG(errs() << "]\n"); + return true; + + next_super_reg: + DEBUG(errs() << ']'); } while (R != EndR); DEBUG(errs() << '\n'); @@ -668,7 +696,6 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( /// unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( std::vector& SUnits, - CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex) { @@ -681,16 +708,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // so just duck out immediately if the block is empty. if (SUnits.empty()) return 0; - // Manage saved state to enable multiple passes... - if (AntiDepTrials > 1) { - if (SavedState == NULL) { - SavedState = new AggressiveAntiDepState(*State); - } else { - delete State; - State = new AggressiveAntiDepState(*SavedState); - } - } - // For each regclass the next register to use for renaming. 
RenameOrderType RenameOrder; @@ -719,21 +736,14 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( CriticalPathMI = CriticalPathSU->getInstr(); } - // Even if there are no anti-dependencies we still need to go - // through the instructions to update Def, Kills, etc. #ifndef NDEBUG - if (Candidates.empty()) { - DEBUG(errs() << "\n===== No anti-dependency candidates\n"); - } else { - DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() << - " anti-dependencies\n"); - DEBUG(errs() << "Available regs:"); - for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { - if (!State->IsLive(Reg)) - DEBUG(errs() << " " << TRI->getName(Reg)); - } - DEBUG(errs() << '\n'); + DEBUG(errs() << "\n===== Aggressive anti-dependency breaking\n"); + DEBUG(errs() << "Available regs:"); + for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) { + if (!State->IsLive(Reg)) + DEBUG(errs() << " " << TRI->getName(Reg)); } + DEBUG(errs() << '\n'); #endif // Attempt to break anti-dependence edges. Walk the instructions @@ -754,14 +764,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // Process the defs in MI... PrescanInstruction(MI, Count, PassthruRegs); - // The the dependence edges that represent anti- and output- + // The dependence edges that represent anti- and output- // dependencies that are candidates for breaking. std::vector Edges; SUnit *PathSU = MISUnitMap[MI]; - AntiDepBreaker::CandidateMap::iterator - citer = Candidates.find(PathSU); - if (citer != Candidates.end()) - AntiDepEdges(PathSU, citer->second, Edges); + AntiDepEdges(PathSU, Edges); // If MI is not on the critical path, then we don't rename // registers in the CriticalPathSet. @@ -817,12 +824,32 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( // anti-dependency since those edges would prevent such // units from being scheduled past each other // regardless. 
+ // + // Also, if there are dependencies on other SUnits with the + // same register as the anti-dependency, don't attempt to + // break it. for (SUnit::pred_iterator P = PathSU->Preds.begin(), PE = PathSU->Preds.end(); P != PE; ++P) { - if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti)) { + if (P->getSUnit() == NextSU ? + (P->getKind() != SDep::Anti || P->getReg() != AntiDepReg) : + (P->getKind() == SDep::Data && P->getReg() == AntiDepReg)) { + AntiDepReg = 0; + break; + } + } + for (SUnit::pred_iterator P = PathSU->Preds.begin(), + PE = PathSU->Preds.end(); P != PE; ++P) { + if ((P->getSUnit() == NextSU) && (P->getKind() != SDep::Anti) && + (P->getKind() != SDep::Output)) { DEBUG(errs() << " (real dependency)\n"); AntiDepReg = 0; break; + } else if ((P->getSUnit() != NextSU) && + (P->getKind() == SDep::Data) && + (P->getReg() == AntiDepReg)) { + DEBUG(errs() << " (other dependency)\n"); + AntiDepReg = 0; + break; } } diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index e5c9a7bb3adf..8154d2dd5725 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -27,12 +27,11 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" +#include namespace llvm { /// Class AggressiveAntiDepState - /// Contains all the state necessary for anti-dep breaking. We place - /// into a separate class so be can conveniently save/restore it to - /// enable multi-pass anti-dep breaking. + /// Contains all the state necessary for anti-dep breaking. class AggressiveAntiDepState { public: /// RegisterReference - Information about a register reference @@ -126,23 +125,11 @@ namespace llvm { /// registers. AggressiveAntiDepState *State; - /// SavedState - The state for the start of an anti-dep - /// region. 
Used to restore the state at the beginning of each - /// pass - AggressiveAntiDepState *SavedState; - public: AggressiveAntiDepBreaker(MachineFunction& MFi, TargetSubtarget::RegClassVector& CriticalPathRCs); ~AggressiveAntiDepBreaker(); - /// GetMaxTrials - As anti-dependencies are broken, additional - /// dependencies may be exposed, so multiple passes are required. - unsigned GetMaxTrials(); - - /// NeedCandidates - Candidates required. - bool NeedCandidates() { return true; } - /// Start - Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB); @@ -150,7 +137,6 @@ namespace llvm { /// of the ScheduleDAG and break them by renaming registers. /// unsigned BreakAntiDependencies(std::vector& SUnits, - CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex); @@ -175,7 +161,9 @@ namespace llvm { /// return that register and all subregisters. void GetPassthruRegs(MachineInstr *MI, std::set& PassthruRegs); - void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag); + void HandleLastUse(unsigned Reg, unsigned KillIdx, const char *tag, + const char *header =NULL, const char *footer =NULL); + void PrescanInstruction(MachineInstr *MI, unsigned Count, std::set& PassthruRegs); void ScanInstruction(MachineInstr *MI, unsigned Count); diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h index b614f687a462..3ee30c6a18e3 100644 --- a/lib/CodeGen/AntiDepBreaker.h +++ b/lib/CodeGen/AntiDepBreaker.h @@ -21,9 +21,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include +#include namespace llvm { @@ -32,20 +30,8 @@ namespace llvm { /// anti-dependencies. 
class AntiDepBreaker { public: - typedef SmallSet AntiDepRegSet; - typedef SmallVector AntiDepRegVector; - typedef std::map CandidateMap; - virtual ~AntiDepBreaker(); - /// GetMaxTrials - Return the maximum number of anti-dependence - /// breaking attempts that will be made for a block. - virtual unsigned GetMaxTrials() =0; - - /// NeedCandidates - Return true if the schedule must provide - /// candidates with BreakAntiDependencies(). - virtual bool NeedCandidates() =0; - /// Start - Initialize anti-dep breaking for a new basic block. virtual void StartBlock(MachineBasicBlock *BB) =0; @@ -54,7 +40,6 @@ public: /// the number of anti-dependencies broken. /// virtual unsigned BreakAntiDependencies(std::vector& SUnits, - CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex) =0; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 08e0eae16c35..993cdbfb76c7 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -728,7 +728,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) { /// EmitString - Emit a string with quotes and a null terminator. /// Special characters are emitted properly. /// \literal (Eg. 
'\t') \endliteral -void AsmPrinter::EmitString(const std::string &String) const { +void AsmPrinter::EmitString(const StringRef String) const { EmitString(String.data(), String.size()); } @@ -1630,12 +1630,14 @@ bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, return true; } -MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA) const { - return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock()); +MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BlockAddress *BA, + const char *Suffix) const { + return GetBlockAddressSymbol(BA->getFunction(), BA->getBasicBlock(), Suffix); } MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, - const BasicBlock *BB) const { + const BasicBlock *BB, + const char *Suffix) const { assert(BB->hasName() && "Address of anonymous basic block not supported yet!"); @@ -1647,7 +1649,8 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F, SmallString<60> Name; raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA" << FuncName.size() << '_' << FuncName << '_' - << Mang->makeNameProper(BB->getName()); + << Mang->makeNameProper(BB->getName()) + << Suffix; return OutContext.GetOrCreateSymbol(Name.str()); } diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index ecf00077fc31..0e93b9849ce5 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -105,26 +105,14 @@ DIE::~DIE() { delete Children[i]; } -/// AddSiblingOffset - Add a sibling offset field to the front of the DIE. +/// addSiblingOffset - Add a sibling offset field to the front of the DIE. /// -void DIE::AddSiblingOffset() { +void DIE::addSiblingOffset() { DIEInteger *DI = new DIEInteger(0); Values.insert(Values.begin(), DI); Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4); } -/// Profile - Used to gather unique data for the value folding set. 
-/// -void DIE::Profile(FoldingSetNodeID &ID) { - Abbrev.Profile(ID); - - for (unsigned i = 0, N = Children.size(); i < N; ++i) - ID.AddPointer(Children[i]); - - for (unsigned j = 0, M = Values.size(); j < M; ++j) - ID.AddPointer(Values[j]); -} - #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; @@ -231,16 +219,6 @@ unsigned DIEInteger::SizeOf(const TargetData *TD, unsigned Form) const { return 0; } -/// Profile - Used to gather unique data for the value folding set. -/// -void DIEInteger::Profile(FoldingSetNodeID &ID, unsigned Int) { - ID.AddInteger(isInteger); - ID.AddInteger(Int); -} -void DIEInteger::Profile(FoldingSetNodeID &ID) { - Profile(ID, Integer); -} - #ifndef NDEBUG void DIEInteger::print(raw_ostream &O) { O << "Int: " << (int64_t)Integer @@ -258,16 +236,6 @@ void DIEString::EmitValue(Dwarf *D, unsigned Form) const { D->getAsm()->EmitString(Str); } -/// Profile - Used to gather unique data for the value folding set. -/// -void DIEString::Profile(FoldingSetNodeID &ID, const std::string &Str) { - ID.AddInteger(isString); - ID.AddString(Str); -} -void DIEString::Profile(FoldingSetNodeID &ID) { - Profile(ID, Str); -} - #ifndef NDEBUG void DIEString::print(raw_ostream &O) { O << "Str: \"" << Str << "\""; @@ -292,16 +260,6 @@ unsigned DIEDwarfLabel::SizeOf(const TargetData *TD, unsigned Form) const { return TD->getPointerSize(); } -/// Profile - Used to gather unique data for the value folding set. -/// -void DIEDwarfLabel::Profile(FoldingSetNodeID &ID, const DWLabel &Label) { - ID.AddInteger(isLabel); - Label.Profile(ID); -} -void DIEDwarfLabel::Profile(FoldingSetNodeID &ID) { - Profile(ID, Label); -} - #ifndef NDEBUG void DIEDwarfLabel::print(raw_ostream &O) { O << "Lbl: "; @@ -327,16 +285,6 @@ unsigned DIEObjectLabel::SizeOf(const TargetData *TD, unsigned Form) const { return TD->getPointerSize(); } -/// Profile - Used to gather unique data for the value folding set. 
-/// -void DIEObjectLabel::Profile(FoldingSetNodeID &ID, const std::string &Label) { - ID.AddInteger(isAsIsLabel); - ID.AddString(Label); -} -void DIEObjectLabel::Profile(FoldingSetNodeID &ID) { - Profile(ID, Label.c_str()); -} - #ifndef NDEBUG void DIEObjectLabel::print(raw_ostream &O) { O << "Obj: " << Label; @@ -363,20 +311,6 @@ unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const { return TD->getPointerSize(); } -/// Profile - Used to gather unique data for the value folding set. -/// -void DIESectionOffset::Profile(FoldingSetNodeID &ID, const DWLabel &Label, - const DWLabel &Section) { - ID.AddInteger(isSectionOffset); - Label.Profile(ID); - Section.Profile(ID); - // IsEH and UseSet are specific to the Label/Section that we will emit the - // offset for; so Label/Section are enough for uniqueness. -} -void DIESectionOffset::Profile(FoldingSetNodeID &ID) { - Profile(ID, Label, Section); -} - #ifndef NDEBUG void DIESectionOffset::print(raw_ostream &O) { O << "Off: "; @@ -405,18 +339,6 @@ unsigned DIEDelta::SizeOf(const TargetData *TD, unsigned Form) const { return TD->getPointerSize(); } -/// Profile - Used to gather unique data for the value folding set. -/// -void DIEDelta::Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi, - const DWLabel &LabelLo) { - ID.AddInteger(isDelta); - LabelHi.Profile(ID); - LabelLo.Profile(ID); -} -void DIEDelta::Profile(FoldingSetNodeID &ID) { - Profile(ID, LabelHi, LabelLo); -} - #ifndef NDEBUG void DIEDelta::print(raw_ostream &O) { O << "Del: "; @@ -436,21 +358,6 @@ void DIEEntry::EmitValue(Dwarf *D, unsigned Form) const { D->getAsm()->EmitInt32(Entry->getOffset()); } -/// Profile - Used to gather unique data for the value folding set. 
-/// -void DIEEntry::Profile(FoldingSetNodeID &ID, DIE *Entry) { - ID.AddInteger(isEntry); - ID.AddPointer(Entry); -} -void DIEEntry::Profile(FoldingSetNodeID &ID) { - ID.AddInteger(isEntry); - - if (Entry) - ID.AddPointer(Entry); - else - ID.AddPointer(this); -} - #ifndef NDEBUG void DIEEntry::print(raw_ostream &O) { O << format("Die: 0x%lx", (long)(intptr_t)Entry); @@ -505,11 +412,6 @@ unsigned DIEBlock::SizeOf(const TargetData *TD, unsigned Form) const { return 0; } -void DIEBlock::Profile(FoldingSetNodeID &ID) { - ID.AddInteger(isBlock); - DIE::Profile(ID); -} - #ifndef NDEBUG void DIEBlock::print(raw_ostream &O) { O << "Blk: "; diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 3e50a15e162d..dc6a70a6bd6a 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -113,7 +113,7 @@ namespace llvm { class CompileUnit; class DIEValue; - class DIE : public FoldingSetNode { + class DIE { protected: /// Abbrev - Buffer for constructing abbreviation. /// @@ -161,38 +161,28 @@ namespace llvm { void setSize(unsigned S) { Size = S; } void setAbstractCompileUnit(CompileUnit *CU) { AbstractCU = CU; } - /// AddValue - Add a value and attributes to a DIE. + /// addValue - Add a value and attributes to a DIE. /// - void AddValue(unsigned Attribute, unsigned Form, DIEValue *Value) { + void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { Abbrev.AddAttribute(Attribute, Form); Values.push_back(Value); } /// SiblingOffset - Return the offset of the debug information entry's /// sibling. - unsigned SiblingOffset() const { return Offset + Size; } + unsigned getSiblingOffset() const { return Offset + Size; } - /// AddSiblingOffset - Add a sibling offset field to the front of the DIE. + /// addSiblingOffset - Add a sibling offset field to the front of the DIE. /// - void AddSiblingOffset(); + void addSiblingOffset(); - /// AddChild - Add a child to the DIE. + /// addChild - Add a child to the DIE. 
/// - void AddChild(DIE *Child) { + void addChild(DIE *Child) { Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes); Children.push_back(Child); } - /// Detach - Detaches objects connected to it after copying. - /// - void Detach() { - Children.clear(); - } - - /// Profile - Used to gather unique data for the value folding set. - /// - void Profile(FoldingSetNodeID &ID) ; - #ifndef NDEBUG void print(raw_ostream &O, unsigned IncIndent = 0); void dump(); @@ -202,7 +192,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIEValue - A debug information entry value. /// - class DIEValue : public FoldingSetNode { + class DIEValue { public: enum { isInteger, @@ -233,10 +223,6 @@ namespace llvm { /// virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const = 0; - /// Profile - Used to gather unique data for the value folding set. - /// - virtual void Profile(FoldingSetNodeID &ID) = 0; - // Implement isa/cast/dyncast. static bool classof(const DIEValue *) { return true; } @@ -277,10 +263,6 @@ namespace llvm { /// virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; - /// Profile - Used to gather unique data for the value folding set. - /// - static void Profile(FoldingSetNodeID &ID, unsigned Int); - virtual void Profile(FoldingSetNodeID &ID); // Implement isa/cast/dyncast. static bool classof(const DIEInteger *) { return true; } @@ -295,9 +277,9 @@ namespace llvm { /// DIEString - A string value DIE. /// class DIEString : public DIEValue { - const std::string Str; + const StringRef Str; public: - explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {} + explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {} /// EmitValue - Emit string value. /// @@ -309,11 +291,6 @@ namespace llvm { return Str.size() + sizeof(char); // sizeof('\0'); } - /// Profile - Used to gather unique data for the value folding set. 
- /// - static void Profile(FoldingSetNodeID &ID, const std::string &Str); - virtual void Profile(FoldingSetNodeID &ID); - // Implement isa/cast/dyncast. static bool classof(const DIEString *) { return true; } static bool classof(const DIEValue *S) { return S->getType() == isString; } @@ -339,11 +316,6 @@ namespace llvm { /// virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; - /// Profile - Used to gather unique data for the value folding set. - /// - static void Profile(FoldingSetNodeID &ID, const DWLabel &Label); - virtual void Profile(FoldingSetNodeID &ID); - // Implement isa/cast/dyncast. static bool classof(const DIEDwarfLabel *) { return true; } static bool classof(const DIEValue *L) { return L->getType() == isLabel; } @@ -370,11 +342,6 @@ namespace llvm { /// virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; - /// Profile - Used to gather unique data for the value folding set. - /// - static void Profile(FoldingSetNodeID &ID, const std::string &Label); - virtual void Profile(FoldingSetNodeID &ID); - // Implement isa/cast/dyncast. static bool classof(const DIEObjectLabel *) { return true; } static bool classof(const DIEValue *L) { @@ -408,12 +375,6 @@ namespace llvm { /// virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; - /// Profile - Used to gather unique data for the value folding set. - /// - static void Profile(FoldingSetNodeID &ID, const DWLabel &Label, - const DWLabel &Section); - virtual void Profile(FoldingSetNodeID &ID); - // Implement isa/cast/dyncast. static bool classof(const DIESectionOffset *) { return true; } static bool classof(const DIEValue *D) { @@ -443,12 +404,6 @@ namespace llvm { /// virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; - /// Profile - Used to gather unique data for the value folding set. 
- /// - static void Profile(FoldingSetNodeID &ID, const DWLabel &LabelHi, - const DWLabel &LabelLo); - virtual void Profile(FoldingSetNodeID &ID); - // Implement isa/cast/dyncast. static bool classof(const DIEDelta *) { return true; } static bool classof(const DIEValue *D) { return D->getType() == isDelta; } @@ -480,11 +435,6 @@ namespace llvm { return sizeof(int32_t); } - /// Profile - Used to gather unique data for the value folding set. - /// - static void Profile(FoldingSetNodeID &ID, DIE *Entry); - virtual void Profile(FoldingSetNodeID &ID); - // Implement isa/cast/dyncast. static bool classof(const DIEEntry *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isEntry; } @@ -525,10 +475,6 @@ namespace llvm { /// virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const; - /// Profile - Used to gather unique data for the value folding set. - /// - virtual void Profile(FoldingSetNodeID &ID); - // Implement isa/cast/dyncast. static bool classof(const DIEBlock *) { return true; } static bool classof(const DIEValue *E) { return E->getType() == isBlock; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index c62c43545c46..c2e1e0503a88 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -39,9 +39,7 @@ static TimerGroup &getDwarfTimerGroup() { /// Configuration values for initial hash set sizes (log2). /// -static const unsigned InitDiesSetSize = 9; // log2(512) static const unsigned InitAbbreviationsSetSize = 9; // log2(512) -static const unsigned InitValuesSetSize = 9; // log2(512) namespace llvm { @@ -55,70 +53,89 @@ class CompileUnit { /// Die - Compile unit debug information entry. /// - DIE *Die; + DIE *CUDie; + + /// IndexTyDie - An anonymous type for index type. + DIE *IndexTyDie; /// GVToDieMap - Tracks the mapping of unit level debug informaton /// variables to debug information entries. 
/// FIXME : Rename GVToDieMap -> NodeToDieMap - std::map GVToDieMap; + ValueMap GVToDieMap; /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton /// descriptors to debug information entries using a DIEEntry proxy. /// FIXME : Rename - std::map GVToDIEEntryMap; + ValueMap GVToDIEEntryMap; /// Globals - A map of globally visible named entities for this unit. /// StringMap Globals; - /// DiesSet - Used to uniquely define dies within the compile unit. + /// GlobalTypes - A map of globally visible types for this unit. /// - FoldingSet DiesSet; + StringMap GlobalTypes; + public: CompileUnit(unsigned I, DIE *D) - : ID(I), Die(D), DiesSet(InitDiesSetSize) {} - ~CompileUnit() { delete Die; } + : ID(I), CUDie(D), IndexTyDie(0) {} + ~CompileUnit() { delete CUDie; delete IndexTyDie; } // Accessors. - unsigned getID() const { return ID; } - DIE* getDie() const { return Die; } - StringMap &getGlobals() { return Globals; } + unsigned getID() const { return ID; } + DIE* getCUDie() const { return CUDie; } + const StringMap &getGlobals() const { return Globals; } + const StringMap &getGlobalTypes() const { return GlobalTypes; } /// hasContent - Return true if this compile unit has something to write out. /// - bool hasContent() const { return !Die->getChildren().empty(); } + bool hasContent() const { return !CUDie->getChildren().empty(); } - /// AddGlobal - Add a new global entity to the compile unit. + /// addGlobal - Add a new global entity to the compile unit. /// - void AddGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; } + void addGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; } - /// getDieMapSlotFor - Returns the debug information entry map slot for the + /// addGlobalType - Add a new global type to the compile unit. + /// + void addGlobalType(const std::string &Name, DIE *Die) { + GlobalTypes[Name] = Die; + } + + /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. 
- DIE *&getDieMapSlotFor(MDNode *N) { return GVToDieMap[N]; } + DIE *getDIE(MDNode *N) { return GVToDieMap.lookup(N); } - /// getDIEEntrySlotFor - Returns the debug information entry proxy slot for - /// the specified debug variable. - DIEEntry *&getDIEEntrySlotFor(MDNode *N) { - return GVToDIEEntryMap[N]; + /// insertDIE - Insert DIE into the map. + void insertDIE(MDNode *N, DIE *D) { + GVToDieMap.insert(std::make_pair(N, D)); } - /// AddDie - Adds or interns the DIE to the compile unit. + /// getDIEEntry - Returns the debug information entry for the speciefied + /// debug variable. + DIEEntry *getDIEEntry(MDNode *N) { return GVToDIEEntryMap.lookup(N); } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(MDNode *N, DIEEntry *E) { + GVToDIEEntryMap.insert(std::make_pair(N, E)); + } + + /// addDie - Adds or interns the DIE to the compile unit. /// - DIE *AddDie(DIE &Buffer) { - FoldingSetNodeID ID; - Buffer.Profile(ID); - void *Where; - DIE *Die = DiesSet.FindNodeOrInsertPos(ID, Where); - - if (!Die) { - Die = new DIE(Buffer); - DiesSet.InsertNode(Die, Where); - this->Die->AddChild(Die); - Buffer.Detach(); - } - - return Die; + void addDie(DIE *Buffer) { + this->CUDie->addChild(Buffer); } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { + return IndexTyDie; + } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { + IndexTyDie = D; + } + }; //===----------------------------------------------------------------------===// @@ -147,7 +164,7 @@ public: /// class DbgScope { DbgScope *Parent; // Parent to this scope. - DIDescriptor Desc; // Debug info descriptor for scope. + DIDescriptor Desc; // Debug info descriptor for scope. WeakVH InlinedAtLocation; // Location at which scope is inlined. bool AbstractScope; // Abstract Scope unsigned StartLabelID; // Label ID of the beginning of scope. 
@@ -162,7 +179,7 @@ class DbgScope { public: DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0) : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false), - StartLabelID(0), EndLabelID(0), + StartLabelID(0), EndLabelID(0), LastInsn(0), FirstInsn(0), IndentLevel(0) {} virtual ~DbgScope(); @@ -170,7 +187,7 @@ public: DbgScope *getParent() const { return Parent; } void setParent(DbgScope *P) { Parent = P; } DIDescriptor getDesc() const { return Desc; } - MDNode *getInlinedAt() const { + MDNode *getInlinedAt() const { return dyn_cast_or_null(InlinedAtLocation); } MDNode *getScopeNode() const { return Desc.getNode(); } @@ -187,26 +204,26 @@ public: bool isAbstractScope() const { return AbstractScope; } const MachineInstr *getFirstInsn() { return FirstInsn; } - /// AddScope - Add a scope to the scope. + /// addScope - Add a scope to the scope. /// - void AddScope(DbgScope *S) { Scopes.push_back(S); } + void addScope(DbgScope *S) { Scopes.push_back(S); } - /// AddVariable - Add a variable to the scope. + /// addVariable - Add a variable to the scope. /// - void AddVariable(DbgVariable *V) { Variables.push_back(V); } + void addVariable(DbgVariable *V) { Variables.push_back(V); } - void FixInstructionMarkers() { + void fixInstructionMarkers() { assert (getFirstInsn() && "First instruction is missing!"); if (getLastInsn()) return; - + // If a scope does not have an instruction to mark an end then use // the end of last child scope. SmallVector &Scopes = getScopes(); assert (!Scopes.empty() && "Inner most scope does not have last insn!"); DbgScope *L = Scopes.back(); if (!L->getLastInsn()) - L->FixInstructionMarkers(); + L->fixInstructionMarkers(); setLastInsn(L->getLastInsn()); } @@ -236,21 +253,6 @@ void DbgScope::dump() const { } #endif -//===----------------------------------------------------------------------===// -/// DbgConcreteScope - This class is used to track a scope that holds concrete -/// instance information. 
-/// -class DbgConcreteScope : public DbgScope { - CompileUnit *Unit; - DIE *Die; // Debug info for this concrete scope. -public: - DbgConcreteScope(DIDescriptor D) : DbgScope(NULL, D) {} - - // Accessors. - DIE *getDie() const { return Die; } - void setDie(DIE *D) { Die = D; } -}; - DbgScope::~DbgScope() { for (unsigned i = 0, N = Scopes.size(); i < N; ++i) delete Scopes[i]; @@ -263,7 +265,7 @@ DbgScope::~DbgScope() { DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) : Dwarf(OS, A, T, "dbg"), ModuleCU(0), AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(), - ValuesSet(InitValuesSetSize), Values(), StringPool(), + DIEValues(), StringPool(), SectionSourceLines(), didInitial(false), shouldEmit(false), CurrentFnDbgScope(0), DebugTimer(0) { if (TimePassesIsEnabled) @@ -271,15 +273,15 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T) getDwarfTimerGroup()); } DwarfDebug::~DwarfDebug() { - for (unsigned j = 0, M = Values.size(); j < M; ++j) - delete Values[j]; + for (unsigned j = 0, M = DIEValues.size(); j < M; ++j) + delete DIEValues[j]; delete DebugTimer; } -/// AssignAbbrevNumber - Define a unique number for the abbreviation. +/// assignAbbrevNumber - Define a unique number for the abbreviation. /// -void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) { +void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { // Profile the node so that we can make it unique. FoldingSetNodeID ID; Abbrev.Profile(ID); @@ -300,224 +302,120 @@ void DwarfDebug::AssignAbbrevNumber(DIEAbbrev &Abbrev) { } } -/// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug /// information entry. 
-DIEEntry *DwarfDebug::CreateDIEEntry(DIE *Entry) { - DIEEntry *Value; - - if (Entry) { - FoldingSetNodeID ID; - DIEEntry::Profile(ID, Entry); - void *Where; - Value = static_cast(ValuesSet.FindNodeOrInsertPos(ID, Where)); - - if (Value) return Value; - - Value = new DIEEntry(Entry); - ValuesSet.InsertNode(Value, Where); - } else { - Value = new DIEEntry(Entry); - } - - Values.push_back(Value); +DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new DIEEntry(Entry); + DIEValues.push_back(Value); return Value; } -/// SetDIEEntry - Set a DIEEntry once the debug information entry is defined. +/// addUInt - Add an unsigned integer attribute data and value. /// -void DwarfDebug::SetDIEEntry(DIEEntry *Value, DIE *Entry) { - Value->setEntry(Entry); - - // Add to values set if not already there. If it is, we merely have a - // duplicate in the values list (no harm.) - ValuesSet.GetOrInsertNode(Value); -} - -/// AddUInt - Add an unsigned integer attribute data and value. -/// -void DwarfDebug::AddUInt(DIE *Die, unsigned Attribute, +void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); - - FoldingSetNodeID ID; - DIEInteger::Profile(ID, Integer); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = new DIEInteger(Integer); - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } - - Die->AddValue(Attribute, Form, Value); + DIEValue *Value = new DIEInteger(Integer); + DIEValues.push_back(Value); + Die->addValue(Attribute, Form, Value); } -/// AddSInt - Add an signed integer attribute data and value. +/// addSInt - Add an signed integer attribute data and value. 
/// -void DwarfDebug::AddSInt(DIE *Die, unsigned Attribute, +void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer) { if (!Form) Form = DIEInteger::BestForm(true, Integer); - - FoldingSetNodeID ID; - DIEInteger::Profile(ID, (uint64_t)Integer); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = new DIEInteger(Integer); - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } - - Die->AddValue(Attribute, Form, Value); + DIEValue *Value = new DIEInteger(Integer); + DIEValues.push_back(Value); + Die->addValue(Attribute, Form, Value); } -/// AddString - Add a string attribute data and value. +/// addString - Add a string attribute data and value. /// -void DwarfDebug::AddString(DIE *Die, unsigned Attribute, unsigned Form, - const std::string &String) { - FoldingSetNodeID ID; - DIEString::Profile(ID, String); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = new DIEString(String); - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } - - Die->AddValue(Attribute, Form, Value); +void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, + const StringRef String) { + DIEValue *Value = new DIEString(String); + DIEValues.push_back(Value); + Die->addValue(Attribute, Form, Value); } -/// AddLabel - Add a Dwarf label attribute data and value. +/// addLabel - Add a Dwarf label attribute data and value. 
/// -void DwarfDebug::AddLabel(DIE *Die, unsigned Attribute, unsigned Form, +void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, const DWLabel &Label) { - FoldingSetNodeID ID; - DIEDwarfLabel::Profile(ID, Label); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = new DIEDwarfLabel(Label); - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } - - Die->AddValue(Attribute, Form, Value); + DIEValue *Value = new DIEDwarfLabel(Label); + DIEValues.push_back(Value); + Die->addValue(Attribute, Form, Value); } -/// AddObjectLabel - Add an non-Dwarf label attribute data and value. +/// addObjectLabel - Add an non-Dwarf label attribute data and value. /// -void DwarfDebug::AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form, +void DwarfDebug::addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form, const std::string &Label) { - FoldingSetNodeID ID; - DIEObjectLabel::Profile(ID, Label); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = new DIEObjectLabel(Label); - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } - - Die->AddValue(Attribute, Form, Value); + DIEValue *Value = new DIEObjectLabel(Label); + DIEValues.push_back(Value); + Die->addValue(Attribute, Form, Value); } -/// AddSectionOffset - Add a section offset label attribute data and value. +/// addSectionOffset - Add a section offset label attribute data and value. 
/// -void DwarfDebug::AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, +void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, const DWLabel &Label, const DWLabel &Section, bool isEH, bool useSet) { - FoldingSetNodeID ID; - DIESectionOffset::Profile(ID, Label, Section); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = new DIESectionOffset(Label, Section, isEH, useSet); - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } - - Die->AddValue(Attribute, Form, Value); + DIEValue *Value = new DIESectionOffset(Label, Section, isEH, useSet); + DIEValues.push_back(Value); + Die->addValue(Attribute, Form, Value); } -/// AddDelta - Add a label delta attribute data and value. +/// addDelta - Add a label delta attribute data and value. /// -void DwarfDebug::AddDelta(DIE *Die, unsigned Attribute, unsigned Form, +void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, const DWLabel &Hi, const DWLabel &Lo) { - FoldingSetNodeID ID; - DIEDelta::Profile(ID, Hi, Lo); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = new DIEDelta(Hi, Lo); - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } - - Die->AddValue(Attribute, Form, Value); + DIEValue *Value = new DIEDelta(Hi, Lo); + DIEValues.push_back(Value); + Die->addValue(Attribute, Form, Value); } -/// AddBlock - Add block data. +/// addBlock - Add block data. /// -void DwarfDebug::AddBlock(DIE *Die, unsigned Attribute, unsigned Form, +void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block) { Block->ComputeSize(TD); - FoldingSetNodeID ID; - Block->Profile(ID); - void *Where; - DIEValue *Value = ValuesSet.FindNodeOrInsertPos(ID, Where); - - if (!Value) { - Value = Block; - ValuesSet.InsertNode(Value, Where); - Values.push_back(Value); - } else { - // Already exists, reuse the previous one. 
- delete Block; - Block = cast(Value); - } - - Die->AddValue(Attribute, Block->BestForm(), Value); + DIEValues.push_back(Block); + Die->addValue(Attribute, Block->BestForm(), Block); } -/// AddSourceLine - Add location information to specified debug information +/// addSourceLine - Add location information to specified debug information /// entry. -void DwarfDebug::AddSourceLine(DIE *Die, const DIVariable *V) { +void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) { // If there is no compile unit specified, don't add a line #. if (V->getCompileUnit().isNull()) return; unsigned Line = V->getLineNumber(); - unsigned FileID = FindCompileUnit(V->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(V->getCompileUnit()).getID(); assert(FileID && "Invalid file id"); - AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } -/// AddSourceLine - Add location information to specified debug information +/// addSourceLine - Add location information to specified debug information /// entry. -void DwarfDebug::AddSourceLine(DIE *Die, const DIGlobal *G) { +void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) { // If there is no compile unit specified, don't add a line #. if (G->getCompileUnit().isNull()) return; unsigned Line = G->getLineNumber(); - unsigned FileID = FindCompileUnit(G->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(G->getCompileUnit()).getID(); assert(FileID && "Invalid file id"); - AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } -/// AddSourceLine - Add location information to specified debug information +/// addSourceLine - Add location information to specified debug information /// entry. 
-void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) { +void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) { // If there is no compile unit specified, don't add a line #. if (SP->getCompileUnit().isNull()) return; @@ -527,25 +425,25 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DISubprogram *SP) { unsigned Line = SP->getLineNumber(); - unsigned FileID = FindCompileUnit(SP->getCompileUnit()).getID(); + unsigned FileID = findCompileUnit(SP->getCompileUnit()).getID(); assert(FileID && "Invalid file id"); - AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } -/// AddSourceLine - Add location information to specified debug information +/// addSourceLine - Add location information to specified debug information /// entry. -void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { +void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) { // If there is no compile unit specified, don't add a line #. DICompileUnit CU = Ty->getCompileUnit(); if (CU.isNull()) return; unsigned Line = Ty->getLineNumber(); - unsigned FileID = FindCompileUnit(CU).getID(); + unsigned FileID = findCompileUnit(CU).getID(); assert(FileID && "Invalid file id"); - AddUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - AddUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } /* Byref variables, in Blocks, are declared by the programmer as @@ -571,12 +469,12 @@ void DwarfDebug::AddSourceLine(DIE *Die, const DIType *Ty) { side, the Debug Information Entry for the variable VarName needs to have a DW_AT_location that tells the debugger how to unwind through the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function AddBlockByrefType does this. */ + value of the variable. 
The function addBlockByrefType does this. */ /// Find the type the programmer originally declared the variable to be /// and return that type. /// -DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) { +DIType DwarfDebug::getBlockByrefType(DIType Ty, std::string Name) { DIType subType = Ty; unsigned tag = Ty.getTag(); @@ -596,19 +494,19 @@ DIType DwarfDebug::GetBlockByrefType(DIType Ty, std::string Name) { for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIDerivedType DT = DIDerivedType(Element.getNode()); - if (strcmp(Name.c_str(), DT.getName()) == 0) + if (Name == DT.getName()) return (DT.getTypeDerivedFrom()); } return Ty; } -/// AddComplexAddress - Start with the address based on the location provided, +/// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// given the extra address information encoded in the DIVariable, starting from /// the starting location. Add the DWARF information to the die. 
/// -void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die, +void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { const DIVariable &VD = DV->getVariable(); @@ -621,36 +519,36 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die, if (Location.isReg()) { if (Reg < 32) { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); } else { Reg = Reg - dwarf::DW_OP_reg0; - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } } else { if (Reg < 32) - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); else { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } - AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); } for (unsigned i = 0, N = VD.getNumAddrElements(); i < N; ++i) { uint64_t Element = VD.getAddrElement(i); if (Element == DIFactory::OpPlus) { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - AddUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i)); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, VD.getAddrElement(++i)); } else if (Element == DIFactory::OpDeref) { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); } else llvm_unreachable("unknown DIFactory Opcode"); } // Now attach the location information to the DIE. 
- AddBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, 0, Block); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -662,7 +560,7 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die, However, as far as the original *programmer* is concerned, the variable should still have type 'SomeType', as originally declared. - The function GetBlockByrefType dives into the __Block_byref_x_VarName + The function getBlockByrefType dives into the __Block_byref_x_VarName struct to find the original type of the variable, which is then assigned to the variable's Debug Information Entry as its real type. So far, so good. However now the debugger will expect the variable VarName to have the type @@ -707,13 +605,13 @@ void DwarfDebug::AddComplexAddress(DbgVariable *&DV, DIE *Die, That is what this function does. */ -/// AddBlockByrefAddress - Start with the address based on the location +/// addBlockByrefAddress - Start with the address based on the location /// provided, and generate the DWARF information necessary to find the /// actual Block variable (navigating the Block struct) based on the /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. 
/// -void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, +void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location) { const DIVariable &VD = DV->getVariable(); @@ -722,7 +620,7 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Tag = Ty.getTag(); bool isPointer = false; - const char *varName = VD.getName(); + StringRef varName = VD.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { DIDerivedType DTy = DIDerivedType(Ty.getNode()); @@ -742,10 +640,10 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { DIDescriptor Element = Fields.getElement(i); DIDerivedType DT = DIDerivedType(Element.getNode()); - const char *fieldName = DT.getName(); - if (strcmp(fieldName, "__forwarding") == 0) + StringRef fieldName = DT.getName(); + if (fieldName == "__forwarding") forwardingField = Element; - else if (strcmp(fieldName, varName) == 0) + else if (fieldName == varName) varField = Element; } @@ -766,148 +664,144 @@ void DwarfDebug::AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, if (Location.isReg()) { if (Reg < 32) - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); else { Reg = Reg - dwarf::DW_OP_reg0; - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } } else { if (Reg < 32) - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); else { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); 
} - AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); } // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - AddUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. if (varFieldOffset > 0) { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - AddUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. - AddBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, 0, Block); } -/// AddAddress - Add an address attribute to a die based on the location +/// addAddress - Add an address attribute to a die based on the location /// provided. 
-void DwarfDebug::AddAddress(DIE *Die, unsigned Attribute, +void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location) { unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); DIEBlock *Block = new DIEBlock(); if (Location.isReg()) { if (Reg < 32) { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); } else { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } } else { if (Reg < 32) { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); } else { - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - AddUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); } - AddUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); } - AddBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, 0, Block); } -/// AddType - Add a new type attribute to the specified entity. -void DwarfDebug::AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { +/// addType - Add a new type attribute to the specified entity. +void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) { if (Ty.isNull()) return; // Check for pre-existence. - DIEEntry *&Slot = DW_Unit->getDIEEntrySlotFor(Ty.getNode()); + DIEEntry *Entry = DW_Unit->getDIEEntry(Ty.getNode()); // If it exists then use the existing value. - if (Slot) { - Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot); + if (Entry) { + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); return; } // Set up proxy. 
- Slot = CreateDIEEntry(); + Entry = createDIEEntry(); + DW_Unit->insertDIEEntry(Ty.getNode(), Entry); // Construct type. - DIE Buffer(dwarf::DW_TAG_base_type); + DIE *Buffer = new DIE(dwarf::DW_TAG_base_type); if (Ty.isBasicType()) - ConstructTypeDIE(DW_Unit, Buffer, DIBasicType(Ty.getNode())); + constructTypeDIE(DW_Unit, *Buffer, DIBasicType(Ty.getNode())); else if (Ty.isCompositeType()) - ConstructTypeDIE(DW_Unit, Buffer, DICompositeType(Ty.getNode())); + constructTypeDIE(DW_Unit, *Buffer, DICompositeType(Ty.getNode())); else { assert(Ty.isDerivedType() && "Unknown kind of DIType"); - ConstructTypeDIE(DW_Unit, Buffer, DIDerivedType(Ty.getNode())); + constructTypeDIE(DW_Unit, *Buffer, DIDerivedType(Ty.getNode())); } // Add debug information entry to entity and appropriate context. DIE *Die = NULL; DIDescriptor Context = Ty.getContext(); if (!Context.isNull()) - Die = DW_Unit->getDieMapSlotFor(Context.getNode()); + Die = DW_Unit->getDIE(Context.getNode()); - if (Die) { - DIE *Child = new DIE(Buffer); - Die->AddChild(Child); - Buffer.Detach(); - SetDIEEntry(Slot, Child); - } else { - Die = DW_Unit->AddDie(Buffer); - SetDIEEntry(Slot, Die); - } - - Entity->AddValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Slot); + if (Die) + Die->addChild(Buffer); + else + DW_Unit->addDie(Buffer); + Entry->setEntry(Buffer); + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); } -/// ConstructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, +/// constructTypeDIE - Construct basic type die from DIBasicType. +void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIBasicType BTy) { // Get core information. 
- const char *Name = BTy.getName(); + StringRef Name = BTy.getName(); Buffer.setTag(dwarf::DW_TAG_base_type); - AddUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); // Add name if not anonymous or intermediate type. - if (Name) - AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); uint64_t Size = BTy.getSizeInBits() >> 3; - AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); } -/// ConstructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIDerivedType DTy) { // Get core information. - const char *Name = DTy.getName(); + StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; unsigned Tag = DTy.getTag(); @@ -918,26 +812,26 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Map to main type, void will not have a type. DIType FromTy = DTy.getTypeDerivedFrom(); - AddType(DW_Unit, &Buffer, FromTy); + addType(DW_Unit, &Buffer, FromTy); // Add name if not anonymous or intermediate type. - if (Name && Tag != dwarf::DW_TAG_pointer_type) - AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add size if non-zero (derived types might be zero-sized.) if (Size) - AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); // Add source line info if available and TyDesc is not a forward declaration. 
if (!DTy.isForwardDecl()) - AddSourceLine(&Buffer, &DTy); + addSourceLine(&Buffer, &DTy); } -/// ConstructTypeDIE - Construct type DIE from DICompositeType. -void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, +/// constructTypeDIE - Construct type DIE from DICompositeType. +void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DICompositeType CTy) { // Get core information. - const char *Name = CTy.getName(); + StringRef Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; unsigned Tag = CTy.getTag(); @@ -946,7 +840,7 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, switch (Tag) { case dwarf::DW_TAG_vector_type: case dwarf::DW_TAG_array_type: - ConstructArrayTypeDIE(DW_Unit, Buffer, &CTy); + constructArrayTypeDIE(DW_Unit, Buffer, &CTy); break; case dwarf::DW_TAG_enumeration_type: { DIArray Elements = CTy.getTypeArray(); @@ -956,8 +850,8 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIE *ElemDie = NULL; DIEnumerator Enum(Elements.getElement(i).getNode()); if (!Enum.isNull()) { - ElemDie = ConstructEnumTypeDIE(DW_Unit, &Enum); - Buffer.AddChild(ElemDie); + ElemDie = constructEnumTypeDIE(DW_Unit, &Enum); + Buffer.addChild(ElemDie); } } } @@ -966,17 +860,17 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, // Add return type. DIArray Elements = CTy.getTypeArray(); DIDescriptor RTy = Elements.getElement(0); - AddType(DW_Unit, &Buffer, DIType(RTy.getNode())); + addType(DW_Unit, &Buffer, DIType(RTy.getNode())); // Add prototype flag. - AddUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add arguments. 
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIDescriptor Ty = Elements.getElement(i); - AddType(DW_Unit, Arg, DIType(Ty.getNode())); - Buffer.AddChild(Arg); + addType(DW_Unit, Arg, DIType(Ty.getNode())); + Buffer.addChild(Arg); } } break; @@ -997,20 +891,20 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, continue; DIE *ElemDie = NULL; if (Element.getTag() == dwarf::DW_TAG_subprogram) - ElemDie = CreateSubprogramDIE(DW_Unit, + ElemDie = createSubprogramDIE(DW_Unit, DISubprogram(Element.getNode())); else - ElemDie = CreateMemberDIE(DW_Unit, + ElemDie = createMemberDIE(DW_Unit, DIDerivedType(Element.getNode())); - Buffer.AddChild(ElemDie); + Buffer.addChild(ElemDie); } if (CTy.isAppleBlockExtension()) - AddUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); unsigned RLang = CTy.getRunTimeLang(); if (RLang) - AddUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, RLang); break; } @@ -1019,136 +913,143 @@ void DwarfDebug::ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, } // Add name if not anonymous or intermediate type. - if (Name) - AddString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) if (Size) - AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); else { // Add zero size if it is not a forward declaration. 
if (CTy.isForwardDecl()) - AddUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); else - AddUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); } // Add source line info if available. if (!CTy.isForwardDecl()) - AddSourceLine(&Buffer, &CTy); + addSourceLine(&Buffer, &CTy); } } -/// ConstructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfDebug::ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ +/// constructSubrangeDIE - Construct subrange DIE from DISubrange. +void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ int64_t L = SR.getLo(); int64_t H = SR.getHi(); DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - AddDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); if (L) - AddSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); if (H) - AddSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - Buffer.AddChild(DW_Subrange); + Buffer.addChild(DW_Subrange); } -/// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, +/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. +void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); if (CTy->getTag() == dwarf::DW_TAG_vector_type) - AddUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); // Emit derived type. 
- AddType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom()); + addType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom()); DIArray Elements = CTy->getTypeArray(); - // Construct an anonymous type for index type. - DIE IdxBuffer(dwarf::DW_TAG_base_type); - AddUInt(&IdxBuffer, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); - AddUInt(&IdxBuffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); - DIE *IndexTy = DW_Unit->AddDie(IdxBuffer); + // Get an anonymous type for index type. + DIE *IdxTy = DW_Unit->getIndexTyDie(); + if (!IdxTy) { + // Construct an anonymous type for index type. + IdxTy = new DIE(dwarf::DW_TAG_base_type); + addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_signed); + DW_Unit->addDie(IdxTy); + DW_Unit->setIndexTyDie(IdxTy); + } // Add subranges to array type. for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) - ConstructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IndexTy); + constructSubrangeDIE(Buffer, DISubrange(Element.getNode()), IdxTy); } } -/// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { +/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. 
+DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - const char *Name = ETy->getName(); - AddString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + StringRef Name = ETy->getName(); + addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); int64_t Value = ETy->getEnumValue(); - AddSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); return Enumerator; } -/// CreateGlobalVariableDIE - Create new DIE using GV. -DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit, +/// createGlobalVariableDIE - Create new DIE using GV. +DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit, const DIGlobalVariable &GV) { - // If the global variable was optmized out then no need to create debug info entry. + // If the global variable was optmized out then no need to create debug info + // entry. if (!GV.getGlobal()) return NULL; - if (!GV.getDisplayName()) return NULL; + if (GV.getDisplayName().empty()) return NULL; DIE *GVDie = new DIE(dwarf::DW_TAG_variable); - AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + addString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, GV.getDisplayName()); - const char *LinkageName = GV.getLinkageName(); - if (LinkageName) { + StringRef LinkageName = GV.getLinkageName(); + if (!LinkageName.empty()) { // Skip special LLVM prefix that is used to inform the asm printer to not // emit usual symbol prefix before the symbol name. This happens for // Objective-C symbol names and symbol whose name is replaced using GCC's // __asm__ attribute. 
if (LinkageName[0] == 1) - LinkageName = &LinkageName[1]; - AddString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + LinkageName = LinkageName.substr(1); + addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); } - AddType(DW_Unit, GVDie, GV.getType()); + addType(DW_Unit, GVDie, GV.getType()); if (!GV.isLocalToUnit()) - AddUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - AddSourceLine(GVDie, &GV); + addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(GVDie, &GV); // Add address. DIEBlock *Block = new DIEBlock(); - AddUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - AddObjectLabel(Block, 0, dwarf::DW_FORM_udata, + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addObjectLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getMangledName(GV.getGlobal())); - AddBlock(GVDie, dwarf::DW_AT_location, 0, Block); + addBlock(GVDie, dwarf::DW_AT_location, 0, Block); return GVDie; } -/// CreateMemberDIE - Create new member DIE. -DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ +/// createMemberDIE - Create new member DIE. 
+DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ DIE *MemberDie = new DIE(DT.getTag()); - if (const char *Name = DT.getName()) - AddString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + StringRef Name = DT.getName(); + if (!Name.empty()) + addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + addType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); - AddType(DW_Unit, MemberDie, DT.getTypeDerivedFrom()); - - AddSourceLine(MemberDie, &DT); + addSourceLine(MemberDie, &DT); DIEBlock *MemLocationDie = new DIEBlock(); - AddUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); uint64_t Size = DT.getSizeInBits(); uint64_t FieldSize = DT.getOriginalTypeSize(); if (Size != FieldSize) { // Handle bitfield. - AddUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - AddUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); + addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); + addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); uint64_t Offset = DT.getOffsetInBits(); uint64_t FieldOffset = Offset; @@ -1159,49 +1060,48 @@ DIE *DwarfDebug::CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){ // Maybe we need to work from the other end. if (TD->isLittleEndian()) Offset = FieldSize - (Offset + Size); - AddUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); // Here WD_AT_data_member_location points to the anonymous // field that includes this bit field. - AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); + addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); } else // This is not a bitfield. 
- AddUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); + addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - AddBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); if (DT.isProtected()) - AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + addUInt(MemberDie, dwarf::DW_AT_accessibility, 0, dwarf::DW_ACCESS_protected); else if (DT.isPrivate()) - AddUInt(MemberDie, dwarf::DW_AT_accessibility, 0, + addUInt(MemberDie, dwarf::DW_AT_accessibility, 0, dwarf::DW_ACCESS_private); return MemberDie; } -/// CreateSubprogramDIE - Create new DIE using SP. -DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, +/// createSubprogramDIE - Create new DIE using SP. +DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit, const DISubprogram &SP, bool IsConstructor, bool IsInlined) { DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram); + addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); - const char * Name = SP.getName(); - AddString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - const char *LinkageName = SP.getLinkageName(); - if (LinkageName) { - // Skip special LLVM prefix that is used to inform the asm printer to not emit - // usual symbol prefix before the symbol name. This happens for Objective-C - // symbol names and symbol whose name is replaced using GCC's __asm__ attribute. + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty()) { + // Skip special LLVM prefix that is used to inform the asm printer to not + // emit usual symbol prefix before the symbol name. This happens for + // Objective-C symbol names and symbol whose name is replaced using GCC's + // __asm__ attribute. 
if (LinkageName[0] == 1) - LinkageName = &LinkageName[1]; - AddString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + LinkageName = LinkageName.substr(1); + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, LinkageName); } - AddSourceLine(SPDie, &SP); + addSourceLine(SPDie, &SP); DICompositeType SPTy = SP.getType(); DIArray Args = SPTy.getTypeArray(); @@ -1210,53 +1110,52 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit, unsigned Lang = SP.getCompileUnit().getLanguage(); if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 || Lang == dwarf::DW_LANG_ObjC) - AddUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. unsigned SPTag = SPTy.getTag(); if (!IsConstructor) { if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type) - AddType(DW_Unit, SPDie, SPTy); + addType(DW_Unit, SPDie, SPTy); else - AddType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode())); + addType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode())); } if (!SP.isDefinition()) { - AddUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. Do not add arguments for subprogram definition. They will // be handled through RecordVariable. if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - AddType(DW_Unit, Arg, DIType(Args.getElement(i).getNode())); - AddUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? - SPDie->AddChild(Arg); + addType(DW_Unit, Arg, DIType(Args.getElement(i).getNode())); + addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ?? + SPDie->addChild(Arg); } } // DW_TAG_inlined_subroutine may refer to this DIE. 
- DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode()); - Slot = SPDie; + DW_Unit->insertDIE(SP.getNode(), SPDie); return SPDie; } -/// FindCompileUnit - Get the compile unit for the given descriptor. +/// findCompileUnit - Get the compile unit for the given descriptor. /// -CompileUnit &DwarfDebug::FindCompileUnit(DICompileUnit Unit) const { +CompileUnit &DwarfDebug::findCompileUnit(DICompileUnit Unit) const { DenseMap::const_iterator I = CompileUnitMap.find(Unit.getNode()); assert(I != CompileUnitMap.end() && "Missing compile unit."); return *I->second; } -/// CreateDbgScopeVariable - Create a new scope variable. +/// createDbgScopeVariable - Create a new scope variable. /// -DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { +DIE *DwarfDebug::createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { // Get the descriptor. const DIVariable &VD = DV->getVariable(); - const char *Name = VD.getName(); - if (!Name) + StringRef Name = VD.getName(); + if (Name.empty()) return NULL; // Translate tag to proper Dwarf tag. The result variable is dropped for @@ -1276,33 +1175,34 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) { // Define variable debug information entry. DIE *VariableDie = new DIE(Tag); - AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); // Add source line info if available. - AddSourceLine(VariableDie, &VD); + addSourceLine(VariableDie, &VD); // Add variable type. - // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. 
if (VD.isBlockByrefVariable()) - AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); + addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name)); else - AddType(Unit, VariableDie, VD.getType()); + addType(Unit, VariableDie, VD.getType()); // Add variable address. // Variables for abstract instances of inlined functions don't get a // location. MachineLocation Location; - Location.set(RI->getFrameRegister(*MF), - RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); - - + unsigned FrameReg; + int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg); + Location.set(FrameReg, Offset); + + if (VD.hasComplexAddress()) - AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); else if (VD.isBlockByrefVariable()) - AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); else - AddAddress(VariableDie, dwarf::DW_AT_location, Location); + addAddress(VariableDie, dwarf::DW_AT_location, Location); return VariableDie; } @@ -1329,17 +1229,17 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, DbgScope *Parent = NULL; if (GetConcreteScope) { DILocation IL(InlinedAt); - Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, + Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI, IL.getOrigLocation().getNode()); assert (Parent && "Unable to find Parent scope!"); NScope->setParent(Parent); - Parent->AddScope(NScope); + Parent->addScope(NScope); } else if (DIDescriptor(N).isLexicalBlock()) { DILexicalBlock DB(N); if (!DB.getContext().isNull()) { Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt); NScope->setParent(Parent); - Parent->AddScope(NScope); + Parent->addScope(NScope); } } @@ -1365,7 +1265,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { DbgScope *AScope = AbstractScopes.lookup(N); if (AScope) return 
AScope; - + DbgScope *Parent = NULL; DIDescriptor Scope(N); @@ -1379,7 +1279,7 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { AScope = new DbgScope(Parent, DIDescriptor(N), NULL); if (Parent) - Parent->AddScope(AScope); + Parent->addScope(AScope); AScope->setAbstractScope(); AbstractScopes[N] = AScope; if (DIDescriptor(N).isSubprogram()) @@ -1387,54 +1287,43 @@ DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) { return AScope; } -static DISubprogram getDISubprogram(MDNode *N) { +/// updateSubprogramScopeDIE - Find DIE for the given subprogram and +/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. +/// If there are global variables in this scope then create and insert +/// DIEs for these variables. +DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) { - DIDescriptor D(N); - if (D.isNull()) - return DISubprogram(); - - if (D.isCompileUnit()) - return DISubprogram(); - - if (D.isSubprogram()) - return DISubprogram(N); - - if (D.isLexicalBlock()) - return getDISubprogram(DILexicalBlock(N).getContext().getNode()); - - llvm_unreachable("Unexpected Descriptor!"); -} - -DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) { - - DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode); + DIE *SPDie = ModuleCU->getDIE(SPNode); assert (SPDie && "Unable to find subprogram DIE!"); - AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("func_begin", SubprogramCount)); - AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, DWLabel("func_end", SubprogramCount)); MachineLocation Location(RI->getFrameRegister(*MF)); - AddAddress(SPDie, dwarf::DW_AT_frame_base, Location); - + addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + if (!DISubprogram(SPNode).isLocalToUnit()) - AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // 
If there are global variables at this scope then add their dies. - for (SmallVector::iterator SGI = ScopedGVs.begin(), + for (SmallVector::iterator SGI = ScopedGVs.begin(), SGE = ScopedGVs.end(); SGI != SGE; ++SGI) { MDNode *N = dyn_cast_or_null(*SGI); if (!N) continue; DIGlobalVariable GV(N); if (GV.getContext().getNode() == SPNode) { - DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV); + DIE *ScopedGVDie = createGlobalVariableDIE(ModuleCU, GV); if (ScopedGVDie) - SPDie->AddChild(ScopedGVDie); + SPDie->addChild(ScopedGVDie); } } + return SPDie; } -DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) { +/// constructLexicalScope - Construct new DW_TAG_lexical_block +/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. +DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); @@ -1446,13 +1335,13 @@ DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) { if (Scope->isAbstractScope()) return ScopeDIE; - AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - StartID ? - DWLabel("label", StartID) + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + StartID ? + DWLabel("label", StartID) : DWLabel("func_begin", SubprogramCount)); - AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - EndID ? - DWLabel("label", EndID) + addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + EndID ? + DWLabel("label", EndID) : DWLabel("func_end", SubprogramCount)); @@ -1460,7 +1349,10 @@ DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) { return ScopeDIE; } -DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) { +/// constructInlinedScopeDIE - This scope represents inlined body of +/// a function. Construct DIE to represent this concrete inlined copy +/// of the function. 
+DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID()); unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID()); assert (StartID && "Invalid starting label for an inlined scope!"); @@ -1475,14 +1367,14 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) { DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); DISubprogram InlinedSP = getDISubprogram(DS.getNode()); - DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode()); + DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode()); assert (OriginDIE && "Unable to find Origin DIE!"); - AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, OriginDIE); - AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, DWLabel("label", StartID)); - AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, DWLabel("label", EndID)); InlinedSubprogramDIEs.insert(OriginDIE); @@ -1492,7 +1384,8 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) { I = InlineInfo.find(InlinedSP.getNode()); if (I == InlineInfo.end()) { - InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE)); + InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, + ScopeDIE)); InlinedSPNodes.push_back(InlinedSP.getNode()); } else I->second.push_back(std::make_pair(StartID, ScopeDIE)); @@ -1500,18 +1393,20 @@ DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) { StringPool.insert(InlinedSP.getName()); StringPool.insert(InlinedSP.getLinkageName()); DILocation DL(Scope->getInlinedAt()); - AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); - AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID()); + addUInt(ScopeDIE, 
dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; } -DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV, + +/// constructVariableDIE - Construct a DIE for the given DbgVariable. +DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope, CompileUnit *Unit) { // Get the descriptor. const DIVariable &VD = DV->getVariable(); - const char *Name = VD.getName(); - if (!Name) + StringRef Name = VD.getName(); + if (Name.empty()) return NULL; // Translate tag to proper Dwarf tag. The result variable is dropped for @@ -1536,50 +1431,74 @@ DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV, DIE *AbsDIE = NULL; if (DbgVariable *AV = DV->getAbstractVariable()) AbsDIE = AV->getDIE(); - + if (AbsDIE) { DIScope DS(Scope->getScopeNode()); DISubprogram InlinedSP = getDISubprogram(DS.getNode()); - DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode()); + DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode()); (void) OriginSPDIE; assert (OriginSPDIE && "Unable to find Origin DIE for the SP!"); DIE *AbsDIE = DV->getAbstractVariable()->getDIE(); assert (AbsDIE && "Unable to find Origin DIE for the Variable!"); - AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsDIE); } else { - AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - AddSourceLine(VariableDie, &VD); + addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addSourceLine(VariableDie, &VD); // Add variable type. - // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // FIXME: isBlockByrefVariable should be reformulated in terms of complex // addresses instead. 
if (VD.isBlockByrefVariable()) - AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name)); + addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name)); else - AddType(Unit, VariableDie, VD.getType()); + addType(Unit, VariableDie, VD.getType()); } // Add variable address. if (!Scope->isAbstractScope()) { MachineLocation Location; - Location.set(RI->getFrameRegister(*MF), - RI->getFrameIndexOffset(*MF, DV->getFrameIndex())); - - + unsigned FrameReg; + int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg); + Location.set(FrameReg, Offset); + if (VD.hasComplexAddress()) - AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location); else if (VD.isBlockByrefVariable()) - AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); + addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location); else - AddAddress(VariableDie, dwarf::DW_AT_location, Location); + addAddress(VariableDie, dwarf::DW_AT_location, Location); } DV->setDIE(VariableDie); return VariableDie; } -DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) { + +void DwarfDebug::addPubTypes(DISubprogram SP) { + DICompositeType SPTy = SP.getType(); + unsigned SPTag = SPTy.getTag(); + if (SPTag != dwarf::DW_TAG_subroutine_type) + return; + + DIArray Args = SPTy.getTypeArray(); + if (Args.isNull()) + return; + + for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { + DIType ATy(Args.getElement(i).getNode()); + if (ATy.isNull()) + continue; + DICompositeType CATy = getDICompositeType(ATy); + if (!CATy.isNull() && !CATy.getName().empty()) { + if (DIEEntry *Entry = ModuleCU->getDIEEntry(CATy.getNode())) + ModuleCU->addGlobalType(CATy.getName(), Entry->getEntry()); + } + } +} + +/// constructScopeDIE - Construct a DIE for this scope. 
+DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (!Scope) return NULL; DIScope DS(Scope->getScopeNode()); @@ -1588,43 +1507,46 @@ DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) { DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) - ScopeDIE = ConstructInlinedScopeDIE(Scope); + ScopeDIE = constructInlinedScopeDIE(Scope); else if (DS.isSubprogram()) { if (Scope->isAbstractScope()) - ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode()); + ScopeDIE = ModuleCU->getDIE(DS.getNode()); else - ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode()); + ScopeDIE = updateSubprogramScopeDIE(DS.getNode()); } else { - ScopeDIE = ConstructLexicalScopeDIE(Scope); + ScopeDIE = constructLexicalScopeDIE(Scope); if (!ScopeDIE) return NULL; } // Add variables to scope. SmallVector &Variables = Scope->getVariables(); for (unsigned i = 0, N = Variables.size(); i < N; ++i) { - DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU); - if (VariableDIE) - ScopeDIE->AddChild(VariableDIE); + DIE *VariableDIE = constructVariableDIE(Variables[i], Scope, ModuleCU); + if (VariableDIE) + ScopeDIE->addChild(VariableDIE); } // Add nested scopes. SmallVector &Scopes = Scope->getScopes(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) { // Define the Scope debug information entry. - DIE *NestedDIE = ConstructScopeDIE(Scopes[j]); - if (NestedDIE) - ScopeDIE->AddChild(NestedDIE); + DIE *NestedDIE = constructScopeDIE(Scopes[j]); + if (NestedDIE) + ScopeDIE->addChild(NestedDIE); } - return ScopeDIE; + + if (DS.isSubprogram()) + addPubTypes(DISubprogram(DS.getNode())); + + return ScopeDIE; } /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. 
-unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName, - const char *FileName) { +unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) { unsigned DId; StringMap::iterator DI = DirectoryIdMap.find(DirName); if (DI != DirectoryIdMap.end()) { @@ -1657,33 +1579,34 @@ unsigned DwarfDebug::GetOrCreateSourceID(const char *DirName, return SrcId; } -void DwarfDebug::ConstructCompileUnit(MDNode *N) { +void DwarfDebug::constructCompileUnit(MDNode *N) { DICompileUnit DIUnit(N); - const char *FN = DIUnit.getFilename(); - const char *Dir = DIUnit.getDirectory(); + StringRef FN = DIUnit.getFilename(); + StringRef Dir = DIUnit.getDirectory(); unsigned ID = GetOrCreateSourceID(Dir, FN); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - AddSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + addSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, DWLabel("section_line", 0), DWLabel("section_line", 0), false); - AddString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, + addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, DIUnit.getProducer()); - AddUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, + addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, DIUnit.getLanguage()); - AddString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); - if (Dir) - AddString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + if (!Dir.empty()) + addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) - AddUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); - if (const char *Flags = DIUnit.getFlags()) - AddString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); + StringRef Flags = DIUnit.getFlags(); + if (!Flags.empty()) + addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); unsigned RVer = 
DIUnit.getRunTimeVersion(); if (RVer) - AddUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); CompileUnit *Unit = new CompileUnit(ID, Die); @@ -1697,7 +1620,7 @@ void DwarfDebug::ConstructCompileUnit(MDNode *N) { CompileUnits.push_back(Unit); } -void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) { +void DwarfDebug::constructGlobalVariableDIE(MDNode *N) { DIGlobalVariable DI_GV(N); // If debug information is malformed then ignore it. @@ -1705,29 +1628,34 @@ void DwarfDebug::ConstructGlobalVariableDIE(MDNode *N) { return; // Check for pre-existence. - DIE *&Slot = ModuleCU->getDieMapSlotFor(DI_GV.getNode()); - if (Slot) + if (ModuleCU->getDIE(DI_GV.getNode())) return; - DIE *VariableDie = CreateGlobalVariableDIE(ModuleCU, DI_GV); + DIE *VariableDie = createGlobalVariableDIE(ModuleCU, DI_GV); // Add to map. - Slot = VariableDie; + ModuleCU->insertDIE(N, VariableDie); // Add to context owner. - ModuleCU->getDie()->AddChild(VariableDie); + ModuleCU->getCUDie()->addChild(VariableDie); // Expose as global. FIXME - need to check external flag. - ModuleCU->AddGlobal(DI_GV.getName(), VariableDie); + ModuleCU->addGlobal(DI_GV.getName(), VariableDie); + + DIType GTy = DI_GV.getType(); + if (GTy.isCompositeType() && !GTy.getName().empty()) { + DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode()); + assert (Entry && "Missing global type!"); + ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry()); + } return; } -void DwarfDebug::ConstructSubprogram(MDNode *N) { +void DwarfDebug::constructSubprogramDIE(MDNode *N) { DISubprogram SP(N); // Check for pre-existence. - DIE *&Slot = ModuleCU->getDieMapSlotFor(N); - if (Slot) + if (ModuleCU->getDIE(N)) return; if (!SP.isDefinition()) @@ -1735,23 +1663,24 @@ void DwarfDebug::ConstructSubprogram(MDNode *N) { // class type. 
return; - DIE *SubprogramDie = CreateSubprogramDIE(ModuleCU, SP); + DIE *SubprogramDie = createSubprogramDIE(ModuleCU, SP); // Add to map. - Slot = SubprogramDie; + ModuleCU->insertDIE(N, SubprogramDie); // Add to context owner. - ModuleCU->getDie()->AddChild(SubprogramDie); + ModuleCU->getCUDie()->addChild(SubprogramDie); // Expose as global. - ModuleCU->AddGlobal(SP.getName(), SubprogramDie); + ModuleCU->addGlobal(SP.getName(), SubprogramDie); + return; } -/// BeginModule - Emit all Dwarf sections that should come prior to the +/// beginModule - Emit all Dwarf sections that should come prior to the /// content. Create global DIEs and emit initial debug info sections. /// This is inovked by the target AsmPrinter. -void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { +void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) { this->M = M; if (TimePassesIsEnabled) @@ -1766,7 +1695,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { // Create all the compile unit DIEs. for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), E = DbgFinder.compile_unit_end(); I != E; ++I) - ConstructCompileUnit(*I); + constructCompileUnit(*I); if (CompileUnits.empty()) { if (TimePassesIsEnabled) @@ -1787,13 +1716,13 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { if (GV.getContext().getNode() != GV.getCompileUnit().getNode()) ScopedGVs.push_back(*I); else - ConstructGlobalVariableDIE(*I); + constructGlobalVariableDIE(*I); } // Create DIEs for each subprogram. 
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), E = DbgFinder.subprogram_end(); I != E; ++I) - ConstructSubprogram(*I); + constructSubprogramDIE(*I); MMI = mmi; shouldEmit = true; @@ -1819,15 +1748,15 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) { } // Emit initial sections - EmitInitial(); + emitInitial(); if (TimePassesIsEnabled) DebugTimer->stopTimer(); } -/// EndModule - Emit all Dwarf sections that should come after the content. +/// endModule - Emit all Dwarf sections that should come after the content. /// -void DwarfDebug::EndModule() { +void DwarfDebug::endModule() { if (!ModuleCU) return; @@ -1838,7 +1767,7 @@ void DwarfDebug::EndModule() { for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } // Standard sections final addresses. @@ -1854,52 +1783,56 @@ void DwarfDebug::EndModule() { } // Emit common frame information. - EmitCommonDebugFrame(); + emitCommonDebugFrame(); // Emit function debug frame information for (std::vector::iterator I = DebugFrames.begin(), E = DebugFrames.end(); I != E; ++I) - EmitFunctionDebugFrame(*I); + emitFunctionDebugFrame(*I); // Compute DIE offsets and sizes. - SizeAndOffsets(); + computeSizeAndOffsets(); // Emit all the DIEs into a debug info section - EmitDebugInfo(); + emitDebugInfo(); // Corresponding abbreviations into a abbrev section. - EmitAbbreviations(); + emitAbbreviations(); // Emit source line correspondence into a debug line section. - EmitDebugLines(); + emitDebugLines(); // Emit info into a debug pubnames section. - EmitDebugPubNames(); + emitDebugPubNames(); + + // Emit info into a debug pubtypes section. + emitDebugPubTypes(); // Emit info into a debug str section. - EmitDebugStr(); + emitDebugStr(); // Emit info into a debug loc section. 
- EmitDebugLoc(); + emitDebugLoc(); // Emit info into a debug aranges section. EmitDebugARanges(); // Emit info into a debug ranges section. - EmitDebugRanges(); + emitDebugRanges(); // Emit info into a debug macinfo section. - EmitDebugMacInfo(); + emitDebugMacInfo(); // Emit inline info. - EmitDebugInlineInfo(); + emitDebugInlineInfo(); if (TimePassesIsEnabled) DebugTimer->stopTimer(); } /// findAbstractVariable - Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx, +DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, + unsigned FrameIdx, DILocation &ScopeLoc) { DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode()); @@ -1911,13 +1844,13 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx return NULL; AbsDbgVariable = new DbgVariable(Var, FrameIdx); - Scope->AddVariable(AbsDbgVariable); + Scope->addVariable(AbsDbgVariable); AbstractVariables[Var.getNode()] = AbsDbgVariable; return AbsDbgVariable; } -/// CollectVariableInfo - Populate DbgScope entries with variables' info. -void DwarfDebug::CollectVariableInfo() { +/// collectVariableInfo - Populate DbgScope entries with variables' info. +void DwarfDebug::collectVariableInfo() { if (!MMI) return; MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo(); @@ -1933,31 +1866,32 @@ void DwarfDebug::CollectVariableInfo() { DbgScope *Scope = ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode()); if (!Scope) - Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode()); + Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode()); // If variable scope is not found then skip this variable. 
if (!Scope) continue; DbgVariable *RegVar = new DbgVariable(DV, VP.first); - Scope->AddVariable(RegVar); - if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc)) + Scope->addVariable(RegVar); + if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, + ScopeLoc)) RegVar->setAbstractVariable(AbsDbgVariable); } } -/// BeginScope - Process beginning of a scope starting at Label. -void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) { +/// beginScope - Process beginning of a scope starting at Label. +void DwarfDebug::beginScope(const MachineInstr *MI, unsigned Label) { InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI); if (I == DbgScopeBeginMap.end()) return; - ScopeVector &SD = DbgScopeBeginMap[MI]; + ScopeVector &SD = I->second; for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) + SDI != SDE; ++SDI) (*SDI)->setStartLabelID(Label); } -/// EndScope - Process end of a scope. -void DwarfDebug::EndScope(const MachineInstr *MI) { +/// endScope - Process end of a scope. 
+void DwarfDebug::endScope(const MachineInstr *MI) { InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI); if (I == DbgScopeEndMap.end()) return; @@ -1967,7 +1901,7 @@ void DwarfDebug::EndScope(const MachineInstr *MI) { SmallVector &SD = I->second; for (SmallVector::iterator SDI = SD.begin(), SDE = SD.end(); - SDI != SDE; ++SDI) + SDI != SDE; ++SDI) (*SDI)->setEndLabelID(Label); return; } @@ -1981,7 +1915,7 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { return; WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL); DbgScopeMap.insert(std::make_pair(Scope, WScope)); - if (DIDescriptor(Scope).isLexicalBlock()) + if (DIDescriptor(Scope).isLexicalBlock()) createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL); return; } @@ -1996,9 +1930,9 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) { createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode()); } -/// ExtractScopeInformation - Scan machine instructions in this function +/// extractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if atleast one scope was found. -bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { +bool DwarfDebug::extractScopeInformation(MachineFunction *MF) { // If scope information was extracted using .dbg intrinsics then there is not // any need to extract these information by scanning each instruction. if (!DbgScopeMap.empty()) @@ -2015,7 +1949,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { DebugLocTuple DLT = MF->getDebugLocTuple(DL); if (!DLT.Scope) continue; // There is no need to create another DIE for compile unit. For all - // other scopes, create one DbgScope now. This will be translated + // other scopes, create one DbgScope now. This will be translated // into a scope DIE at the end. 
if (DIDescriptor(DLT.Scope).isCompileUnit()) continue; createDbgScope(DLT.Scope, DLT.InlinedAtLoc); @@ -2034,7 +1968,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { DebugLocTuple DLT = MF->getDebugLocTuple(DL); if (!DLT.Scope) continue; // There is no need to create another DIE for compile unit. For all - // other scopes, create one DbgScope now. This will be translated + // other scopes, create one DbgScope now. This will be translated // into a scope DIE at the end. if (DIDescriptor(DLT.Scope).isCompileUnit()) continue; DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc); @@ -2049,7 +1983,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { if (DI->second->isAbstractScope()) continue; assert (DI->second->getFirstInsn() && "Invalid first instruction!"); - DI->second->FixInstructionMarkers(); + DI->second->fixInstructionMarkers(); assert (DI->second->getLastInsn() && "Invalid last instruction!"); } @@ -2083,9 +2017,9 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) { return !DbgScopeMap.empty(); } -/// BeginFunction - Gather pre-function debug information. Assumes being +/// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. -void DwarfDebug::BeginFunction(MachineFunction *MF) { +void DwarfDebug::beginFunction(MachineFunction *MF) { this->MF = MF; if (!ShouldEmitDwarfDebug()) return; @@ -2093,9 +2027,10 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { if (TimePassesIsEnabled) DebugTimer->startTimer(); - if (!ExtractScopeInformation(MF)) + if (!extractScopeInformation(MF)) return; - CollectVariableInfo(); + + collectVariableInfo(); // Begin accumulating function debug information. 
MMI->BeginFunction(MF); @@ -2111,9 +2046,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { unsigned LabelID = 0; DISubprogram SP = getDISubprogram(DLT.Scope); if (!SP.isNull()) - LabelID = RecordSourceLine(SP.getLineNumber(), 0, DLT.Scope); + LabelID = recordSourceLine(SP.getLineNumber(), 0, DLT.Scope); else - LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope); + LabelID = recordSourceLine(DLT.Line, DLT.Col, DLT.Scope); Asm->printLabel(LabelID); O << '\n'; } @@ -2121,9 +2056,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) { DebugTimer->stopTimer(); } -/// EndFunction - Gather and emit post-function debug information. +/// endFunction - Gather and emit post-function debug information. /// -void DwarfDebug::EndFunction(MachineFunction *MF) { +void DwarfDebug::endFunction(MachineFunction *MF) { if (!ShouldEmitDwarfDebug()) return; if (TimePassesIsEnabled) @@ -2148,10 +2083,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { // Construct abstract scopes. for (SmallVector::iterator AI = AbstractScopesList.begin(), - AE = AbstractScopesList.end(); AI != AE; ++AI) - ConstructScopeDIE(*AI); + AE = AbstractScopesList.end(); AI != AE; ++AI) + constructScopeDIE(*AI); - ConstructScopeDIE(CurrentFnDbgScope); + constructScopeDIE(CurrentFnDbgScope); DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount, MMI->getFrameMoves())); @@ -2172,10 +2107,10 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { DebugTimer->stopTimer(); } -/// RecordSourceLine - Records location information and associates it with a +/// recordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. 
-unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, +unsigned DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) { if (!MMI) return 0; @@ -2183,8 +2118,8 @@ unsigned DwarfDebug::RecordSourceLine(unsigned Line, unsigned Col, if (TimePassesIsEnabled) DebugTimer->startTimer(); - const char *Dir = NULL; - const char *Fn = NULL; + StringRef Dir; + StringRef Fn; DIDescriptor Scope(S); if (Scope.isCompileUnit()) { @@ -2234,17 +2169,18 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName, // Emit Methods //===----------------------------------------------------------------------===// -/// SizeAndOffsetDie - Compute the size and offset of a DIE. +/// computeSizeAndOffset - Compute the size and offset of a DIE. /// -unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) { +unsigned +DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { // Get the children. const std::vector &Children = Die->getChildren(); // If not last sibling and has children then add sibling offset attribute. - if (!Last && !Children.empty()) Die->AddSiblingOffset(); + if (!Last && !Children.empty()) Die->addSiblingOffset(); // Record the abbreviation. - AssignAbbrevNumber(Die->getAbbrev()); + assignAbbrevNumber(Die->getAbbrev()); // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); @@ -2270,7 +2206,7 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) { "Children flag not set"); for (unsigned j = 0, M = Children.size(); j < M; ++j) - Offset = SizeAndOffsetDie(Children[j], Offset, (j + 1) == M); + Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M); // End of children marker. Offset += sizeof(int8_t); @@ -2280,9 +2216,9 @@ unsigned DwarfDebug::SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last) { return Offset; } -/// SizeAndOffsets - Compute the size and offset of all the DIEs. 
+/// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// -void DwarfDebug::SizeAndOffsets() { +void DwarfDebug::computeSizeAndOffsets() { // Compute size of compile unit header. static unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info @@ -2290,13 +2226,13 @@ void DwarfDebug::SizeAndOffsets() { sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) - SizeAndOffsetDie(ModuleCU->getDie(), Offset, true); + computeSizeAndOffset(ModuleCU->getCUDie(), Offset, true); CompileUnitOffsets[ModuleCU] = 0; } -/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc +/// emitInitial - Emit initial Dwarf declarations. This is necessary for cc /// tools to recognize the object file contains Dwarf information. -void DwarfDebug::EmitInitial() { +void DwarfDebug::emitInitial() { // Check to see if we already emitted intial headers. if (didInitial) return; didInitial = true; @@ -2327,6 +2263,8 @@ void DwarfDebug::EmitInitial() { EmitLabel("section_loc", 0); Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubNamesSection()); EmitLabel("section_pubnames", 0); + Asm->OutStreamer.SwitchSection(TLOF.getDwarfPubTypesSection()); + EmitLabel("section_pubtypes", 0); Asm->OutStreamer.SwitchSection(TLOF.getDwarfStrSection()); EmitLabel("section_str", 0); Asm->OutStreamer.SwitchSection(TLOF.getDwarfRangesSection()); @@ -2338,9 +2276,9 @@ void DwarfDebug::EmitInitial() { EmitLabel("data_begin", 0); } -/// EmitDIE - Recusively Emits a debug information entry. +/// emitDIE - Recusively Emits a debug information entry. /// -void DwarfDebug::EmitDIE(DIE *Die) { +void DwarfDebug::emitDIE(DIE *Die) { // Get the abbreviation for this DIE. 
unsigned AbbrevNumber = Die->getAbbrevNumber(); const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; @@ -2370,7 +2308,7 @@ void DwarfDebug::EmitDIE(DIE *Die) { switch (Attr) { case dwarf::DW_AT_sibling: - Asm->EmitInt32(Die->SiblingOffset()); + Asm->EmitInt32(Die->getSiblingOffset()); break; case dwarf::DW_AT_abstract_origin: { DIEEntry *E = cast(Values[i]); @@ -2393,16 +2331,16 @@ void DwarfDebug::EmitDIE(DIE *Die) { const std::vector &Children = Die->getChildren(); for (unsigned j = 0, M = Children.size(); j < M; ++j) - EmitDIE(Children[j]); + emitDIE(Children[j]); Asm->EmitInt8(0); Asm->EOL("End Of Children Mark"); } } -/// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section. +/// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section. /// -void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) { - DIE *Die = Unit->getDie(); +void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) { + DIE *Die = Unit->getCUDie(); // Emit the compile units header. EmitLabel("info_begin", Unit->getID()); @@ -2420,7 +2358,7 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) { Asm->EOL("Offset Into Abbrev. Section"); Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)"); - EmitDIE(Die); + emitDIE(Die); // FIXME - extra padding for gdb bug. Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB"); @@ -2431,17 +2369,17 @@ void DwarfDebug::EmitDebugInfoPerCU(CompileUnit *Unit) { Asm->EOL(); } -void DwarfDebug::EmitDebugInfo() { +void DwarfDebug::emitDebugInfo() { // Start debug info section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfInfoSection()); - EmitDebugInfoPerCU(ModuleCU); + emitDebugInfoPerCU(ModuleCU); } -/// EmitAbbreviations - Emit the abbreviation section. +/// emitAbbreviations - Emit the abbreviation section. /// -void DwarfDebug::EmitAbbreviations() const { +void DwarfDebug::emitAbbreviations() const { // Check to see if it is worth the effort. 
if (!Abbreviations.empty()) { // Start the debug abbrev section. @@ -2473,10 +2411,10 @@ void DwarfDebug::EmitAbbreviations() const { } } -/// EmitEndOfLineMatrix - Emit the last address of the section and the end of +/// emitEndOfLineMatrix - Emit the last address of the section and the end of /// the line matrix. /// -void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) { +void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Define last address of section. Asm->EmitInt8(0); Asm->EOL("Extended Op"); Asm->EmitInt8(TD->getPointerSize() + 1); Asm->EOL("Op size"); @@ -2489,9 +2427,9 @@ void DwarfDebug::EmitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); Asm->EOL(); } -/// EmitDebugLines - Emit source line information. +/// emitDebugLines - Emit source line information. /// -void DwarfDebug::EmitDebugLines() { +void DwarfDebug::emitDebugLines() { // If the target is using .loc/.file, the assembler will be emitting the // .debug_line table automatically. if (MAI->hasDotLocAndDotFile()) @@ -2640,22 +2578,22 @@ void DwarfDebug::EmitDebugLines() { } } - EmitEndOfLineMatrix(j + 1); + emitEndOfLineMatrix(j + 1); } if (SecSrcLinesSize == 0) // Because we're emitting a debug_line section, we still need a line // table. The linker and friends expect it to exist. If there's nothing to // put into it, emit an empty table. - EmitEndOfLineMatrix(1); + emitEndOfLineMatrix(1); EmitLabel("line_end", 0); Asm->EOL(); } -/// EmitCommonDebugFrame - Emit common frame info into a debug frame section. +/// emitCommonDebugFrame - Emit common frame info into a debug frame section. /// -void DwarfDebug::EmitCommonDebugFrame() { +void DwarfDebug::emitCommonDebugFrame() { if (!MAI->doesDwarfRequireFrameSection()) return; @@ -2698,10 +2636,10 @@ void DwarfDebug::EmitCommonDebugFrame() { Asm->EOL(); } -/// EmitFunctionDebugFrame - Emit per function frame info into a debug frame +/// emitFunctionDebugFrame - Emit per function frame info into a debug frame /// section. 
void -DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ +DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ if (!MAI->doesDwarfRequireFrameSection()) return; @@ -2734,7 +2672,7 @@ DwarfDebug::EmitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){ Asm->EOL(); } -void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) { +void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) { EmitDifference("pubnames_end", Unit->getID(), "pubnames_begin", Unit->getID(), true); Asm->EOL("Length of Public Names Info"); @@ -2751,7 +2689,7 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) { true); Asm->EOL("Compilation Unit Length"); - StringMap &Globals = Unit->getGlobals(); + const StringMap &Globals = Unit->getGlobals(); for (StringMap::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); @@ -2767,19 +2705,55 @@ void DwarfDebug::EmitDebugPubNamesPerCU(CompileUnit *Unit) { Asm->EOL(); } -/// EmitDebugPubNames - Emit visible names into a debug pubnames section. +/// emitDebugPubNames - Emit visible names into a debug pubnames section. /// -void DwarfDebug::EmitDebugPubNames() { +void DwarfDebug::emitDebugPubNames() { // Start the dwarf pubnames section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfPubNamesSection()); - EmitDebugPubNamesPerCU(ModuleCU); + emitDebugPubNamesPerCU(ModuleCU); } -/// EmitDebugStr - Emit visible names into a debug str section. +void DwarfDebug::emitDebugPubTypes() { + // Start the dwarf pubtypes section. 
+ Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubTypesSection()); + EmitDifference("pubtypes_end", ModuleCU->getID(), + "pubtypes_begin", ModuleCU->getID(), true); + Asm->EOL("Length of Public Types Info"); + + EmitLabel("pubtypes_begin", ModuleCU->getID()); + + Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version"); + + EmitSectionOffset("info_begin", "section_info", + ModuleCU->getID(), 0, true, false); + Asm->EOL("Offset of Compilation ModuleCU Info"); + + EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(), + true); + Asm->EOL("Compilation ModuleCU Length"); + + const StringMap &Globals = ModuleCU->getGlobalTypes(); + for (StringMap::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE * Entity = GI->second; + + Asm->EmitInt32(Entity->getOffset()); Asm->EOL("DIE offset"); + Asm->EmitString(Name, strlen(Name)); Asm->EOL("External Name"); + } + + Asm->EmitInt32(0); Asm->EOL("End Mark"); + EmitLabel("pubtypes_end", ModuleCU->getID()); + + Asm->EOL(); +} + +/// emitDebugStr - Emit visible names into a debug str section. /// -void DwarfDebug::EmitDebugStr() { +void DwarfDebug::emitDebugStr() { // Check to see if it is worth the effort. if (!StringPool.empty()) { // Start the dwarf str section. @@ -2801,9 +2775,9 @@ void DwarfDebug::EmitDebugStr() { } } -/// EmitDebugLoc - Emit visible names into a debug loc section. +/// emitDebugLoc - Emit visible names into a debug loc section. /// -void DwarfDebug::EmitDebugLoc() { +void DwarfDebug::emitDebugLoc() { // Start the dwarf loc section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfLocSection()); @@ -2847,18 +2821,18 @@ void DwarfDebug::EmitDebugARanges() { Asm->EOL(); } -/// EmitDebugRanges - Emit visible names into a debug ranges section. +/// emitDebugRanges - Emit visible names into a debug ranges section. 
/// -void DwarfDebug::EmitDebugRanges() { +void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); Asm->EOL(); } -/// EmitDebugMacInfo - Emit visible names into a debug macinfo section. +/// emitDebugMacInfo - Emit visible names into a debug macinfo section. /// -void DwarfDebug::EmitDebugMacInfo() { +void DwarfDebug::emitDebugMacInfo() { if (const MCSection *LineInfo = Asm->getObjFileLowering().getDwarfMacroInfoSection()) { // Start the dwarf macinfo section. @@ -2867,7 +2841,7 @@ void DwarfDebug::EmitDebugMacInfo() { } } -/// EmitDebugInlineInfo - Emit inline info using following format. +/// emitDebugInlineInfo - Emit inline info using following format. /// Section Header: /// 1. length of section /// 2. Dwarf version number @@ -2885,7 +2859,7 @@ void DwarfDebug::EmitDebugMacInfo() { /// inlined instance; the die_offset points to the inlined_subroutine die in the /// __debug_info section, and the low_pc is the starting address for the /// inlining instance. 
-void DwarfDebug::EmitDebugInlineInfo() { +void DwarfDebug::emitDebugInlineInfo() { if (!MAI->doesDwarfUsesInlineInfoSection()) return; @@ -2906,17 +2880,18 @@ void DwarfDebug::EmitDebugInlineInfo() { for (SmallVector::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { - + // for (ValueMap >::iterator // I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) { MDNode *Node = *I; - ValueMap >::iterator II = InlineInfo.find(Node); + ValueMap >::iterator II + = InlineInfo.find(Node); SmallVector &Labels = II->second; DISubprogram SP(Node); - const char *LName = SP.getLinkageName(); - const char *Name = SP.getName(); + StringRef LName = SP.getLinkageName(); + StringRef Name = SP.getName(); - if (!LName) + if (LName.empty()) Asm->EmitString(Name); else { // Skip special LLVM prefix that is used to inform the asm printer to not @@ -2924,14 +2899,14 @@ void DwarfDebug::EmitDebugInlineInfo() { // Objective-C symbol names and symbol whose name is replaced using GCC's // __asm__ attribute. if (LName[0] == 1) - LName = &LName[1]; + LName = LName.substr(1); // Asm->EmitString(LName); EmitSectionOffset("string", "section_str", StringPool.idFor(LName), false, true); } Asm->EOL("MIPS linkage name"); -// Asm->EmitString(Name); +// Asm->EmitString(Name); EmitSectionOffset("string", "section_str", StringPool.idFor(Name), false, true); Asm->EOL("Function name"); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 646de8f36e14..679d9b9d1a0b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -106,13 +106,9 @@ class DwarfDebug : public Dwarf { /// Lines - List of of source line correspondence. std::vector Lines; - /// ValuesSet - Used to uniquely define values. + /// DIEValues - A list of all the unique values in use. /// - FoldingSet ValuesSet; - - /// Values - A list of all the unique values in use. 
- /// - std::vector Values; + std::vector DIEValues; /// StringPool - A UniqueVector of strings used by indirect references. /// @@ -229,137 +225,135 @@ class DwarfDebug : public Dwarf { return SourceIds.size(); } - /// AssignAbbrevNumber - Define a unique number for the abbreviation. + /// assignAbbrevNumber - Define a unique number for the abbreviation. /// - void AssignAbbrevNumber(DIEAbbrev &Abbrev); + void assignAbbrevNumber(DIEAbbrev &Abbrev); - /// CreateDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug /// information entry. - DIEEntry *CreateDIEEntry(DIE *Entry = NULL); + DIEEntry *createDIEEntry(DIE *Entry = NULL); - /// SetDIEEntry - Set a DIEEntry once the debug information entry is defined. + /// addUInt - Add an unsigned integer attribute data and value. /// - void SetDIEEntry(DIEEntry *Value, DIE *Entry); + void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); - /// AddUInt - Add an unsigned integer attribute data and value. + /// addSInt - Add an signed integer attribute data and value. /// - void AddUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); - /// AddSInt - Add an signed integer attribute data and value. + /// addString - Add a string attribute data and value. /// - void AddSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + void addString(DIE *Die, unsigned Attribute, unsigned Form, + const StringRef Str); - /// AddString - Add a string attribute data and value. + /// addLabel - Add a Dwarf label attribute data and value. /// - void AddString(DIE *Die, unsigned Attribute, unsigned Form, - const std::string &String); - - /// AddLabel - Add a Dwarf label attribute data and value. 
- /// - void AddLabel(DIE *Die, unsigned Attribute, unsigned Form, + void addLabel(DIE *Die, unsigned Attribute, unsigned Form, const DWLabel &Label); - /// AddObjectLabel - Add an non-Dwarf label attribute data and value. + /// addObjectLabel - Add an non-Dwarf label attribute data and value. /// - void AddObjectLabel(DIE *Die, unsigned Attribute, unsigned Form, + void addObjectLabel(DIE *Die, unsigned Attribute, unsigned Form, const std::string &Label); - /// AddSectionOffset - Add a section offset label attribute data and value. + /// addSectionOffset - Add a section offset label attribute data and value. /// - void AddSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, + void addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form, const DWLabel &Label, const DWLabel &Section, bool isEH = false, bool useSet = true); - /// AddDelta - Add a label delta attribute data and value. + /// addDelta - Add a label delta attribute data and value. /// - void AddDelta(DIE *Die, unsigned Attribute, unsigned Form, + void addDelta(DIE *Die, unsigned Attribute, unsigned Form, const DWLabel &Hi, const DWLabel &Lo); - /// AddDIEEntry - Add a DIE attribute data and value. + /// addDIEEntry - Add a DIE attribute data and value. /// - void AddDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) { - Die->AddValue(Attribute, Form, CreateDIEEntry(Entry)); + void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); } - /// AddBlock - Add block data. + /// addBlock - Add block data. /// - void AddBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); - /// AddSourceLine - Add location information to specified debug information + /// addSourceLine - Add location information to specified debug information /// entry. 
- void AddSourceLine(DIE *Die, const DIVariable *V); - void AddSourceLine(DIE *Die, const DIGlobal *G); - void AddSourceLine(DIE *Die, const DISubprogram *SP); - void AddSourceLine(DIE *Die, const DIType *Ty); + void addSourceLine(DIE *Die, const DIVariable *V); + void addSourceLine(DIE *Die, const DIGlobal *G); + void addSourceLine(DIE *Die, const DISubprogram *SP); + void addSourceLine(DIE *Die, const DIType *Ty); - /// AddAddress - Add an address attribute to a die based on the location + /// addAddress - Add an address attribute to a die based on the location /// provided. - void AddAddress(DIE *Die, unsigned Attribute, + void addAddress(DIE *Die, unsigned Attribute, const MachineLocation &Location); - /// AddComplexAddress - Start with the address based on the location provided, + /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. /// - void AddComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); - // FIXME: Should be reformulated in terms of AddComplexAddress. - /// AddBlockByrefAddress - Start with the address based on the location + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location /// provided, and generate the DWARF information necessary to find the /// actual Block variable (navigating the Block struct) based on the /// starting location. Add the DWARF information to the die. Obsolete, - /// please use AddComplexAddress instead. + /// please use addComplexAddress instead. 
/// - void AddBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); - /// AddType - Add a new type attribute to the specified entity. - void AddType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); + /// addType - Add a new type attribute to the specified entity. + void addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty); - /// ConstructTypeDIE - Construct basic type die from DIBasicType. - void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + void addPubTypes(DISubprogram SP); + + /// constructTypeDIE - Construct basic type die from DIBasicType. + void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIBasicType BTy); - /// ConstructTypeDIE - Construct derived type die from DIDerivedType. - void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + /// constructTypeDIE - Construct derived type die from DIDerivedType. + void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DIDerivedType DTy); - /// ConstructTypeDIE - Construct type DIE from DICompositeType. - void ConstructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DICompositeType CTy); - /// ConstructSubrangeDIE - Construct subrange DIE from DISubrange. - void ConstructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. + void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); - /// ConstructArrayTypeDIE - Construct array type DIE from DICompositeType. - void ConstructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, + /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. + void constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, DICompositeType *CTy); - /// ConstructEnumTypeDIE - Construct enum type DIE from DIEnumerator. 
- DIE *ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy); + /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + DIE *constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy); - /// CreateGlobalVariableDIE - Create new DIE using GV. - DIE *CreateGlobalVariableDIE(CompileUnit *DW_Unit, + /// createGlobalVariableDIE - Create new DIE using GV. + DIE *createGlobalVariableDIE(CompileUnit *DW_Unit, const DIGlobalVariable &GV); - /// CreateMemberDIE - Create new member DIE. - DIE *CreateMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT); + /// createMemberDIE - Create new member DIE. + DIE *createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT); - /// CreateSubprogramDIE - Create new DIE using SP. - DIE *CreateSubprogramDIE(CompileUnit *DW_Unit, + /// createSubprogramDIE - Create new DIE using SP. + DIE *createSubprogramDIE(CompileUnit *DW_Unit, const DISubprogram &SP, bool IsConstructor = false, bool IsInlined = false); - /// FindCompileUnit - Get the compile unit for the given descriptor. + /// findCompileUnit - Get the compile unit for the given descriptor. /// - CompileUnit &FindCompileUnit(DICompileUnit Unit) const; + CompileUnit &findCompileUnit(DICompileUnit Unit) const; - /// CreateDbgScopeVariable - Create a new scope variable. + /// createDbgScopeVariable - Create a new scope variable. /// - DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); + DIE *createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit); /// getUpdatedDbgScope - Find or create DbgScope assicated with /// the instruction. Initialize scope and update scope hierarchy. 
@@ -374,88 +368,101 @@ class DwarfDebug : public Dwarf { DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx, DILocation &Loc); - DIE *UpdateSubprogramScopeDIE(MDNode *SPNode); - DIE *ConstructLexicalScopeDIE(DbgScope *Scope); - DIE *ConstructScopeDIE(DbgScope *Scope); - DIE *ConstructInlinedScopeDIE(DbgScope *Scope); - DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit); + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and + /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. + /// If there are global variables in this scope then create and insert + /// DIEs for these variables. + DIE *updateSubprogramScopeDIE(MDNode *SPNode); - /// ConstructDbgScope - Construct the components of a scope. - /// - void ConstructDbgScope(DbgScope *ParentScope, - unsigned ParentStartID, unsigned ParentEndID, - DIE *ParentDie, CompileUnit *Unit); + /// constructLexicalScope - Construct new DW_TAG_lexical_block + /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. + DIE *constructLexicalScopeDIE(DbgScope *Scope); - /// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc + /// constructInlinedScopeDIE - This scope represents inlined body of + /// a function. Construct DIE to represent this concrete inlined copy + /// of the function. + DIE *constructInlinedScopeDIE(DbgScope *Scope); + + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit); + + /// constructScopeDIE - Construct a DIE for this scope. + DIE *constructScopeDIE(DbgScope *Scope); + + /// emitInitial - Emit initial Dwarf declarations. This is necessary for cc /// tools to recognize the object file contains Dwarf information. - void EmitInitial(); + void emitInitial(); - /// EmitDIE - Recusively Emits a debug information entry. + /// emitDIE - Recusively Emits a debug information entry. 
/// - void EmitDIE(DIE *Die); + void emitDIE(DIE *Die); - /// SizeAndOffsetDie - Compute the size and offset of a DIE. + /// computeSizeAndOffset - Compute the size and offset of a DIE. /// - unsigned SizeAndOffsetDie(DIE *Die, unsigned Offset, bool Last); + unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last); - /// SizeAndOffsets - Compute the size and offset of all the DIEs. + /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// - void SizeAndOffsets(); + void computeSizeAndOffsets(); - /// EmitDebugInfo / EmitDebugInfoPerCU - Emit the debug info section. + /// EmitDebugInfo / emitDebugInfoPerCU - Emit the debug info section. /// - void EmitDebugInfoPerCU(CompileUnit *Unit); + void emitDebugInfoPerCU(CompileUnit *Unit); - void EmitDebugInfo(); + void emitDebugInfo(); - /// EmitAbbreviations - Emit the abbreviation section. + /// emitAbbreviations - Emit the abbreviation section. /// - void EmitAbbreviations() const; + void emitAbbreviations() const; - /// EmitEndOfLineMatrix - Emit the last address of the section and the end of + /// emitEndOfLineMatrix - Emit the last address of the section and the end of /// the line matrix. /// - void EmitEndOfLineMatrix(unsigned SectionEnd); + void emitEndOfLineMatrix(unsigned SectionEnd); - /// EmitDebugLines - Emit source line information. + /// emitDebugLines - Emit source line information. /// - void EmitDebugLines(); + void emitDebugLines(); - /// EmitCommonDebugFrame - Emit common frame info into a debug frame section. + /// emitCommonDebugFrame - Emit common frame info into a debug frame section. /// - void EmitCommonDebugFrame(); + void emitCommonDebugFrame(); - /// EmitFunctionDebugFrame - Emit per function frame info into a debug frame + /// emitFunctionDebugFrame - Emit per function frame info into a debug frame /// section. 
- void EmitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo); + void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo); - void EmitDebugPubNamesPerCU(CompileUnit *Unit); + void emitDebugPubNamesPerCU(CompileUnit *Unit); - /// EmitDebugPubNames - Emit visible names into a debug pubnames section. + /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// - void EmitDebugPubNames(); + void emitDebugPubNames(); - /// EmitDebugStr - Emit visible names into a debug str section. + /// emitDebugPubTypes - Emit visible types into a debug pubtypes section. /// - void EmitDebugStr(); + void emitDebugPubTypes(); - /// EmitDebugLoc - Emit visible names into a debug loc section. + /// emitDebugStr - Emit visible names into a debug str section. /// - void EmitDebugLoc(); + void emitDebugStr(); + + /// emitDebugLoc - Emit visible names into a debug loc section. + /// + void emitDebugLoc(); /// EmitDebugARanges - Emit visible names into a debug aranges section. /// void EmitDebugARanges(); - /// EmitDebugRanges - Emit visible names into a debug ranges section. + /// emitDebugRanges - Emit visible names into a debug ranges section. /// - void EmitDebugRanges(); + void emitDebugRanges(); - /// EmitDebugMacInfo - Emit visible names into a debug macinfo section. + /// emitDebugMacInfo - Emit visible names into a debug macinfo section. /// - void EmitDebugMacInfo(); + void emitDebugMacInfo(); - /// EmitDebugInlineInfo - Emit inline info using following format. + /// emitDebugInlineInfo - Emit inline info using following format. /// Section Header: /// 1. length of section /// 2. Dwarf version number @@ -473,26 +480,25 @@ class DwarfDebug : public Dwarf { /// inlined instance; the die_offset points to the inlined_subroutine die in /// the __debug_info section, and the low_pc is the starting address for the /// inlining instance. 
- void EmitDebugInlineInfo(); + void emitDebugInlineInfo(); /// GetOrCreateSourceID - Look up the source id with the given directory and /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames maps /// as well. - unsigned GetOrCreateSourceID(const char *DirName, - const char *FileName); + unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName); - void ConstructCompileUnit(MDNode *N); + void constructCompileUnit(MDNode *N); - void ConstructGlobalVariableDIE(MDNode *N); + void constructGlobalVariableDIE(MDNode *N); - void ConstructSubprogram(MDNode *N); + void constructSubprogramDIE(MDNode *N); // FIXME: This should go away in favor of complex addresses. /// Find the type the programmer originally declared the variable to be /// and return that type. Obsolete, use GetComplexAddrType instead. /// - DIType GetBlockByrefType(DIType Ty, std::string Name); + DIType getBlockByrefType(DIType Ty, std::string Name); public: //===--------------------------------------------------------------------===// @@ -505,30 +511,30 @@ public: /// be emitted. bool ShouldEmitDwarfDebug() const { return shouldEmit; } - /// BeginModule - Emit all Dwarf sections that should come prior to the + /// beginModule - Emit all Dwarf sections that should come prior to the /// content. - void BeginModule(Module *M, MachineModuleInfo *MMI); + void beginModule(Module *M, MachineModuleInfo *MMI); - /// EndModule - Emit all Dwarf sections that should come after the content. + /// endModule - Emit all Dwarf sections that should come after the content. /// - void EndModule(); + void endModule(); - /// BeginFunction - Gather pre-function debug information. Assumes being + /// beginFunction - Gather pre-function debug information. Assumes being /// emitted immediately after the function entry point. 
- void BeginFunction(MachineFunction *MF); + void beginFunction(MachineFunction *MF); - /// EndFunction - Gather and emit post-function debug information. + /// endFunction - Gather and emit post-function debug information. /// - void EndFunction(MachineFunction *MF); + void endFunction(MachineFunction *MF); - /// RecordSourceLine - Records location information and associates it with a + /// recordSourceLine - Records location information and associates it with a /// label. Returns a unique label ID used to generate a label and provide /// correspondence to the source line list. - unsigned RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); + unsigned recordSourceLine(unsigned Line, unsigned Col, MDNode *Scope); - /// getRecordSourceLineCount - Return the number of source lines in the debug + /// getSourceLineCount - Return the number of source lines in the debug /// info. - unsigned getRecordSourceLineCount() const { + unsigned getSourceLineCount() const { return Lines.size(); } @@ -540,22 +546,18 @@ public: unsigned getOrCreateSourceID(const std::string &DirName, const std::string &FileName); - /// ExtractScopeInformation - Scan machine instructions in this function + /// extractScopeInformation - Scan machine instructions in this function /// and collect DbgScopes. Return true, if atleast one scope was found. - bool ExtractScopeInformation(MachineFunction *MF); + bool extractScopeInformation(MachineFunction *MF); - /// CollectVariableInfo - Populate DbgScope entries with variables' info. - void CollectVariableInfo(); + /// collectVariableInfo - Populate DbgScope entries with variables' info. + void collectVariableInfo(); - /// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that - /// end with this machine instruction. - void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label); + /// beginScope - Process beginning of a scope starting at Label. 
+ void beginScope(const MachineInstr *MI, unsigned Label); - /// BeginScope - Process beginning of a scope starting at Label. - void BeginScope(const MachineInstr *MI, unsigned Label); - - /// EndScope - Prcess end of a scope. - void EndScope(const MachineInstr *MI); + /// endScope - Process end of a scope. + void endScope(const MachineInstr *MI); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index fcdcfd31bc3e..1c8b8f464720 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -727,8 +727,7 @@ void DwarfException::EmitExceptionTable() { // somewhere. This predicate should be moved to a shared location that is // in target-independent code. // - if ((LSDASection->getKind().isWriteable() && - !LSDASection->getKind().isReadOnlyWithRel()) || + if (LSDASection->getKind().isWriteable() || Asm->TM.getRelocationModel() == Reloc::Static) TTypeFormat = dwarf::DW_EH_PE_absptr; else @@ -918,36 +917,14 @@ void DwarfException::EmitExceptionTable() { } // Emit the Catch TypeInfos. - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - unsigned Index = 1; - for (std::vector::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalVariable *TI = *I; + const GlobalVariable *GV = *I; + PrintRelDirective(); - if (TI) { - if (!LSDASection->getKind().isReadOnlyWithRel() && - (TTypeFormat == dwarf::DW_EH_PE_absptr || - TI->getLinkage() == GlobalValue::InternalLinkage)) { - // Print out the unadorned name of the type info. 
- PrintRelDirective(); - O << Asm->Mang->getMangledName(TI); - } else { - bool IsTypeInfoIndirect = false, IsTypeInfoPCRel = false; - const MCExpr *TypeInfoRef = - TLOF.getSymbolForDwarfGlobalReference(TI, Asm->Mang, Asm->MMI, - IsTypeInfoIndirect, - IsTypeInfoPCRel); - - if (!IsTypeInfoPCRel) - TypeInfoRef = CreateLabelDiff(TypeInfoRef, "typeinforef_addr", - Index++); - - O << MAI->getData32bitsDirective(); - TypeInfoRef->print(O, MAI); - } + if (GV) { + O << Asm->Mang->getMangledName(GV); } else { - PrintRelDirective(); O << "0x0"; } diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp index 63ae65368058..dd8d88a2e4af 100644 --- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp @@ -43,14 +43,14 @@ void DwarfWriter::BeginModule(Module *M, DE = new DwarfException(OS, A, T); DD = new DwarfDebug(OS, A, T); DE->BeginModule(M, MMI); - DD->BeginModule(M, MMI); + DD->beginModule(M, MMI); } /// EndModule - Emit all Dwarf sections that should come after the content. /// void DwarfWriter::EndModule() { DE->EndModule(); - DD->EndModule(); + DD->endModule(); delete DD; DD = 0; delete DE; DE = 0; } @@ -59,13 +59,13 @@ void DwarfWriter::EndModule() { /// emitted immediately after the function entry point. void DwarfWriter::BeginFunction(MachineFunction *MF) { DE->BeginFunction(MF); - DD->BeginFunction(MF); + DD->beginFunction(MF); } /// EndFunction - Gather and emit post-function debug information. /// void DwarfWriter::EndFunction(MachineFunction *MF) { - DD->EndFunction(MF); + DD->endFunction(MF); DE->EndFunction(); if (MachineModuleInfo *MMI = DD->getMMI() ? DD->getMMI() : DE->getMMI()) @@ -78,12 +78,12 @@ void DwarfWriter::EndFunction(MachineFunction *MF) { /// correspondence to the source line list. 
unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col, MDNode *Scope) { - return DD->RecordSourceLine(Line, Col, Scope); + return DD->recordSourceLine(Line, Col, Scope); } /// getRecordSourceLineCount - Count source lines. unsigned DwarfWriter::getRecordSourceLineCount() { - return DD->getRecordSourceLineCount(); + return DD->getSourceLineCount(); } /// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should @@ -93,8 +93,8 @@ bool DwarfWriter::ShouldEmitDwarfDebug() const { } void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) { - DD->BeginScope(MI, L); + DD->beginScope(MI, L); } void DwarfWriter::EndScope(const MachineInstr *MI) { - DD->EndScope(MI); + DD->endScope(MI); } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index f807e8fa261e..8a62eb20bbb4 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -41,8 +41,6 @@ using namespace llvm; STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); -STATISTIC(NumTailDups , "Number of tail duplicated blocks"); -STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); static cl::opt FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); @@ -205,16 +203,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= MadeChangeThisIteration; } - // Do tail duplication after tail merging is done. Otherwise it is - // tough to avoid situations where tail duplication and tail merging undo - // each other's transformations ad infinitum. - MadeChangeThisIteration = true; - while (MadeChangeThisIteration) { - MadeChangeThisIteration = false; - MadeChangeThisIteration |= TailDuplicateBlocks(MF); - MadeChange |= MadeChangeThisIteration; - } - // See if any jump tables have become mergable or dead as the code generator // did its thing. 
MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); @@ -918,71 +906,6 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) { } -/// CanFallThrough - Return true if the specified block (with the specified -/// branch condition) can implicitly transfer control to the block after it by -/// falling off the end of it. This should return false if it can reach the -/// block after it, but it uses an explicit branch to do so (e.g. a table jump). -/// -/// True is a conservative answer. -/// -bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB, - bool BranchUnAnalyzable, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond) { - MachineFunction::iterator Fallthrough = CurBB; - ++Fallthrough; - // If FallthroughBlock is off the end of the function, it can't fall through. - if (Fallthrough == CurBB->getParent()->end()) - return false; - - // If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible. - if (!CurBB->isSuccessor(Fallthrough)) - return false; - - // If we couldn't analyze the branch, examine the last instruction. - // If the block doesn't end in a known control barrier, assume fallthrough - // is possible. The isPredicable check is needed because this code can be - // called during IfConversion, where an instruction which is normally a - // Barrier is predicated and thus no longer an actual control barrier. This - // is over-conservative though, because if an instruction isn't actually - // predicated we could still treat it like a barrier. - if (BranchUnAnalyzable) - return CurBB->empty() || !CurBB->back().getDesc().isBarrier() || - CurBB->back().getDesc().isPredicable(); - - // If there is no branch, control always falls through. - if (TBB == 0) return true; - - // If there is some explicit branch to the fallthrough block, it can obviously - // reach, even though the branch should get folded to fall through implicitly. 
- if (MachineFunction::iterator(TBB) == Fallthrough || - MachineFunction::iterator(FBB) == Fallthrough) - return true; - - // If it's an unconditional branch to some block not the fall through, it - // doesn't fall through. - if (Cond.empty()) return false; - - // Otherwise, if it is conditional and has no explicit false block, it falls - // through. - return FBB == 0; -} - -/// CanFallThrough - Return true if the specified can implicitly transfer -/// control to the block after it by falling off the end of it. This should -/// return false if it can reach the block after it, but it uses an explicit -/// branch to do so (e.g. a table jump). -/// -/// True is a conservative answer. -/// -bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - bool CurUnAnalyzable = TII->AnalyzeBranch(*CurBB, TBB, FBB, Cond, true); - return CanFallThrough(CurBB, CurUnAnalyzable, TBB, FBB, Cond); -} - /// IsBetterFallthrough - Return true if it would be clearly better to /// fall-through to MBB1 than to fall through into MBB2. This has to return /// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will @@ -1005,143 +928,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall(); } -/// TailDuplicateBlocks - Look for small blocks that are unconditionally -/// branched to and do not fall through. Tail-duplicate their instructions -/// into their predecessors to eliminate (dynamic) branches. -bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) { - bool MadeChange = false; - - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { - MachineBasicBlock *MBB = I++; - - // Only duplicate blocks that end with unconditional branches. - if (CanFallThrough(MBB)) - continue; - - MadeChange |= TailDuplicate(MBB, MF); - - // If it is dead, remove it. 
- if (MBB->pred_empty()) { - NumInstrDups -= MBB->size(); - RemoveDeadBlock(MBB); - MadeChange = true; - ++NumDeadBlocks; - } - } - return MadeChange; -} - -/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each -/// of its predecessors. -bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB, - MachineFunction &MF) { - // Don't try to tail-duplicate single-block loops. - if (TailBB->isSuccessor(TailBB)) - return false; - - // Set the limit on the number of instructions to duplicate, with a default - // of one less than the tail-merge threshold. When optimizing for size, - // duplicate only one, because one branch instruction can be eliminated to - // compensate for the duplication. - unsigned MaxDuplicateCount = - MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ? - 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1); - - // Check the instructions in the block to determine whether tail-duplication - // is invalid or unlikely to be profitable. - unsigned i = 0; - bool HasCall = false; - for (MachineBasicBlock::iterator I = TailBB->begin(); - I != TailBB->end(); ++I, ++i) { - // Non-duplicable things shouldn't be tail-duplicated. - if (I->getDesc().isNotDuplicable()) return false; - // Don't duplicate more than the threshold. - if (i == MaxDuplicateCount) return false; - // Remember if we saw a call. - if (I->getDesc().isCall()) HasCall = true; - } - // Heuristically, don't tail-duplicate calls if it would expand code size, - // as it's less likely to be worth the extra cost. - if (i > 1 && HasCall) - return false; - - // Iterate through all the unique predecessors and tail-duplicate this - // block into them, if possible. Copying the list ahead of time also - // avoids trouble with the predecessor list reallocating. 
- bool Changed = false; - SmallSetVector Preds(TailBB->pred_begin(), - TailBB->pred_end()); - for (SmallSetVector::iterator PI = Preds.begin(), - PE = Preds.end(); PI != PE; ++PI) { - MachineBasicBlock *PredBB = *PI; - - assert(TailBB != PredBB && - "Single-block loop should have been rejected earlier!"); - if (PredBB->succ_size() > 1) continue; - - MachineBasicBlock *PredTBB, *PredFBB; - SmallVector PredCond; - if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) - continue; - if (!PredCond.empty()) - continue; - // EH edges are ignored by AnalyzeBranch. - if (PredBB->succ_size() != 1) - continue; - // Don't duplicate into a fall-through predecessor (at least for now). - if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB)) - continue; - - DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB - << "From Succ: " << *TailBB); - - // Remove PredBB's unconditional branch. - TII->RemoveBranch(*PredBB); - // Clone the contents of TailBB into PredBB. - for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); - I != E; ++I) { - MachineInstr *NewMI = MF.CloneMachineInstr(I); - PredBB->insert(PredBB->end(), NewMI); - } - NumInstrDups += TailBB->size() - 1; // subtract one for removed branch - - // Update the CFG. - PredBB->removeSuccessor(PredBB->succ_begin()); - assert(PredBB->succ_empty() && - "TailDuplicate called on block with multiple successors!"); - for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), - E = TailBB->succ_end(); I != E; ++I) - PredBB->addSuccessor(*I); - - Changed = true; - ++NumTailDups; - } - - // If TailBB was duplicated into all its predecessors except for the prior - // block, which falls through unconditionally, move the contents of this - // block into the prior block. 
- MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); - MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; - SmallVector PriorCond; - bool PriorUnAnalyzable = - TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); - // This has to check PrevBB->succ_size() because EH edges are ignored by - // AnalyzeBranch. - if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && - TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !TailBB->hasAddressTaken()) { - DEBUG(errs() << "\nMerging into block: " << PrevBB - << "From MBB: " << *TailBB); - PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); - PrevBB.removeSuccessor(PrevBB.succ_begin());; - assert(PrevBB.succ_empty()); - PrevBB.transferSuccessors(TailBB); - Changed = true; - } - - return Changed; -} - /// OptimizeBlock - Analyze and optimize control flow related to the specified /// block. This is never called on the entry block. bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { @@ -1266,7 +1052,7 @@ ReoptimizeBlock: // the assert condition out of the loop body. if (MBB->succ_empty() && !PriorCond.empty() && PriorFBB == 0 && MachineFunction::iterator(PriorTBB) == FallThrough && - !CanFallThrough(MBB)) { + !MBB->canFallThrough()) { bool DoTransform = true; // We have to be careful that the succs of PredBB aren't both no-successor @@ -1290,7 +1076,7 @@ ReoptimizeBlock: // In this case, we could actually be moving the return block *into* a // loop! if (DoTransform && !MBB->succ_empty() && - (!CanFallThrough(PriorTBB) || PriorTBB->empty())) + (!PriorTBB->canFallThrough() || PriorTBB->empty())) DoTransform = false; @@ -1422,13 +1208,11 @@ ReoptimizeBlock: // If the prior block doesn't fall through into this block, and if this // block doesn't fall through into some other block, see if we can find a // place to move this block where a fall-through will happen. 
- if (!CanFallThrough(&PrevBB, PriorUnAnalyzable, - PriorTBB, PriorFBB, PriorCond)) { + if (!PrevBB.canFallThrough()) { // Now we know that there was no fall-through into this block, check to // see if it has a fall-through into its successor. - bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB, - CurCond); + bool CurFallsThru = MBB->canFallThrough(); if (!MBB->isLandingPad()) { // Check all the predecessors of this block. If one of them has no fall @@ -1440,7 +1224,7 @@ ReoptimizeBlock: MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough; MachineBasicBlock *PredTBB, *PredFBB; SmallVector PredCond; - if (PredBB != MBB && !CanFallThrough(PredBB) && + if (PredBB != MBB && !PredBB->canFallThrough() && !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true) && (!CurFallsThru || !CurTBB || !CurFBB) && (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) { @@ -1479,7 +1263,7 @@ ReoptimizeBlock: // and if the successor isn't an EH destination, we can arrange for the // fallthrough to happen. 
if (SuccBB != MBB && &*SuccPrev != MBB && - !CanFallThrough(SuccPrev) && !CurUnAnalyzable && + !SuccPrev->canFallThrough() && !CurUnAnalyzable && !SuccBB->isLandingPad()) { MBB->moveBefore(SuccBB); MadeChange = true; diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index 4920755c227b..b08739564060 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -105,18 +105,10 @@ namespace llvm { unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, unsigned maxCommonTailLength); - bool TailDuplicateBlocks(MachineFunction &MF); - bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF); - bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); void RemoveDeadBlock(MachineBasicBlock *MBB); bool OptimizeImpDefsBlock(MachineBasicBlock *MBB); - - bool CanFallThrough(MachineBasicBlock *CurBB); - bool CanFallThrough(MachineBasicBlock *CurBB, bool BranchUnAnalyzable, - MachineBasicBlock *TBB, MachineBasicBlock *FBB, - const SmallVectorImpl &Cond); }; } diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9583edcbe44a..6f86614c90d8 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -63,6 +63,7 @@ add_llvm_library(LLVMCodeGen StackProtector.cpp StackSlotColoring.cpp StrongPHIElimination.cpp + TailDuplication.cpp TargetInstrInfoImpl.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 984e0135b8c6..1b39fec395fa 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -316,7 +316,6 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(unsigned AntiDepReg, unsigned CriticalAntiDepBreaker:: BreakAntiDependencies(std::vector& SUnits, - CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex) { diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h 
b/lib/CodeGen/CriticalAntiDepBreaker.h index 5664d852fdfe..496888d45f9d 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -25,6 +25,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" +#include namespace llvm { class CriticalAntiDepBreaker : public AntiDepBreaker { @@ -64,13 +65,6 @@ namespace llvm { CriticalAntiDepBreaker(MachineFunction& MFi); ~CriticalAntiDepBreaker(); - /// GetMaxTrials - Critical path anti-dependence breaking requires - /// only a single pass - unsigned GetMaxTrials() { return 1; } - - /// NeedCandidates - Candidates not needed. - bool NeedCandidates() { return false; } - /// Start - Initialize anti-dep breaking for a new basic block. void StartBlock(MachineBasicBlock *BB); @@ -78,7 +72,6 @@ namespace llvm { /// of the ScheduleDAG and break them by renaming registers. /// unsigned BreakAntiDependencies(std::vector& SUnits, - CandidateMap& Candidates, MachineBasicBlock::iterator& Begin, MachineBasicBlock::iterator& End, unsigned InsertPosIndex); diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 7fc62a9a96c4..9b516ed75a88 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -332,7 +332,7 @@ bool DwarfEHPrepare::PromoteStackTemporaries() { if (ExceptionValueVar && DT && DF && isAllocaPromotable(ExceptionValueVar)) { // Turn the exception temporary into registers and phi nodes if possible. 
std::vector Allocas(1, ExceptionValueVar); - PromoteMemToReg(Allocas, *DT, *DF, ExceptionValueVar->getContext()); + PromoteMemToReg(Allocas, *DT, *DF); return true; } return false; diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 45f08b168a49..c23d7070a34e 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -608,7 +608,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { if (TII->DefinesPredicate(I, PredDefs)) BBI.ClobbersPred = true; - if (!TID.isPredicable()) { + if (!TII->isPredicable(I)) { BBI.IsUnpredicable = true; return; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 0db459bb9163..242cba5b64e3 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -35,6 +35,8 @@ static cl::opt DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt DisableBranchFold("disable-branch-fold", cl::Hidden, cl::desc("Disable branch folding")); +static cl::opt DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, + cl::desc("Disable tail duplication")); static cl::opt DisableCodePlace("disable-code-place", cl::Hidden, cl::desc("Disable code placement")); static cl::opt DisableSSC("disable-ssc", cl::Hidden, @@ -66,6 +68,11 @@ static cl::opt EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); +// Enable or disable an experimental optimization to split GEPs +// and run a special GVN pass which does not examine loads, in +// an effort to factor out redundancy implicit in complex GEPs. +static cl::opt EnableSplitGEPGVN("split-gep-gvn", cl::Hidden, + cl::desc("Split GEPs and run no-load GVN")); LLVMTargetMachine::LLVMTargetMachine(const Target &T, const std::string &TargetTriple) @@ -223,6 +230,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // Standard LLVM-Level Passes. 
+ // Optionally, run split-GEPs and no-load GVN. + if (EnableSplitGEPGVN) { + PM.add(createGEPSplitterPass()); + PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true)); + } + // Run loop strength reduction before anything else. if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -333,15 +346,17 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, printAndVerify(PM, "After BranchFolding"); } + // Tail duplication. + if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) { + PM.add(createTailDuplicatePass()); + printAndVerify(PM, "After TailDuplicate"); + } + PM.add(createGCMachineCodeAnalysisPass()); if (PrintGCInfo) PM.add(createGCInfoPrinter(errs())); - // Fold redundant debug labels. - PM.add(createDebugLabelFoldingPass()); - printAndVerify(PM, "After DebugLabelFolding"); - if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { PM.add(createCodePlacementOptPass()); printAndVerify(PM, "After CodePlacementOpt"); diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 23dce4a91a13..f1bd5735439d 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -55,10 +55,6 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) { SUnit *OnlyAvailablePred = 0; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (IgnoreAntiDep && - ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) - continue; - SUnit &Pred = *I->getSUnit(); if (!Pred.isScheduled) { // We found an available, but not scheduled, predecessor.
If it's the @@ -78,10 +74,6 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { unsigned NumNodesBlocking = 0; for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && - ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) - continue; - if (getSingleUnscheduledPred(I->getSUnit()) == SU) ++NumNodesBlocking; } @@ -98,10 +90,6 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) { void LatencyPriorityQueue::ScheduledNode(SUnit *SU) { for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && - ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) - continue; - AdjustPriorityOfUnscheduledPreds(I->getSUnit()); } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index bbfc82b5d96d..24adf364e710 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -136,7 +136,8 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end(); mbbi != mbbe; ++mbbi) { - OS << ((Value*)mbbi->getBasicBlock())->getName() << ":\n"; + OS << "BB#" << mbbi->getNumber() + << ":\t\t# derived from " << mbbi->getName() << "\n"; for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { OS << getInstructionIndex(mii) << '\t' << *mii; @@ -658,7 +659,7 @@ void LiveIntervals::computeIntervals() { MachineBasicBlock *MBB = MBBI; // Track the index of the current machine instr. 
SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n"); + DEBUG(errs() << MBB->getName() << ":\n"); MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); @@ -1094,6 +1095,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, NewVReg = mri_->createVirtualRegister(rc); vrm.grow(); CreatedNewVReg = true; + + // The new virtual register should get the same allocation hints as the + // old one. + std::pair Hint = mri_->getRegAllocationHint(Reg); + if (Hint.first || Hint.second) + mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second); } if (!TryFold) diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 16a79bb54e97..68f80acf1562 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -279,6 +279,43 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { PhysRegUse[SubReg] = MI; } +/// FindLastRefOrPartRef - Return the last reference or partial reference of +/// the specified register, or null if it has no recorded def or use. +MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { + MachineInstr *LastDef = PhysRegDef[Reg]; + MachineInstr *LastUse = PhysRegUse[Reg]; + if (!LastDef && !LastUse) + return 0; + + MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; + unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; + MachineInstr *LastPartDef = 0; + unsigned LastPartDefDist = 0; + for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); + unsigned SubReg = *SubRegs; ++SubRegs) { + MachineInstr *Def = PhysRegDef[SubReg]; + if (Def && Def != LastDef) { + // There was a def of this sub-register in between. This is a partial + // def, keep track of the last one.
+ unsigned Dist = DistanceMap[Def]; + if (Dist > LastPartDefDist) { + LastPartDefDist = Dist; + LastPartDef = Def; + } + continue; + } + if (MachineInstr *Use = PhysRegUse[SubReg]) { + unsigned Dist = DistanceMap[Use]; + if (Dist > LastRefOrPartRefDist) { + LastRefOrPartRefDist = Dist; + LastRefOrPartRef = Use; + } + } + } + + return LastRefOrPartRef; +} + bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { MachineInstr *LastDef = PhysRegDef[Reg]; MachineInstr *LastUse = PhysRegUse[Reg]; @@ -373,7 +410,16 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { if (NeedDef) PhysRegDef[Reg]->addOperand(MachineOperand::CreateReg(SubReg, true/*IsDef*/, true/*IsImp*/)); - LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); + MachineInstr *LastSubRef = FindLastRefOrPartRef(SubReg); + if (LastSubRef) + LastSubRef->addRegisterKilled(SubReg, TRI, true); + else { + LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); + PhysRegUse[SubReg] = LastRefOrPartRef; + for (const unsigned *SSRegs = TRI->getSubRegisters(SubReg); + unsigned SSReg = *SSRegs; ++SSRegs) + PhysRegUse[SSReg] = LastRefOrPartRef; + } for (const unsigned *SS = TRI->getSubRegisters(SubReg); *SS; ++SS) PartUses.erase(*SS); } @@ -656,35 +702,45 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) { .push_back(BBI->getOperand(i).getReg()); } +bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB, + unsigned Reg, + MachineRegisterInfo &MRI) { + unsigned Num = MBB.getNumber(); + + // Reg is live-through. + if (AliveBlocks.test(Num)) + return true; + + // Registers defined in MBB cannot be live in. + const MachineInstr *Def = MRI.getVRegDef(Reg); + if (Def && Def->getParent() == &MBB) + return false; + + // Reg was not defined in MBB, was it killed here? + return findKill(&MBB); +} + /// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB.
All /// variables that are live out of DomBB will be marked as passing live through /// BB. void LiveVariables::addNewBlock(MachineBasicBlock *BB, - MachineBasicBlock *DomBB) { + MachineBasicBlock *DomBB, + MachineBasicBlock *SuccBB) { const unsigned NumNew = BB->getNumber(); - const unsigned NumDom = DomBB->getNumber(); + + // All registers used by PHI nodes in SuccBB must be live through BB. + for (MachineBasicBlock::const_iterator BBI = SuccBB->begin(), + BBE = SuccBB->end(); + BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) + for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) + if (BBI->getOperand(i+1).getMBB() == BB) + getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew); // Update info for all live variables for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) { VarInfo &VI = getVarInfo(Reg); - - // Anything live through DomBB is also live through BB. - if (VI.AliveBlocks.test(NumDom)) { + if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI)) VI.AliveBlocks.set(NumNew); - continue; - } - - // Variables not defined in DomBB cannot be live out. - const MachineInstr *Def = MRI->getVRegDef(Reg); - if (!Def || Def->getParent() != DomBB) - continue; - - // Killed by DomBB? 
- if (VI.findKill(DomBB)) - continue; - - // This register is defined in DomBB and live out - VI.AliveBlocks.set(NumNew); } } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index cd52825d21f1..e55e3694bcc4 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -172,6 +172,13 @@ static inline void OutputReg(raw_ostream &os, unsigned RegNo, os << " %reg" << RegNo; } +StringRef MachineBasicBlock::getName() const { + if (const BasicBlock *LBB = getBasicBlock()) + return LBB->getName(); + else + return "(null)"; +} + void MachineBasicBlock::print(raw_ostream &OS) const { const MachineFunction *MF = getParent(); if (!MF) { @@ -272,8 +279,9 @@ void MachineBasicBlock::updateTerminator() { // successors is its layout successor, rewrite it to a fallthrough // conditional branch. if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) + return; TII->RemoveBranch(*this); - TII->ReverseBranchCondition(Cond); TII->InsertBranch(*this, FBB, 0, Cond); } else if (isLayoutSuccessor(FBB)) { TII->RemoveBranch(*this); @@ -285,8 +293,13 @@ void MachineBasicBlock::updateTerminator() { MachineBasicBlock *MBBB = *next(succ_begin()); if (MBBA == TBB) std::swap(MBBB, MBBA); if (isLayoutSuccessor(TBB)) { + if (TII->ReverseBranchCondition(Cond)) { + // We can't reverse the condition, add an unconditional branch. 
+ Cond.clear(); + TII->InsertBranch(*this, MBBA, 0, Cond); + return; + } TII->RemoveBranch(*this); - TII->ReverseBranchCondition(Cond); TII->InsertBranch(*this, MBBA, 0, Cond); } else if (!isLayoutSuccessor(MBBA)) { TII->RemoveBranch(*this); @@ -349,6 +362,51 @@ bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const { return next(I) == MachineFunction::const_iterator(MBB); } +bool MachineBasicBlock::canFallThrough() { + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo(); + bool BranchUnAnalyzable = TII->AnalyzeBranch(*this, TBB, FBB, Cond, true); + + MachineFunction::iterator Fallthrough = this; + ++Fallthrough; + // If FallthroughBlock is off the end of the function, it can't fall through. + if (Fallthrough == getParent()->end()) + return false; + + // If FallthroughBlock isn't a successor, no fallthrough is possible. + if (!isSuccessor(Fallthrough)) + return false; + + // If we couldn't analyze the branch, examine the last instruction. + // If the block doesn't end in a known control barrier, assume fallthrough + // is possible. The isPredicable check is needed because this code can be + // called during IfConversion, where an instruction which is normally a + // Barrier is predicated and thus no longer an actual control barrier. This + // is over-conservative though, because if an instruction isn't actually + // predicated we could still treat it like a barrier. + if (BranchUnAnalyzable) + return empty() || !back().getDesc().isBarrier() || + back().getDesc().isPredicable(); + + // If there is no branch, control always falls through. + if (TBB == 0) return true; + + // If there is some explicit branch to the fallthrough block, it can obviously + // reach, even though the branch should get folded to fall through implicitly. 
+ if (MachineFunction::iterator(TBB) == Fallthrough || + MachineFunction::iterator(FBB) == Fallthrough) + return true; + + // If it's an unconditional branch to some block not the fall through, it + // doesn't fall through. + if (Cond.empty()) return false; + + // Otherwise, if it is conditional and has no explicit false block, it falls + // through. + return FBB == 0; +} + /// removeFromParent - This method unlinks 'this' from the containing function, /// and returns it, but does not delete it. MachineBasicBlock *MachineBasicBlock::removeFromParent() { diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 81d1301336b8..d20f4464e502 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -359,14 +359,16 @@ void MachineFunction::print(raw_ostream &OS) const { namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + static std::string getGraphName(const MachineFunction *F) { return "CFG for '" + F->getFunction()->getNameStr() + "' function"; } - static std::string getNodeLabel(const MachineBasicBlock *Node, - const MachineFunction *Graph, - bool ShortNames) { - if (ShortNames && Node->getBasicBlock() && + std::string getNodeLabel(const MachineBasicBlock *Node, + const MachineFunction *Graph) { + if (isSimple () && Node->getBasicBlock() && !Node->getBasicBlock()->getName().empty()) return Node->getBasicBlock()->getNameStr() + ":"; @@ -374,7 +376,7 @@ namespace llvm { { raw_string_ostream OSS(OutStr); - if (ShortNames) + if (isSimple()) OSS << Node->getNumber() << ':'; else Node->print(OSS); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b250faa62ae6..f73a5a362112 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1148,10 +1148,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // TODO: print InlinedAtLoc 
information DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc); - DICompileUnit CU(DLT.Scope); - if (!CU.isNull()) - OS << " dbg:" << CU.getDirectory() << '/' << CU.getFilename() << ":" - << DLT.Line << ":" << DLT.Col; + DIScope Scope(DLT.Scope); + OS << " dbg:"; + if (!Scope.isNull()) + OS << Scope.getDirectory() << ':' << Scope.getFilename() << ':'; + OS << DLT.Line << ":" << DLT.Col; } OS << "\n"; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 33b6b823446e..66de5359df99 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -107,6 +107,10 @@ namespace { /// void HoistRegion(MachineDomTreeNode *N); + /// isLoadFromConstantMemory - Return true if the given instruction is a + /// load from constant memory. + bool isLoadFromConstantMemory(MachineInstr *MI); + /// ExtractHoistableLoad - Unfold a load from the given machineinstr if /// the load itself could be hoisted. Return the unfolded and hoistable /// load, or null if the load couldn't be unfolded or if it wouldn't @@ -338,6 +342,24 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) { return false; } +/// isLoadFromConstantMemory - Return true if the given instruction is a +/// load from constant memory. Machine LICM will hoist these even if they are +/// not re-materializable. +bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) { + if (!MI->getDesc().mayLoad()) return false; + if (!MI->hasOneMemOperand()) return false; + MachineMemOperand *MMO = *MI->memoperands_begin(); + if (MMO->isVolatile()) return false; + if (!MMO->getValue()) return false; + const PseudoSourceValue *PSV = dyn_cast(MMO->getValue()); + if (PSV) { + MachineFunction &MF = *MI->getParent()->getParent(); + return PSV->isConstant(MF.getFrameInfo()); + } else { + return AA->pointsToConstantMemory(MMO->getValue()); + } +} + /// IsProfitableToHoist - Return true if it is potentially profitable to hoist /// the given loop invariant. 
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { @@ -347,8 +369,15 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { // FIXME: For now, only hoist re-materilizable instructions. LICM will // increase register pressure. We want to make sure it doesn't increase // spilling. - if (!TII->isTriviallyReMaterializable(&MI, AA)) - return false; + // Also hoist loads from constant memory, e.g. load from stubs, GOT. Hoisting + // these tends to help performance in low register pressure situations. The + // trade-off is that it may cause spills in high pressure situations. It will end up + // adding a store in the loop preheader. But the reload is no more expensive. + // The side benefit is these loads are frequently CSE'ed. + if (!TII->isTriviallyReMaterializable(&MI, AA)) { + if (!isLoadFromConstantMemory(&MI)) + return false; + } // If result(s) of this instruction is used by PHIs, then don't hoist it. // The presence of joins makes it difficult for current register allocator @@ -368,18 +397,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // If not, we may be able to unfold a load and hoist that. // First test whether the instruction is loading from an amenable // memory location. - if (!MI->getDesc().mayLoad()) return 0; - if (!MI->hasOneMemOperand()) return 0; - MachineMemOperand *MMO = *MI->memoperands_begin(); - if (MMO->isVolatile()) return 0; - MachineFunction &MF = *MI->getParent()->getParent(); - if (!MMO->getValue()) return 0; - if (const PseudoSourceValue *PSV = - dyn_cast(MMO->getValue())) { - if (!PSV->isConstant(MF.getFrameInfo())) return 0; - } else { - if (!AA->pointsToConstantMemory(MMO->getValue())) return 0; - } + if (!isLoadFromConstantMemory(MI)) + return 0; + // Next determine the register class for a temporary register.
unsigned LoadRegIndex; unsigned NewOpc = @@ -393,6 +413,8 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI); // Ok, we're unfolding. Create a temporary register and do the unfold. unsigned Reg = RegInfo->createVirtualRegister(RC); + + MachineFunction &MF = *MI->getParent()->getParent(); SmallVector NewMIs; bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, @@ -487,10 +509,10 @@ void MachineLICM::Hoist(MachineInstr *MI) { errs() << "Hoisting " << *MI; if (CurPreheader->getBasicBlock()) errs() << " to MachineBasicBlock " - << CurPreheader->getBasicBlock()->getName(); + << CurPreheader->getName(); if (MI->getParent()->getBasicBlock()) errs() << " from MachineBasicBlock " - << MI->getParent()->getBasicBlock()->getName(); + << MI->getParent()->getName(); errs() << "\n"; }); diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 4b067a0aa98b..ed5bb5e5410c 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -293,75 +293,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const { return 0; } -//===----------------------------------------------------------------------===// -/// DebugLabelFolding pass - This pass prunes out redundant labels. This allows -/// a info consumer to determine if the range of two labels is empty, by seeing -/// if the labels map to the same reduced label. 
- -namespace llvm { - -struct DebugLabelFolder : public MachineFunctionPass { - static char ID; - DebugLabelFolder() : MachineFunctionPass(&ID) {} - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addPreservedID(MachineLoopInfoID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - virtual bool runOnMachineFunction(MachineFunction &MF); - virtual const char *getPassName() const { return "Label Folder"; } -}; - -char DebugLabelFolder::ID = 0; - -bool DebugLabelFolder::runOnMachineFunction(MachineFunction &MF) { - // Get machine module info. - MachineModuleInfo *MMI = getAnalysisIfAvailable(); - if (!MMI) return false; - - // Track if change is made. - bool MadeChange = false; - // No prior label to begin. - unsigned PriorLabel = 0; - - // Iterate through basic blocks. - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); - BB != E; ++BB) { - // Iterate through instructions. - for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { - // Is it a label. - if (I->isDebugLabel() && !MMI->isDbgLabelUsed(I->getOperand(0).getImm())){ - // The label ID # is always operand #0, an immediate. - unsigned NextLabel = I->getOperand(0).getImm(); - - // If there was an immediate prior label. - if (PriorLabel) { - // Remap the current label to prior label. - MMI->RemapLabel(NextLabel, PriorLabel); - // Delete the current label. - I = BB->erase(I); - // Indicate a change has been made. - MadeChange = true; - continue; - } else { - // Start a new round. - PriorLabel = NextLabel; - } - } else { - // No consecutive labels. 
- PriorLabel = 0; - } - - ++I; - } - } - - return MadeChange; -} - -FunctionPass *createDebugLabelFoldingPass() { return new DebugLabelFolder(); } - -} diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index a1c74c0c48a4..d9f4c997b905 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -305,7 +305,7 @@ void MachineVerifier::report(const char *msg, const MachineFunction *MF) { void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) { assert(MBB); report(msg, MBB->getParent()); - *OS << "- basic block: " << MBB->getBasicBlock()->getNameStr() + *OS << "- basic block: " << MBB->getName() << " " << (void*)MBB << " (BB#" << MBB->getNumber() << ")\n"; } diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index b3802ed6725d..2e30cc6abd32 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -353,7 +353,7 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, // We break edges when registers are live out from the predecessor block // (not considering PHI nodes). If the register is live in to this block // anyway, we would gain nothing from splitting. - if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV)) + if (!LV.isLiveIn(Reg, MBB) && isLiveOut(Reg, *PreMBB, LV)) SplitCriticalEdge(PreMBB, &MBB); } } @@ -406,22 +406,6 @@ bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, return false; } -bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB, - LiveVariables &LV) { - LiveVariables::VarInfo &VI = LV.getVarInfo(Reg); - - if (VI.AliveBlocks.test(MBB.getNumber())) - return true; - - // defined in MBB? - const MachineInstr *Def = MRI->getVRegDef(Reg); - if (Def && Def->getParent() == &MBB) - return false; - - // killed in MBB? 
- return VI.findKill(&MBB); -} - MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, MachineBasicBlock *B) { assert(A && B && "Missing MBB end point"); @@ -439,21 +423,21 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, ++NumSplits; MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->push_back(NMBB); + MF->insert(next(MachineFunction::iterator(A)), NMBB); DEBUG(errs() << "PHIElimination splitting critical edge:" " BB#" << A->getNumber() << " -- BB#" << NMBB->getNumber() << " -- BB#" << B->getNumber() << '\n'); A->ReplaceUsesOfBlockWith(B, NMBB); - // If A may fall through to B, we may have to insert a branch. - if (A->isLayoutSuccessor(B)) - A->updateTerminator(); + A->updateTerminator(); - // Insert unconditional "jump B" instruction in NMBB. + // Insert unconditional "jump B" instruction in NMBB if necessary. NMBB->addSuccessor(B); - Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond); + if (!NMBB->isLayoutSuccessor(B)) { + Cond.clear(); + MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond); + } // Fix PHI nodes in B so they refer to NMBB instead of A for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); @@ -463,7 +447,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, i->getOperand(ni+1).setMBB(NMBB); if (LiveVariables *LV=getAnalysisIfAvailable()) - LV->addNewBlock(NMBB, A); + LV->addNewBlock(NMBB, A, B); if (MachineDominatorTree *MDT=getAnalysisIfAvailable()) MDT->addNewBlock(NMBB, A); diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index f8c9fe728457..f5872cbe8d54 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -99,12 +99,6 @@ namespace llvm { bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB, LiveVariables &LV); - /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI - /// source registers. 
This means that Reg is either killed by MBB or passes - /// through it. - bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB, - LiveVariables &LV); - /// SplitCriticalEdge - Split a critical edge from A to B by /// inserting a new MBB. Update branches in A and PHI instructions /// in B. Return the new block. diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 5f1f1f3580c1..9101fce27a6f 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -175,11 +175,10 @@ namespace { void FixupKills(MachineBasicBlock *MBB); private: - void ReleaseSucc(SUnit *SU, SDep *SuccEdge, bool IgnoreAntiDep); - void ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep); - void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle, bool IgnoreAntiDep); - void ListScheduleTopDown( - AntiDepBreaker::CandidateMap *AntiDepCandidates); + void ReleaseSucc(SUnit *SU, SDep *SuccEdge); + void ReleaseSuccessors(SUnit *SU); + void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); + void ListScheduleTopDown(); void StartBlockForKills(MachineBasicBlock *BB); // ToggleKillFlag - Toggle a register operand kill flag. Other @@ -322,50 +321,24 @@ void SchedulePostRATDList::Schedule() { BuildSchedGraph(AA); if (AntiDepBreak != NULL) { - AntiDepBreaker::CandidateMap AntiDepCandidates; - const bool NeedCandidates = AntiDepBreak->NeedCandidates(); + unsigned Broken = + AntiDepBreak->BreakAntiDependencies(SUnits, Begin, InsertPos, + InsertPosIndex); - for (unsigned i = 0, Trials = AntiDepBreak->GetMaxTrials(); - i < Trials; ++i) { - DEBUG(errs() << "\n********** Break Anti-Deps, Trial " << - i << " **********\n"); - - // If candidates are required, then schedule forward ignoring - // anti-dependencies to collect the candidate operands for - // anti-dependence breaking. 
The candidates will be the def - // operands for the anti-dependencies that if broken would allow - // an improved schedule - if (NeedCandidates) { - DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) - SUnits[su].dumpAll(this)); - - AntiDepCandidates.clear(); - AvailableQueue.initNodes(SUnits); - ListScheduleTopDown(&AntiDepCandidates); - AvailableQueue.releaseState(); - } - - unsigned Broken = - AntiDepBreak->BreakAntiDependencies(SUnits, AntiDepCandidates, - Begin, InsertPos, InsertPosIndex); - + if (Broken != 0) { // We made changes. Update the dependency graph. // Theoretically we could update the graph in place: // When a live range is changed to use a different register, remove // the def's anti-dependence *and* output-dependence edges due to // that register, and add new anti-dependence and output-dependence // edges based on the next live range of the register. - if ((Broken != 0) || NeedCandidates) { - SUnits.clear(); - Sequence.clear(); - EntrySU = SUnit(); - ExitSU = SUnit(); - BuildSchedGraph(AA); - } - + SUnits.clear(); + Sequence.clear(); + EntrySU = SUnit(); + ExitSU = SUnit(); + BuildSchedGraph(AA); + NumFixedAnti += Broken; - if (Broken == 0) - break; } } @@ -374,7 +347,7 @@ void SchedulePostRATDList::Schedule() { SUnits[su].dumpAll(this)); AvailableQueue.initNodes(SUnits); - ListScheduleTopDown(NULL); + ListScheduleTopDown(); AvailableQueue.releaseState(); } @@ -573,8 +546,7 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to /// the PendingQueue if the count reaches zero. Also update its cycle bound. 
-void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge, - bool IgnoreAntiDep) { +void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); #ifndef NDEBUG @@ -590,8 +562,7 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge, // Compute how many cycles it will be before this actually becomes // available. This is the max of the start time of all predecessors plus // their latencies. - SuccSU->setDepthToAtLeast(SU->getDepth(IgnoreAntiDep) + - SuccEdge->getLatency(), IgnoreAntiDep); + SuccSU->setDepthToAtLeast(SU->getDepth() + SuccEdge->getLatency()); // If all the node's predecessors are scheduled, this node is ready // to be scheduled. Ignore the special ExitSU node. @@ -600,40 +571,34 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge, } /// ReleaseSuccessors - Call ReleaseSucc on each of SU's successors. -void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) { +void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && - ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) - continue; - ReleaseSucc(SU, &*I, IgnoreAntiDep); + ReleaseSucc(SU, &*I); } } /// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending /// count of its successors. If a successor pending count is zero, add it to /// the Available queue. 
-void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle, - bool IgnoreAntiDep) { +void SchedulePostRATDList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { DEBUG(errs() << "*** Scheduling [" << CurCycle << "]: "); DEBUG(SU->dump(this)); Sequence.push_back(SU); - assert(CurCycle >= SU->getDepth(IgnoreAntiDep) && + assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle, IgnoreAntiDep); + SU->setDepthToAtLeast(CurCycle); - ReleaseSuccessors(SU, IgnoreAntiDep); + ReleaseSuccessors(SU); SU->isScheduled = true; AvailableQueue.ScheduledNode(SU); } /// ListScheduleTopDown - The main loop of list scheduling for top-down /// schedulers. -void SchedulePostRATDList::ListScheduleTopDown( - AntiDepBreaker::CandidateMap *AntiDepCandidates) { +void SchedulePostRATDList::ListScheduleTopDown() { unsigned CurCycle = 0; - const bool IgnoreAntiDep = (AntiDepCandidates != NULL); // We're scheduling top-down but we're visiting the regions in // bottom-up order, so we don't know the hazards at the start of a @@ -641,33 +606,13 @@ void SchedulePostRATDList::ListScheduleTopDown( // blocks are a single region). HazardRec->Reset(); - // If ignoring anti-dependencies, the Schedule DAG still has Anti - // dep edges, but we ignore them for scheduling purposes - AvailableQueue.setIgnoreAntiDep(IgnoreAntiDep); - // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU, IgnoreAntiDep); + ReleaseSuccessors(&EntrySU); - // Add all leaves to Available queue. If ignoring antideps we also - // adjust the predecessor count for each node to not include antidep - // edges. + // Add all leaves to Available queue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. bool available = SUnits[i].Preds.empty(); - // If we are ignoring anti-dependencies then a node that has only - // anti-dep predecessors is available. 
- if (!available && IgnoreAntiDep) { - available = true; - for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(), - E = SUnits[i].Preds.end(); I != E; ++I) { - if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output)) { - available = false; - } else { - SUnits[i].NumPredsLeft -= 1; - } - } - } - if (available) { AvailableQueue.push(&SUnits[i]); SUnits[i].isAvailable = true; @@ -687,21 +632,21 @@ void SchedulePostRATDList::ListScheduleTopDown( // so, add them to the available queue. unsigned MinDepth = ~0u; for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->getDepth(IgnoreAntiDep) <= CurCycle) { + if (PendingQueue[i]->getDepth() <= CurCycle) { AvailableQueue.push(PendingQueue[i]); PendingQueue[i]->isAvailable = true; PendingQueue[i] = PendingQueue.back(); PendingQueue.pop_back(); --i; --e; - } else if (PendingQueue[i]->getDepth(IgnoreAntiDep) < MinDepth) - MinDepth = PendingQueue[i]->getDepth(IgnoreAntiDep); + } else if (PendingQueue[i]->getDepth() < MinDepth) + MinDepth = PendingQueue[i]->getDepth(); } DEBUG(errs() << "\n*** Examining Available\n"; LatencyPriorityQueue q = AvailableQueue; while (!q.empty()) { SUnit *su = q.pop(); - errs() << "Height " << su->getHeight(IgnoreAntiDep) << ": "; + errs() << "Height " << su->getHeight() << ": "; su->dump(this); }); @@ -731,30 +676,8 @@ void SchedulePostRATDList::ListScheduleTopDown( // If we found a node to schedule... if (FoundSUnit) { - // If we are ignoring anti-dependencies and the SUnit we are - // scheduling has an antidep predecessor that has not been - // scheduled, then we will need to break that antidep if we want - // to get this schedule when not ignoring anti-dependencies. 
- if (IgnoreAntiDep) { - AntiDepBreaker::AntiDepRegVector AntiDepRegs; - for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(), - E = FoundSUnit->Preds.end(); I != E; ++I) { - if (((I->getKind() == SDep::Anti) || - (I->getKind() == SDep::Output)) && - !I->getSUnit()->isScheduled) - AntiDepRegs.push_back(I->getReg()); - } - - if (AntiDepRegs.size() > 0) { - DEBUG(errs() << "*** AntiDep Candidate: "); - DEBUG(FoundSUnit->dump(this)); - AntiDepCandidates->insert( - AntiDepBreaker::CandidateMap::value_type(FoundSUnit, AntiDepRegs)); - } - } - // ... schedule the node... - ScheduleNodeTopDown(FoundSUnit, CurCycle, IgnoreAntiDep); + ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); CycleHasInsts = true; @@ -775,8 +698,7 @@ void SchedulePostRATDList::ListScheduleTopDown( // just advance the current cycle and try again. DEBUG(errs() << "*** Stall in cycle " << CurCycle << '\n'); HazardRec->AdvanceCycle(); - if (!IgnoreAntiDep) - ++NumStalls; + ++NumStalls; } else { // Otherwise, we have no instructions to issue and we have instructions // that will fault if we don't do this right. 
This is the case for @@ -784,8 +706,7 @@ void SchedulePostRATDList::ListScheduleTopDown( DEBUG(errs() << "*** Emitting noop in cycle " << CurCycle << '\n'); HazardRec->EmitNoop(); Sequence.push_back(0); // NULL here means noop - if (!IgnoreAntiDep) - ++NumNoops; + ++NumNoops; } ++CurCycle; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 455964b5c5ad..c9a33d885154 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -75,10 +75,11 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { SmallSet ImpDefRegs; SmallVector ImpDefMIs; - MachineBasicBlock *Entry = fn.begin(); + SmallVector RUses; SmallPtrSet Visited; SmallPtrSet ModInsts; + MachineBasicBlock *Entry = fn.begin(); for (df_ext_iterator > DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); DFI != E; ++DFI) { @@ -182,53 +183,87 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // is not an implicit_def, do not insert implicit_def's before the // uses. bool Skip = false; + SmallVector DeadImpDefs; for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg), DE = mri_->def_end(); DI != DE; ++DI) { - if (DI->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { + MachineInstr *DeadImpDef = &*DI; + if (DeadImpDef->getOpcode() != TargetInstrInfo::IMPLICIT_DEF) { Skip = true; break; } + DeadImpDefs.push_back(DeadImpDef); } if (Skip) continue; // The only implicit_def which we want to keep are those that are live // out of its block. - MI->eraseFromParent(); + for (unsigned j = 0, ee = DeadImpDefs.size(); j != ee; ++j) + DeadImpDefs[j]->eraseFromParent(); Changed = true; + // Process each use instruction once. 
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), - UE = mri_->use_end(); UI != UE; ) { - MachineOperand &RMO = UI.getOperand(); + UE = mri_->use_end(); UI != UE; ++UI) { MachineInstr *RMI = &*UI; - ++UI; - if (ModInsts.count(RMI)) - continue; MachineBasicBlock *RMBB = RMI->getParent(); if (RMBB == MBB) continue; + if (ModInsts.insert(RMI)) + RUses.push_back(RMI); + } + + for (unsigned i = 0, e = RUses.size(); i != e; ++i) { + MachineInstr *RMI = RUses[i]; // Turn a copy use into an implicit_def. unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && Reg == SrcReg) { - if (RMO.isKill()) { + RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); + + bool isKill = false; + SmallVector Ops; + for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { + MachineOperand &RRMO = RMI->getOperand(j); + if (RRMO.isReg() && RRMO.getReg() == Reg) { + Ops.push_back(j); + if (RRMO.isKill()) + isKill = true; + } + } + // Leave the other operands alone. + for (unsigned j = 0, ee = Ops.size(); j != ee; ++j) { + unsigned OpIdx = Ops[j]; + RMI->RemoveOperand(OpIdx-j); + } + + // Update LiveVariables varinfo if the instruction is a kill. + if (isKill) { LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); vi.removeKill(RMI); } - RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF)); - for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j) - RMI->RemoveOperand(j); - ModInsts.insert(RMI); continue; } + // Replace Reg with a new vreg that's marked implicit.
const TargetRegisterClass* RC = mri_->getRegClass(Reg); unsigned NewVReg = mri_->createVirtualRegister(RC); - RMO.setReg(NewVReg); - RMO.setIsUndef(); - RMO.setIsKill(); + bool isKill = true; + for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { + MachineOperand &RRMO = RMI->getOperand(j); + if (RRMO.isReg() && RRMO.getReg() == Reg) { + RRMO.setReg(NewVReg); + RRMO.setIsUndef(); + if (isKill) { + // Only the first operand of NewVReg is marked kill. + RRMO.setIsKill(); + isKill = false; + } + } + } } + RUses.clear(); } ModInsts.clear(); ImpDefRegs.clear(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index fff50da947c1..4ff512932f8e 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -64,9 +64,31 @@ linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); namespace { + // When we allocate a register, add it to a fixed-size queue of + // registers to skip in subsequent allocations. This trades a small + // amount of register pressure and increased spills for flexibility in + // the post-pass scheduler. + // + // Note that the number of registers used for reloading spills + // will be one greater than the value of this option. + // + // One big limitation of this is that it doesn't differentiate between + // different register classes. So on x86-64, if there is xmm register + // pressure, it can cause fewer GPRs to be held in the queue. + static cl::opt + NumRecentlyUsedRegs("linearscan-skip-count", + cl::desc("Number of registers for linearscan to remember to skip."), + cl::init(0), + cl::Hidden); + struct RALinScan : public MachineFunctionPass { static char ID; - RALinScan() : MachineFunctionPass(&ID) {} + RALinScan() : MachineFunctionPass(&ID) { + // Initialize the queue to record recently-used registers.
+ if (NumRecentlyUsedRegs > 0) + RecentRegs.resize(NumRecentlyUsedRegs, 0); + RecentNext = RecentRegs.begin(); + } typedef std::pair IntervalPtr; typedef SmallVector IntervalPtrs; @@ -132,6 +154,20 @@ namespace { std::auto_ptr spiller_; + // The queue of recently-used registers. + SmallVector RecentRegs; + SmallVector::iterator RecentNext; + + // Record that we just picked this register. + void recordRecentlyUsed(unsigned reg) { + assert(reg != 0 && "Recently used register is NOREG!"); + if (!RecentRegs.empty()) { + *RecentNext++ = reg; + if (RecentNext == RecentRegs.end()) + RecentNext = RecentRegs.begin(); + } + } + public: virtual const char* getPassName() const { return "Linear Scan Register Allocator"; @@ -161,6 +197,12 @@ namespace { /// runOnMachineFunction - register allocate the whole function bool runOnMachineFunction(MachineFunction&); + // Determine if we skip this register due to its being recently used. + bool isRecentlyUsed(unsigned reg) const { + return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != + RecentRegs.end(); + } + private: /// linearScan - the linear scan algorithm void linearScan(); @@ -436,7 +478,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { vrm_ = &getAnalysis(); if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - spiller_.reset(createSpiller(mf_, li_, ls_, loopInfo, vrm_)); + spiller_.reset(createSpiller(mf_, li_, loopInfo, vrm_)); initIntervalSets(); @@ -833,9 +875,15 @@ void RALinScan::findIntervalsToSpill(LiveInterval *cur, namespace { struct WeightCompare { + private: + const RALinScan &Allocator; + + public: + WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {}; + typedef std::pair RegWeightPair; bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { - return LHS.second < RHS.second; + return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); } }; } @@ -1079,7 +1127,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { e = 
RC->allocation_order_end(*mf_); i != e; ++i) { unsigned reg = *i; float regWeight = SpillWeights[reg]; - if (minWeight > regWeight) + // Skip recently allocated registers. + if (minWeight > regWeight && !isRecentlyUsed(reg)) Found = true; RegsWeights.push_back(std::make_pair(reg, regWeight)); } @@ -1097,7 +1146,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { } // Sort all potential spill candidates by weight. - std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare()); + std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this)); minReg = RegsWeights[0].first; minWeight = RegsWeights[0].second; if (minWeight == HUGE_VALF) { @@ -1360,7 +1409,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // Ignore "downgraded" registers. if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; - if (isRegAvail(Reg)) { + // Skip recently allocated registers. + if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; @@ -1372,9 +1422,12 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, // If there are no free regs, or if this reg has the max inactive count, // return this register. - if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) + if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) { + // Remember what register we picked so we can skip it next time. + if (FreeReg != 0) recordRecentlyUsed(FreeReg); return FreeReg; - + } + // Continue scanning the registers, looking for the one with the highest // inactive count. 
Alkis found that this reduced register pressure very // slightly on X86 (in rev 1.94 of this file), though this should probably be @@ -1385,7 +1438,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (SkipDGRegs && DowngradedRegs.count(Reg)) continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg]) { + FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -1393,6 +1446,9 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, } } + // Remember what register we picked so we can skip it next time. + recordRecentlyUsed(FreeReg); + return FreeReg; } diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 6b27db263b25..71693d21c688 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -183,8 +183,8 @@ void SUnit::setHeightDirty() { /// setDepthToAtLeast - Update this node's successors to reflect the /// fact that this node's depth just increased. /// -void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) { - if (NewDepth <= getDepth(IgnoreAntiDep)) +void SUnit::setDepthToAtLeast(unsigned NewDepth) { + if (NewDepth <= getDepth()) return; setDepthDirty(); Depth = NewDepth; @@ -194,8 +194,8 @@ void SUnit::setDepthToAtLeast(unsigned NewDepth, bool IgnoreAntiDep) { /// setHeightToAtLeast - Update this node's predecessors to reflect the /// fact that this node's height just increased. /// -void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) { - if (NewHeight <= getHeight(IgnoreAntiDep)) +void SUnit::setHeightToAtLeast(unsigned NewHeight) { + if (NewHeight <= getHeight()) return; setHeightDirty(); Height = NewHeight; @@ -204,7 +204,7 @@ void SUnit::setHeightToAtLeast(unsigned NewHeight, bool IgnoreAntiDep) { /// ComputeDepth - Calculate the maximal path from the node to the exit. 
/// -void SUnit::ComputeDepth(bool IgnoreAntiDep) { +void SUnit::ComputeDepth() { SmallVector WorkList; WorkList.push_back(this); do { @@ -214,10 +214,6 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) { unsigned MaxPredDepth = 0; for (SUnit::const_pred_iterator I = Cur->Preds.begin(), E = Cur->Preds.end(); I != E; ++I) { - if (IgnoreAntiDep && - ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) - continue; - SUnit *PredSU = I->getSUnit(); if (PredSU->isDepthCurrent) MaxPredDepth = std::max(MaxPredDepth, @@ -241,7 +237,7 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) { /// ComputeHeight - Calculate the maximal path from the node to the entry. /// -void SUnit::ComputeHeight(bool IgnoreAntiDep) { +void SUnit::ComputeHeight() { SmallVector WorkList; WorkList.push_back(this); do { @@ -251,10 +247,6 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) { unsigned MaxSuccHeight = 0; for (SUnit::const_succ_iterator I = Cur->Succs.begin(), E = Cur->Succs.end(); I != E; ++I) { - if (IgnoreAntiDep && - ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output))) - continue; - SUnit *SuccSU = I->getSUnit(); if (SuccSU->isHeightCurrent) MaxSuccHeight = std::max(MaxSuccHeight, diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 4851d496bdbd..027f6150e26b 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -32,6 +32,9 @@ using namespace llvm; namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + static std::string getGraphName(const ScheduleDAG *G) { return G->MF.getFunction()->getName(); } @@ -57,9 +60,7 @@ namespace llvm { } - static std::string getNodeLabel(const SUnit *Node, - const ScheduleDAG *Graph, - bool ShortNames); + std::string getNodeLabel(const SUnit *Node, const ScheduleDAG *Graph); static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *Graph) { 
return "shape=Mrecord"; @@ -73,8 +74,7 @@ namespace llvm { } std::string DOTGraphTraits::getNodeLabel(const SUnit *SU, - const ScheduleDAG *G, - bool ShortNames) { + const ScheduleDAG *G) { return G->getGraphNodeLabel(SU); } diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index c766859ae9c8..80c7d7c9eb9c 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMSelectionDAG CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp + FunctionLoweringInfo.cpp InstrEmitter.cpp LegalizeDAG.cpp LegalizeFloatTypes.cpp @@ -15,7 +16,7 @@ add_llvm_library(LLVMSelectionDAG ScheduleDAGRRList.cpp ScheduleDAGSDNodes.cpp SelectionDAG.cpp - SelectionDAGBuild.cpp + SelectionDAGBuilder.cpp SelectionDAGISel.cpp SelectionDAGPrinter.cpp TargetLowering.cpp diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 7dbc136f3a62..5eb9ca1ebe02 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -54,7 +54,8 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "SelectionDAGBuild.h" +#include "SelectionDAGBuilder.h" +#include "FunctionLoweringInfo.h" using namespace llvm; unsigned FastISel::getRegForValue(Value *V) { diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp new file mode 100644 index 000000000000..e3b25c2a85cc --- /dev/null +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -0,0 +1,355 @@ +//===-- FunctionLoweringInfo.cpp ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating functions from LLVM IR into +// Machine IR. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "function-lowering-info" +#include "FunctionLoweringInfo.h" +#include "llvm/CallingConv.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Instructions.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/LLVMContext.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameInfo.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" +#include +using namespace llvm; + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex) { + // Base case: We're done. + if (Indices && Indices == IndicesEnd) + return CurIndex; + + // Given a struct type, recursively traverse the elements. 
+ if (const StructType *STy = dyn_cast(Ty)) { + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) { + if (Indices && *Indices == unsigned(EI - EB)) + return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); + } + return CurIndex; + } + // Given an array type, recursively traverse the elements. + else if (const ArrayType *ATy = dyn_cast(Ty)) { + const Type *EltTy = ATy->getElementType(); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + if (Indices && *Indices == i) + return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); + CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); + } + return CurIndex; + } + // We haven't found the type we're looking for, so keep searching. + return CurIndex + 1; +} + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. +/// +void llvm::ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl &ValueVTs, + SmallVectorImpl *Offsets, + uint64_t StartingOffset) { + // Given a struct type, recursively traverse the elements. + if (const StructType *STy = dyn_cast(Ty)) { + const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); + for (StructType::element_iterator EB = STy->element_begin(), + EI = EB, + EE = STy->element_end(); + EI != EE; ++EI) + ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, + StartingOffset + SL->getElementOffset(EI - EB)); + return; + } + // Given an array type, recursively traverse the elements. 
+ if (const ArrayType *ATy = dyn_cast(Ty)) { + const Type *EltTy = ATy->getElementType(); + uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) + ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, + StartingOffset + i * EltSize); + return; + } + // Interpret void as zero return values. + if (Ty == Type::getVoidTy(Ty->getContext())) + return; + // Base case: we can get an EVT for this LLVM IR type. + ValueVTs.push_back(TLI.getValueType(Ty)); + if (Offsets) + Offsets->push_back(StartingOffset); +} + +/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by +/// PHI nodes or outside of the basic block that defines it, or used by a +/// switch or atomic instruction, which may expand to multiple basic blocks. +static bool isUsedOutsideOfDefiningBlock(Instruction *I) { + if (isa(I)) return true; + BasicBlock *BB = I->getParent(); + for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) + if (cast(*UI)->getParent() != BB || isa(*UI)) + return true; + return false; +} + +/// isOnlyUsedInEntryBlock - If the specified argument is only used in the +/// entry block, return true. A use by a switch counts as a use outside the +/// entry block, since the switch may expand into multiple basic blocks. +static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { + // With FastISel active, we may be splitting blocks, so force creation + // of virtual registers for all non-dead arguments. + // Don't force virtual registers for byval arguments though, because + // fast-isel can't handle those in all cases. + if (EnableFastISel && !A->hasByValAttr()) + return A->use_empty(); + + BasicBlock *Entry = A->getParent()->begin(); + for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) + if (cast(*UI)->getParent() != Entry || isa(*UI)) + return false; // Use not in entry block.
+ return true; +} + +FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) + : TLI(tli) { +} + +void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, + bool EnableFastISel) { + Fn = &fn; + MF = &mf; + RegInfo = &MF->getRegInfo(); + + // Create a vreg for each argument register that is not dead and is used + // outside of the entry block for the function. + for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); + AI != E; ++AI) + if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) + InitializeRegForValue(AI); + + // Initialize the mapping of values to registers. This is only set up for + // instruction values that are used outside of the block that defines + // them. + Function::iterator BB = Fn->begin(), EB = Fn->end(); + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (AllocaInst *AI = dyn_cast(I)) + if (ConstantInt *CUI = dyn_cast(AI->getArraySize())) { + const Type *Ty = AI->getAllocatedType(); + uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); + unsigned Align = + std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), + AI->getAlignment()); + + TySize *= CUI->getZExtValue(); // Get total allocated size. + if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. + StaticAllocaMap[AI] = + MF->getFrameInfo()->CreateStackObject(TySize, Align, false); + } + + for (; BB != EB; ++BB) + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) + if (!isa(I) || + !StaticAllocaMap.count(cast(I))) + InitializeRegForValue(I); + + // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This + // also creates the initial PHI MachineInstrs, though none of the input + // operands are populated. + for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { + MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); + MBBMap[BB] = MBB; + MF->push_back(MBB); + + // Transfer the address-taken flag. 
This is necessary because there could + // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only + // the first one should be marked. + if (BB->hasAddressTaken()) + MBB->setHasAddressTaken(); + + // Create Machine PHI nodes for LLVM PHI nodes, lowering them as + // appropriate. + PHINode *PN; + DebugLoc DL; + for (BasicBlock::iterator + I = BB->begin(), E = BB->end(); I != E; ++I) { + + PN = dyn_cast(I); + if (!PN || PN->use_empty()) continue; + + unsigned PHIReg = ValueMap[PN]; + assert(PHIReg && "PHI node does not have an assigned virtual register!"); + + SmallVector ValueVTs; + ComputeValueVTs(TLI, PN->getType(), ValueVTs); + for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { + EVT VT = ValueVTs[vti]; + unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + for (unsigned i = 0; i != NumRegisters; ++i) + BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); + PHIReg += NumRegisters; + } + } + } +} + +/// clear - Clear out all the function-specific state. This returns this +/// FunctionLoweringInfo to an empty state, ready to be used for a +/// different function. +void FunctionLoweringInfo::clear() { + MBBMap.clear(); + ValueMap.clear(); + StaticAllocaMap.clear(); +#ifndef NDEBUG + CatchInfoLost.clear(); + CatchInfoFound.clear(); +#endif + LiveOutRegInfo.clear(); +} + +unsigned FunctionLoweringInfo::MakeReg(EVT VT) { + return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); +} + +/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// the correctly promoted or expanded types. Assign these registers +/// consecutive vreg numbers and return the first assigned number. +/// +/// In the case that the given value has struct or array type, this function +/// will assign registers for each member or element. 
+/// +unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, V->getType(), ValueVTs); + + unsigned FirstReg = 0; + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); + + unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) { + unsigned R = MakeReg(RegisterVT); + if (!FirstReg) FirstReg = R; + } + } + return FirstReg; +} + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalVariable *llvm::ExtractTypeInfo(Value *V) { + V = V->stripPointerCasts(); + GlobalVariable *GV = dyn_cast(V); + assert ((GV || isa(V)) && + "TypeInfo must be a global variable or NULL"); + return GV; +} + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void llvm::AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, + MachineBasicBlock *MBB) { + // Inform the MachineModuleInfo of the personality for this landing pad. + ConstantExpr *CE = cast(I.getOperand(2)); + assert(CE->getOpcode() == Instruction::BitCast && + isa(CE->getOperand(0)) && + "Personality should be a function"); + MMI->addPersonality(MBB, cast(CE->getOperand(0))); + + // Gather all the type infos for this landing pad and pass them along to + // MachineModuleInfo. 
+ std::vector TyInfo; + unsigned N = I.getNumOperands(); + + for (unsigned i = N - 1; i > 2; --i) { + if (ConstantInt *CI = dyn_cast(I.getOperand(i))) { + unsigned FilterLength = CI->getZExtValue(); + unsigned FirstCatch = i + FilterLength + !FilterLength; + assert (FirstCatch <= N && "Invalid filter length"); + + if (FirstCatch < N) { + TyInfo.reserve(N - FirstCatch); + for (unsigned j = FirstCatch; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + if (!FilterLength) { + // Cleanup. + MMI->addCleanup(MBB); + } else { + // Filter. + TyInfo.reserve(FilterLength - 1); + for (unsigned j = i + 1; j < FirstCatch; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addFilterTypeInfo(MBB, TyInfo); + TyInfo.clear(); + } + + N = i; + } + } + + if (N > 3) { + TyInfo.reserve(N - 3); + for (unsigned j = 3; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + MMI->addCatchTypeInfo(MBB, TyInfo); + } +} + +void llvm::CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { + for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) + if (EHSelectorInst *EHSel = dyn_cast(I)) { + // Apply the catch info to DestBB. + AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); +#ifndef NDEBUG + if (!FLI.MBBMap[SrcBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); +#endif + } +} diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h new file mode 100644 index 000000000000..d851e6429c0c --- /dev/null +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h @@ -0,0 +1,151 @@ +//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This implements routines for translating functions from LLVM IR into +// Machine IR. +// +//===----------------------------------------------------------------------===// + +#ifndef FUNCTIONLOWERINGINFO_H +#define FUNCTIONLOWERINGINFO_H + +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/DenseMap.h" +#ifndef NDEBUG +#include "llvm/ADT/SmallSet.h" +#endif +#include "llvm/CodeGen/ValueTypes.h" +#include + +namespace llvm { + +class AllocaInst; +class BasicBlock; +class CallInst; +class Function; +class GlobalVariable; +class Instruction; +class MachineBasicBlock; +class MachineFunction; +class MachineModuleInfo; +class MachineRegisterInfo; +class TargetLowering; +class Value; + +//===--------------------------------------------------------------------===// +/// FunctionLoweringInfo - This contains information that is global to a +/// function that is used when lowering a region of the function. +/// +class FunctionLoweringInfo { +public: + TargetLowering &TLI; + Function *Fn; + MachineFunction *MF; + MachineRegisterInfo *RegInfo; + + /// CanLowerReturn - true iff the function's return value can be lowered to + /// registers. + bool CanLowerReturn; + + /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg + /// allocated to hold a pointer to the hidden sret parameter. + unsigned DemoteRegister; + + explicit FunctionLoweringInfo(TargetLowering &TLI); + + /// set - Initialize this FunctionLoweringInfo with the given Function + /// and its associated MachineFunction. + /// + void set(Function &Fn, MachineFunction &MF, bool EnableFastISel); + + /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. + DenseMap MBBMap; + + /// ValueMap - Since we emit code for the function a basic block at a time, + /// we must remember which virtual registers hold the values for + /// cross-basic-block values. 
+ DenseMap ValueMap; + + /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in + /// the entry block. This allows the allocas to be efficiently referenced + /// anywhere in the function. + DenseMap StaticAllocaMap; + +#ifndef NDEBUG + SmallSet CatchInfoLost; + SmallSet CatchInfoFound; +#endif + + unsigned MakeReg(EVT VT); + + /// isExportedInst - Return true if the specified value is an instruction + /// exported from its block. + bool isExportedInst(const Value *V) { + return ValueMap.count(V); + } + + unsigned CreateRegForValue(const Value *V); + + unsigned InitializeRegForValue(const Value *V) { + unsigned &R = ValueMap[V]; + assert(R == 0 && "Already initialized this value register!"); + return R = CreateRegForValue(V); + } + + struct LiveOutInfo { + unsigned NumSignBits; + APInt KnownOne, KnownZero; + LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} + }; + + /// LiveOutRegInfo - Information about live out vregs, indexed by their + /// register number offset by 'FirstVirtualRegister'. + std::vector LiveOutRegInfo; + + /// clear - Clear out all the function-specific state. This returns this + /// FunctionLoweringInfo to an empty state, ready to be used for a + /// different function. + void clear(); +}; + +/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence +/// of insertvalue or extractvalue indices that identify a member, return +/// the linearized index of the start of the member. +/// +unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, + const unsigned *Indices, + const unsigned *IndicesEnd, + unsigned CurIndex = 0); + +/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of +/// EVTs that represent all the individual underlying +/// non-aggregate types that comprise it. +/// +/// If Offsets is non-null, it points to a vector to be filled in +/// with the in-memory offsets of each of the individual values. 
+/// +void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, + SmallVectorImpl &ValueVTs, + SmallVectorImpl *Offsets = 0, + uint64_t StartingOffset = 0); + +/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. +GlobalVariable *ExtractTypeInfo(Value *V); + +/// AddCatchInfo - Extract the personality and type infos from an eh.selector +/// call, and add them to the specified machine basic block. +void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB); + +/// CopyCatchInfo - Copy catch information from SrcBB to DestBB. +void CopyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, + MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 52b0832b0616..669d414cefa2 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -350,7 +350,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), ES->getTargetFlags())); } else if (BlockAddressSDNode *BA = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress())); + MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), + BA->getTargetFlags())); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Flag && diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 4f0a229a505e..273dbf0d5611 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -158,7 +158,6 @@ private: SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node); - SDValue ExpandDBG_STOPPOINT(SDNode *Node); void ExpandDYNAMIC_STACKALLOC(SDNode *Node, SmallVectorImpl &Results); SDValue ExpandFCOPYSIGN(SDNode
*Node); @@ -1517,6 +1516,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Create the stack frame object. EVT VT = Node->getValueType(0); EVT OpVT = Node->getOperand(0).getValueType(); + EVT EltVT = VT.getVectorElementType(); DebugLoc dl = Node->getDebugLoc(); SDValue FIPtr = DAG.CreateStackTemporary(VT); int FI = cast(FIPtr.getNode())->getIndex(); @@ -1524,7 +1524,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { // Emit a store of each element to the stack slot. SmallVector Stores; - unsigned TypeByteSize = OpVT.getSizeInBits() / 8; + unsigned TypeByteSize = EltVT.getSizeInBits() / 8; // Store (in the right endianness) the elements to memory. for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { // Ignore undef elements. @@ -1535,8 +1535,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), - Idx, SV, Offset)); + // If EltVT smaller than OpVT, only store the bits necessary. 
+ if (EltVT.bitsLT(OpVT)) + Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, SV, Offset, EltVT)); + else + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Node->getOperand(i), Idx, SV, Offset)); } SDValue StoreChain; @@ -1590,37 +1595,6 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { AbsVal); } -SDValue SelectionDAGLegalize::ExpandDBG_STOPPOINT(SDNode* Node) { - DebugLoc dl = Node->getDebugLoc(); - DwarfWriter *DW = DAG.getDwarfWriter(); - bool useDEBUG_LOC = TLI.isOperationLegalOrCustom(ISD::DEBUG_LOC, - MVT::Other); - bool useLABEL = TLI.isOperationLegalOrCustom(ISD::DBG_LABEL, MVT::Other); - - const DbgStopPointSDNode *DSP = cast(Node); - MDNode *CU_Node = DSP->getCompileUnit(); - if (DW && (useDEBUG_LOC || useLABEL)) { - - unsigned Line = DSP->getLine(); - unsigned Col = DSP->getColumn(); - - if (OptLevel == CodeGenOpt::None) { - // A bit self-referential to have DebugLoc on Debug_Loc nodes, but it - // won't hurt anything. 
- if (useDEBUG_LOC) { - return DAG.getNode(ISD::DEBUG_LOC, dl, MVT::Other, Node->getOperand(0), - DAG.getConstant(Line, MVT::i32), - DAG.getConstant(Col, MVT::i32), - DAG.getSrcValue(CU_Node)); - } else { - unsigned ID = DW->RecordSourceLine(Line, Col, CU_Node); - return DAG.getLabel(ISD::DBG_LABEL, dl, Node->getOperand(0), ID); - } - } - } - return Node->getOperand(0); -} - void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node, SmallVectorImpl &Results) { unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); @@ -2269,16 +2243,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(DAG.getConstant(1, Node->getValueType(0))); break; case ISD::EH_RETURN: - case ISD::DBG_LABEL: case ISD::EH_LABEL: case ISD::PREFETCH: case ISD::MEMBARRIER: case ISD::VAEND: Results.push_back(Node->getOperand(0)); break; - case ISD::DBG_STOPPOINT: - Results.push_back(ExpandDBG_STOPPOINT(Node)); - break; case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index c4bd552f52ab..003cea7a6f43 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -64,8 +64,12 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { // The final node obtained by mapping by ReplacedValues is not marked NewNode. // Note that ReplacedValues should be applied iteratively. - // Note that the ReplacedValues map may also map deleted nodes. By iterating - // over the DAG we only consider non-deleted nodes. + // Note that the ReplacedValues map may also map deleted nodes (by iterating + // over the DAG we never dereference deleted nodes). This means that it may + // also map nodes marked NewNode if the deallocated memory was reallocated as + // another node, and that new node was not seen by the LegalizeTypes machinery + // (for example because it was created but not used). 
In general, we cannot + // distinguish between new nodes and deleted nodes. SmallVector NewNodes; for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), E = DAG.allnodes_end(); I != E; ++I) { @@ -114,7 +118,11 @@ void DAGTypeLegalizer::PerformExpensiveChecks() { Mapped |= 128; if (I->getNodeId() != Processed) { - if (Mapped != 0) { + // Since we allow ReplacedValues to map deleted nodes, it may map nodes + // marked NewNode too, since a deleted node may have been reallocated as + // another node that has not been seen by the LegalizeTypes machinery. + if ((I->getNodeId() == NewNode && Mapped > 1) || + (I->getNodeId() != NewNode && Mapped != 0)) { errs() << "Unprocessed value in a map!"; Failed = true; } @@ -320,16 +328,12 @@ ScanOperands: continue; // The node morphed - this is equivalent to legalizing by replacing every - // value of N with the corresponding value of M. So do that now. However - // there is no need to remember the replacement - morphing will make sure - // it is never used non-trivially. + // value of N with the corresponding value of M. So do that now. assert(N->getNumValues() == M->getNumValues() && "Node morphing changed the number of results!"); for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - // Replacing the value takes care of remapping the new value. Do the - // replacement without recording it in ReplacedValues. This does not - // expunge From but that is fine - it is not really a new node. - ReplaceValueWithHelper(SDValue(N, i), SDValue(M, i)); + // Replacing the value takes care of remapping the new value. + ReplaceValueWith(SDValue(N, i), SDValue(M, i)); assert(N->getNodeId() == NewNode && "Unexpected node state!"); // The node continues to live on as part of the NewNode fungus that // grows on top of the useful nodes. Nothing more needs to be done @@ -666,14 +670,14 @@ namespace { } -/// ReplaceValueWithHelper - Internal helper for ReplaceValueWith. 
Updates the -/// DAG causing any uses of From to use To instead, but without expunging From -/// or recording the replacement in ReplacedValues. Do not call directly unless -/// you really know what you are doing! -void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) { +/// ReplaceValueWith - The specified value was legalized to the specified other +/// value. Update the DAG and NodeIds replacing any uses of From to use To +/// instead. +void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { assert(From.getNode() != To.getNode() && "Potential legalization loop!"); // If expansion produced new nodes, make sure they are properly marked. + ExpungeNode(From.getNode()); AnalyzeNewValue(To); // Expunges To. // Anything that used the old node should now use the new one. Note that this @@ -682,6 +686,10 @@ void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) { NodeUpdateListener NUL(*this, NodesToAnalyze); DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + // Process the list of nodes that need to be reanalyzed. while (!NodesToAnalyze.empty()) { SDNode *N = NodesToAnalyze.back(); @@ -712,25 +720,6 @@ void DAGTypeLegalizer::ReplaceValueWithHelper(SDValue From, SDValue To) { } } -/// ReplaceValueWith - The specified value was legalized to the specified other -/// value. Update the DAG and NodeIds replacing any uses of From to use To -/// instead. -void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { - assert(From.getNode()->getNodeId() == ReadyToProcess && - "Only the node being processed may be remapped!"); - - // If expansion produced new nodes, make sure they are properly marked. - ExpungeNode(From.getNode()); - AnalyzeNewValue(To); // Expunges To. - - // The old node may still be present in a map like ExpandedIntegers or - // PromotedIntegers. 
Inform maps about the replacement. - ReplacedValues[From] = To; - - // Do the replacement. - ReplaceValueWithHelper(From, To); -} - void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { assert(Result.getValueType() == TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && "Invalid type for promoted integer"); @@ -918,6 +907,29 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { return true; } + +/// CustomWidenLowerNode - Widen the node's results with custom code provided +/// by the target and return "true", or do nothing and return "false". +bool DAGTypeLegalizer::CustomWidenLowerNode(SDNode *N, EVT VT) { + // See if the target wants to custom lower this node. + if (TLI.getOperationAction(N->getOpcode(), VT) != TargetLowering::Custom) + return false; + + SmallVector Results; + TLI.ReplaceNodeResults(N, Results, DAG); + + if (Results.empty()) + // The target didn't want to custom widen lower its result after all. + return false; + + // Update the widening map. + assert(Results.size() == N->getNumValues() && + "Custom lowering returned the wrong number of results!"); + for (unsigned i = 0, e = Results.size(); i != e; ++i) + SetWidenedVector(SDValue(N, i), Results[i]); + return true; +} + /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type /// which is split into two not necessarily identical pieces. 
void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e1b7022dda23..2ee9f8a42ed3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -188,6 +188,7 @@ private: SDValue BitConvertVectorToIntegerVector(SDValue Op); SDValue CreateStackStoreLoad(SDValue Op, EVT DestVT); bool CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult); + bool CustomWidenLowerNode(SDNode *N, EVT VT); SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); @@ -196,7 +197,6 @@ private: DebugLoc dl); SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); void ReplaceValueWith(SDValue From, SDValue To); - void ReplaceValueWithHelper(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index ca194305d989..785c2adb3943 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -54,9 +54,6 @@ class VectorLegalizer { SDValue LegalizeOp(SDValue Op); // Assuming the node is legal, "legalize" the results SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); - // Implements unrolling a generic vector operation, i.e. turning it into - // scalar operations. - SDValue UnrollVectorOp(SDValue Op); // Implements unrolling a VSETCC. 
SDValue UnrollVSETCC(SDValue Op); // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB @@ -211,7 +208,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { else if (Node->getOpcode() == ISD::VSETCC) Result = UnrollVSETCC(Op); else - Result = UnrollVectorOp(Op); + Result = DAG.UnrollVectorOp(Op.getNode()); break; } @@ -256,7 +253,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), Zero, Op.getOperand(0)); } - return UnrollVectorOp(Op); + return DAG.UnrollVectorOp(Op.getNode()); } SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { @@ -282,56 +279,6 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); } -/// UnrollVectorOp - We know that the given vector has a legal type, however -/// the operation it performs is not legal, and the target has requested that -/// the operation be expanded. "Unroll" the vector, splitting out the scalars -/// and operating on each element individually. -SDValue VectorLegalizer::UnrollVectorOp(SDValue Op) { - EVT VT = Op.getValueType(); - assert(Op.getNode()->getNumValues() == 1 && - "Can't unroll a vector with multiple results!"); - unsigned NE = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = Op.getDebugLoc(); - - SmallVector Scalars; - SmallVector Operands(Op.getNumOperands()); - for (unsigned i = 0; i != NE; ++i) { - for (unsigned j = 0; j != Op.getNumOperands(); ++j) { - SDValue Operand = Op.getOperand(j); - EVT OperandVT = Operand.getValueType(); - if (OperandVT.isVector()) { - // A vector operand; extract a single element. - EVT OperandEltVT = OperandVT.getVectorElementType(); - Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - OperandEltVT, - Operand, - DAG.getConstant(i, MVT::i32)); - } else { - // A scalar operand; just use it as is. 
- Operands[j] = Operand; - } - } - - switch (Op.getOpcode()) { - default: - Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, - &Operands[0], Operands.size())); - break; - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - Scalars.push_back(DAG.getNode(Op.getOpcode(), dl, EltVT, Operands[0], - DAG.getShiftAmountOperand(Operands[1]))); - break; - } - } - - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Scalars[0], Scalars.size()); -} - } bool SelectionDAG::LegalizeVectors() { diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 75e12395d8bd..023324b82af3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1118,8 +1118,12 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { DEBUG(errs() << "Widen node result " << ResNo << ": "; N->dump(&DAG); errs() << "\n"); - SDValue Res = SDValue(); + // See if the target wants to custom widen this node. + if (CustomWidenLowerNode(N, N->getValueType(ResNo))) + return; + + SDValue Res = SDValue(); switch (N->getOpcode()) { default: #ifndef NDEBUG diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4530ffc4a2d0..c38c79b14597 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -200,19 +200,6 @@ bool ISD::isScalarToVector(const SDNode *N) { return true; } - -/// isDebugLabel - Return true if the specified node represents a debug -/// label (i.e. ISD::DBG_LABEL or TargetInstrInfo::DBG_LABEL node). 
-bool ISD::isDebugLabel(const SDNode *N) { - SDValue Zero; - if (N->getOpcode() == ISD::DBG_LABEL) - return true; - if (N->isMachineOpcode() && - N->getMachineOpcode() == TargetInstrInfo::DBG_LABEL) - return true; - return false; -} - /// getSetCCSwappedOperands - Return the operation corresponding to (Y op X) /// when given the operation for (X op Y). ISD::CondCode ISD::getSetCCSwappedOperands(ISD::CondCode Operation) { @@ -393,13 +380,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { case ISD::Register: ID.AddInteger(cast(N)->getReg()); break; - case ISD::DBG_STOPPOINT: { - const DbgStopPointSDNode *DSP = cast(N); - ID.AddInteger(DSP->getLine()); - ID.AddInteger(DSP->getColumn()); - ID.AddPointer(DSP->getCompileUnit()); - break; - } + case ISD::SRCVALUE: ID.AddPointer(cast(N)->getValue()); break; @@ -462,7 +443,8 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } case ISD::TargetBlockAddress: case ISD::BlockAddress: { - ID.AddPointer(cast(N)); + ID.AddPointer(cast(N)->getBlockAddress()); + ID.AddInteger(cast(N)->getTargetFlags()); break; } } // end switch (N->getOpcode()) @@ -508,8 +490,6 @@ static bool doNotCSE(SDNode *N) { switch (N->getOpcode()) { default: break; case ISD::HANDLENODE: - case ISD::DBG_LABEL: - case ISD::DBG_STOPPOINT: case ISD::EH_LABEL: return true; // Never CSE these nodes. 
} @@ -1296,16 +1276,6 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getDbgStopPoint(DebugLoc DL, SDValue Root, - unsigned Line, unsigned Col, - MDNode *CU) { - SDNode *N = NodeAllocator.Allocate(); - new (N) DbgStopPointSDNode(Root, Line, Col, CU); - N->setDebugLoc(DL); - AllNodes.push_back(N); - return SDValue(N, 0); -} - SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl, SDValue Root, unsigned LabelID) { @@ -1323,18 +1293,20 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl, return SDValue(N, 0); } -SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, DebugLoc DL, - bool isTarget) { +SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, + bool isTarget, + unsigned char TargetFlags) { unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress; FoldingSetNodeID ID; - AddNodeIDNode(ID, Opc, getVTList(TLI.getPointerTy()), 0, 0); + AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); ID.AddPointer(BA); + ID.AddInteger(TargetFlags); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); SDNode *N = NodeAllocator.Allocate(); - new (N) BlockAddressSDNode(Opc, DL, TLI.getPointerTy(), BA); + new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -5452,7 +5424,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UNDEF: return "undef"; case ISD::MERGE_VALUES: return "merge_values"; case ISD::INLINEASM: return "inlineasm"; - case ISD::DBG_LABEL: return "dbg_label"; case ISD::EH_LABEL: return "eh_label"; case ISD::HANDLENODE: return "handlenode"; @@ -5586,10 +5557,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CTTZ: return "cttz"; case ISD::CTLZ: return "ctlz"; - // Debug info - case ISD::DBG_STOPPOINT: return "dbg_stoppoint"; - case ISD::DEBUG_LOC: return "debug_loc"; - // Trampolines case ISD::TRAMPOLINE: 
return "trampoline"; @@ -5810,6 +5777,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << ", "; WriteAsOperand(OS, BA->getBlockAddress()->getBasicBlock(), false); OS << ">"; + if (unsigned int TF = BA->getTargetFlags()) + OS << " [TF=" << TF << ']'; } } @@ -5838,6 +5807,66 @@ static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { N->dump(G); } +SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { + assert(N->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + + EVT VT = N->getValueType(0); + unsigned NE = VT.getVectorNumElements(); + EVT EltVT = VT.getVectorElementType(); + DebugLoc dl = N->getDebugLoc(); + + SmallVector Scalars; + SmallVector Operands(N->getNumOperands()); + + // If ResNE is 0, fully unroll the vector op. + if (ResNE == 0) + ResNE = NE; + else if (NE > ResNE) + NE = ResNE; + + unsigned i; + for (i= 0; i != NE; ++i) { + for (unsigned j = 0; j != N->getNumOperands(); ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + if (OperandVT.isVector()) { + // A vector operand; extract a single element. + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, + OperandEltVT, + Operand, + getConstant(i, MVT::i32)); + } else { + // A scalar operand; just use it as is. 
+ Operands[j] = Operand; + } + } + + switch (N->getOpcode()) { + default: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, + &Operands[0], Operands.size())); + break; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], + getShiftAmountOperand(Operands[1]))); + break; + } + } + + for (; i < ResNE; ++i) + Scalars.push_back(getUNDEF(EltVT)); + + return getNode(ISD::BUILD_VECTOR, dl, + EVT::getVectorVT(*getContext(), EltVT, ResNE), + &Scalars[0], Scalars.size()); +} + void SelectionDAG::dump() const { errs() << "SelectionDAG has " << AllNodes.size() << " nodes:"; @@ -5993,3 +6022,4 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return false; return true; } + diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp similarity index 92% rename from lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp rename to lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 90fd95eb6352..57d89036a808 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuild.cpp - Selection-DAG building --------------------===// +//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// // // The LLVM Compiler Infrastructure // @@ -12,12 +12,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" -#include "SelectionDAGBuild.h" +#include "SelectionDAGBuilder.h" +#include "FunctionLoweringInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Constants.h" -#include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -68,85 +68,7 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); 
-/// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence -/// of insertvalue or extractvalue indices that identify a member, return -/// the linearized index of the start of the member. -/// -static unsigned ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty, - const unsigned *Indices, - const unsigned *IndicesEnd, - unsigned CurIndex = 0) { - // Base case: We're done. - if (Indices && Indices == IndicesEnd) - return CurIndex; - - // Given a struct type, recursively traverse the elements. - if (const StructType *STy = dyn_cast(Ty)) { - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) { - if (Indices && *Indices == unsigned(EI - EB)) - return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex); - } - return CurIndex; - } - // Given an array type, recursively traverse the elements. - else if (const ArrayType *ATy = dyn_cast(Ty)) { - const Type *EltTy = ATy->getElementType(); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { - if (Indices && *Indices == i) - return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex); - CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex); - } - return CurIndex; - } - // We haven't found the type we're looking for, so keep searching. - return CurIndex + 1; -} - -/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of -/// EVTs that represent all the individual underlying -/// non-aggregate types that comprise it. -/// -/// If Offsets is non-null, it points to a vector to be filled in -/// with the in-memory offsets of each of the individual values. -/// -static void ComputeValueVTs(const TargetLowering &TLI, const Type *Ty, - SmallVectorImpl &ValueVTs, - SmallVectorImpl *Offsets = 0, - uint64_t StartingOffset = 0) { - // Given a struct type, recursively traverse the elements. 
- if (const StructType *STy = dyn_cast(Ty)) { - const StructLayout *SL = TLI.getTargetData()->getStructLayout(STy); - for (StructType::element_iterator EB = STy->element_begin(), - EI = EB, - EE = STy->element_end(); - EI != EE; ++EI) - ComputeValueVTs(TLI, *EI, ValueVTs, Offsets, - StartingOffset + SL->getElementOffset(EI - EB)); - return; - } - // Given an array type, recursively traverse the elements. - if (const ArrayType *ATy = dyn_cast(Ty)) { - const Type *EltTy = ATy->getElementType(); - uint64_t EltSize = TLI.getTargetData()->getTypeAllocSize(EltTy); - for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) - ComputeValueVTs(TLI, EltTy, ValueVTs, Offsets, - StartingOffset + i * EltSize); - return; - } - // Interpret void as zero return values. - if (Ty == Type::getVoidTy(Ty->getContext())) - return; - // Base case: we can get an EVT for this LLVM IR type. - ValueVTs.push_back(TLI.getValueType(Ty)); - if (Offsets) - Offsets->push_back(StartingOffset); -} - -namespace llvm { +namespace { /// RegsForValue - This struct represents the registers (physical or virtual) /// that a particular set of values is assigned, and the type information about /// the value. The most common situation is to represent one value at a time, @@ -156,7 +78,7 @@ namespace llvm { /// have legal types, so each value may require one or more registers of some /// legal type. /// - struct VISIBILITY_HIDDEN RegsForValue { + struct RegsForValue { /// TLI - The TargetLowering object. /// const TargetLowering *TLI; @@ -241,150 +163,6 @@ namespace llvm { }; } -/// isUsedOutsideOfDefiningBlock - Return true if this instruction is used by -/// PHI nodes or outside of the basic block that defines it, or used by a -/// switch or atomic instruction, which may expand to multiple basic blocks. 
-static bool isUsedOutsideOfDefiningBlock(Instruction *I) { - if (isa(I)) return true; - BasicBlock *BB = I->getParent(); - for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) - if (cast(*UI)->getParent() != BB || isa(*UI)) - return true; - return false; -} - -/// isOnlyUsedInEntryBlock - If the specified argument is only used in the -/// entry block, return true. This includes arguments used by switches, since -/// the switch may expand into multiple basic blocks. -static bool isOnlyUsedInEntryBlock(Argument *A, bool EnableFastISel) { - // With FastISel active, we may be splitting blocks, so force creation - // of virtual registers for all non-dead arguments. - // Don't force virtual registers for byval arguments though, because - // fast-isel can't handle those in all cases. - if (EnableFastISel && !A->hasByValAttr()) - return A->use_empty(); - - BasicBlock *Entry = A->getParent()->begin(); - for (Value::use_iterator UI = A->use_begin(), E = A->use_end(); UI != E; ++UI) - if (cast(*UI)->getParent() != Entry || isa(*UI)) - return false; // Use not in entry block. - return true; -} - -FunctionLoweringInfo::FunctionLoweringInfo(TargetLowering &tli) - : TLI(tli) { -} - -void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf, - SelectionDAG &DAG, - bool EnableFastISel) { - Fn = &fn; - MF = &mf; - RegInfo = &MF->getRegInfo(); - - // Create a vreg for each argument register that is not dead and is used - // outside of the entry block for the function. - for (Function::arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); - AI != E; ++AI) - if (!isOnlyUsedInEntryBlock(AI, EnableFastISel)) - InitializeRegForValue(AI); - - // Initialize the mapping of values to registers. This is only set up for - // instruction values that are used outside of the block that defines - // them. 
- Function::iterator BB = Fn->begin(), EB = Fn->end(); - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (AllocaInst *AI = dyn_cast(I)) - if (ConstantInt *CUI = dyn_cast(AI->getArraySize())) { - const Type *Ty = AI->getAllocatedType(); - uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty); - unsigned Align = - std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), - AI->getAlignment()); - - TySize *= CUI->getZExtValue(); // Get total allocated size. - if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects. - StaticAllocaMap[AI] = - MF->getFrameInfo()->CreateStackObject(TySize, Align, false); - } - - for (; BB != EB; ++BB) - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) - if (!I->use_empty() && isUsedOutsideOfDefiningBlock(I)) - if (!isa(I) || - !StaticAllocaMap.count(cast(I))) - InitializeRegForValue(I); - - // Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This - // also creates the initial PHI MachineInstrs, though none of the input - // operands are populated. - for (BB = Fn->begin(), EB = Fn->end(); BB != EB; ++BB) { - MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(BB); - MBBMap[BB] = MBB; - MF->push_back(MBB); - - // Transfer the address-taken flag. This is necessary because there could - // be multiple MachineBasicBlocks corresponding to one BasicBlock, and only - // the first one should be marked. - if (BB->hasAddressTaken()) - MBB->setHasAddressTaken(); - - // Create Machine PHI nodes for LLVM PHI nodes, lowering them as - // appropriate. 
- PHINode *PN; - DebugLoc DL; - for (BasicBlock::iterator - I = BB->begin(), E = BB->end(); I != E; ++I) { - - PN = dyn_cast(I); - if (!PN || PN->use_empty()) continue; - - unsigned PHIReg = ValueMap[PN]; - assert(PHIReg && "PHI node does not have an assigned virtual register!"); - - SmallVector ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); - for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { - EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - for (unsigned i = 0; i != NumRegisters; ++i) - BuildMI(MBB, DL, TII->get(TargetInstrInfo::PHI), PHIReg + i); - PHIReg += NumRegisters; - } - } - } -} - -unsigned FunctionLoweringInfo::MakeReg(EVT VT) { - return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); -} - -/// CreateRegForValue - Allocate the appropriate number of virtual registers of -/// the correctly promoted or expanded types. Assign these registers -/// consecutive vreg numbers and return the first assigned number. -/// -/// In the case that the given value has struct or array type, this function -/// will assign registers for each member or element. -/// -unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, V->getType(), ValueVTs); - - unsigned FirstReg = 0; - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); - - unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = MakeReg(RegisterVT); - if (!FirstReg) FirstReg = R; - } - } - return FirstReg; -} - /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. 
If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra @@ -723,19 +501,19 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val, } -void SelectionDAGLowering::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { +void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { AA = &aa; GFI = gfi; TD = DAG.getTarget().getTargetData(); } /// clear - Clear out the curret SelectionDAG and the associated -/// state and prepare this SelectionDAGLowering object to be used +/// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering /// or PHI node updating; that information is cleared out as it is /// consumed. -void SelectionDAGLowering::clear() { +void SelectionDAGBuilder::clear() { NodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); @@ -750,7 +528,7 @@ void SelectionDAGLowering::clear() { /// a store or any other node that may need to be ordered after any /// prior load instructions. /// -SDValue SelectionDAGLowering::getRoot() { +SDValue SelectionDAGBuilder::getRoot() { if (PendingLoads.empty()) return DAG.getRoot(); @@ -773,7 +551,7 @@ SDValue SelectionDAGLowering::getRoot() { /// PendingLoad items, flush all the PendingExports items. It is necessary /// to do this before emitting a terminator instruction. 
/// -SDValue SelectionDAGLowering::getControlRoot() { +SDValue SelectionDAGBuilder::getControlRoot() { SDValue Root = DAG.getRoot(); if (PendingExports.empty()) @@ -800,11 +578,11 @@ SDValue SelectionDAGLowering::getControlRoot() { return Root; } -void SelectionDAGLowering::visit(Instruction &I) { +void SelectionDAGBuilder::visit(Instruction &I) { visit(I.getOpcode(), I); } -void SelectionDAGLowering::visit(unsigned Opcode, User &I) { +void SelectionDAGBuilder::visit(unsigned Opcode, User &I) { // Note: this doesn't use InstVisitor, because it has to work with // ConstantExpr's in addition to instructions. switch (Opcode) { @@ -816,7 +594,7 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) { } } -SDValue SelectionDAGLowering::getValue(const Value *V) { +SDValue SelectionDAGBuilder::getValue(const Value *V) { SDValue &N = NodeMap[V]; if (N.getNode()) return N; @@ -884,7 +662,7 @@ SDValue SelectionDAGLowering::getValue(const Value *V) { } if (BlockAddress *BA = dyn_cast(C)) - return DAG.getBlockAddress(BA, getCurDebugLoc()); + return DAG.getBlockAddress(BA, VT); const VectorType *VecTy = cast(V->getType()); unsigned NumElements = VecTy->getNumElements(); @@ -981,7 +759,7 @@ static void getReturnInfo(const Type* ReturnType, } } -void SelectionDAGLowering::visitRet(ReturnInst &I) { +void SelectionDAGBuilder::visitRet(ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector Outs; FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); @@ -1086,7 +864,7 @@ void SelectionDAGLowering::visitRet(ReturnInst &I) { /// CopyToExportRegsIfNeeded - If the given value has virtual registers /// created for it, emit nodes to copy the value into the virtual /// registers. 
-void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) { +void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) { if (!V->use_empty()) { DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) @@ -1097,7 +875,7 @@ void SelectionDAGLowering::CopyToExportRegsIfNeeded(Value *V) { /// ExportFromCurrentBlock - If this condition isn't known to be exported from /// the current basic block, add it to ValueMap now so that we'll get a /// CopyTo/FromReg. -void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { +void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) { // No need to export constants. if (!isa(V) && !isa(V)) return; @@ -1108,8 +886,8 @@ void SelectionDAGLowering::ExportFromCurrentBlock(Value *V) { CopyValueToVirtualRegister(V, Reg); } -bool SelectionDAGLowering::isExportableFromCurrentBlock(Value *V, - const BasicBlock *FromBB) { +bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V, + const BasicBlock *FromBB) { // The operands of the setcc have to be in this block. We don't know // how to export them from some other block. if (Instruction *VI = dyn_cast(V)) { @@ -1201,10 +979,10 @@ static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) { /// AND operator tree. 
/// void -SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - MachineBasicBlock *CurBB) { +SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB) { const BasicBlock *BB = CurBB->getBasicBlock(); // If the leaf of the tree is a comparison, merge the condition into @@ -1240,11 +1018,11 @@ SelectionDAGLowering::EmitBranchForMergedCondition(Value *Cond, } /// FindMergedConditions - If Cond is an expression like -void SelectionDAGLowering::FindMergedConditions(Value *Cond, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - MachineBasicBlock *CurBB, - unsigned Opc) { +void SelectionDAGBuilder::FindMergedConditions(Value *Cond, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + MachineBasicBlock *CurBB, + unsigned Opc) { // If this node is not part of the or/and tree, emit it as a branch. Instruction *BOp = dyn_cast(Cond); if (!BOp || !(isa(BOp) || isa(BOp)) || @@ -1299,7 +1077,7 @@ void SelectionDAGLowering::FindMergedConditions(Value *Cond, /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool -SelectionDAGLowering::ShouldEmitAsBranches(const std::vector &Cases){ +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they @@ -1314,7 +1092,7 @@ SelectionDAGLowering::ShouldEmitAsBranches(const std::vector &Cases){ return true; } -void SelectionDAGLowering::visitBr(BranchInst &I) { +void SelectionDAGBuilder::visitBr(BranchInst &I) { // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1398,7 +1176,7 @@ void SelectionDAGLowering::visitBr(BranchInst &I) { /// visitSwitchCase - Emits the necessary code to represent a single node in /// the binary search tree resulting from lowering a switch instruction. 
-void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { +void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); DebugLoc dl = getCurDebugLoc(); @@ -1476,7 +1254,7 @@ void SelectionDAGLowering::visitSwitchCase(CaseBlock &CB) { } /// visitJumpTable - Emit JumpTable node in the current MBB -void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { +void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { // Emit the code for the jump table assert(JT.Reg != -1U && "Should lower JT Header first!"); EVT PTy = TLI.getPointerTy(); @@ -1490,8 +1268,8 @@ void SelectionDAGLowering::visitJumpTable(JumpTable &JT) { /// visitJumpTableHeader - This function emits necessary code to produce index /// in the JumpTable from switch case. -void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, - JumpTableHeader &JTH) { +void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, + JumpTableHeader &JTH) { // Subtract the lowest switch case value from the value being switched on and // conditional branch to default mbb if the result is greater than the // difference between smallest and largest cases. 
@@ -1540,7 +1318,7 @@ void SelectionDAGLowering::visitJumpTableHeader(JumpTable &JT, /// visitBitTestHeader - This function emits necessary code to produce value /// suitable for "bit tests" -void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { +void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) { // Subtract the minimum value SDValue SwitchOp = getValue(B.SValue); EVT VT = SwitchOp.getValueType(); @@ -1583,9 +1361,9 @@ void SelectionDAGLowering::visitBitTestHeader(BitTestBlock &B) { } /// visitBitTestCase - this function produces one "bit test" -void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, - unsigned Reg, - BitTestCase &B) { +void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, + unsigned Reg, + BitTestCase &B) { // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); @@ -1624,7 +1402,7 @@ void SelectionDAGLowering::visitBitTestCase(MachineBasicBlock* NextMBB, DAG.getBasicBlock(NextMBB))); } -void SelectionDAGLowering::visitInvoke(InvokeInst &I) { +void SelectionDAGBuilder::visitInvoke(InvokeInst &I) { // Retrieve successors. MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; @@ -1649,15 +1427,15 @@ void SelectionDAGLowering::visitInvoke(InvokeInst &I) { DAG.getBasicBlock(Return))); } -void SelectionDAGLowering::visitUnwind(UnwindInst &I) { +void SelectionDAGBuilder::visitUnwind(UnwindInst &I) { } /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for /// small case ranges). 
-bool SelectionDAGLowering::handleSmallSwitchRange(CaseRec& CR, - CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { +bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { Case& BackCase = *(CR.Range.second-1); // Size is the number of Cases represented by this range. @@ -1751,10 +1529,10 @@ static APInt ComputeRange(const APInt &First, const APInt &Last) { } /// handleJTSwitchCase - Emit jumptable for current switch case range -bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { +bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { Case& FrontCase = *CR.Range.first; Case& BackCase = *(CR.Range.second-1); @@ -1845,10 +1623,10 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR, /// handleBTSplitSwitchCase - emit comparison and split binary search tree into /// 2 subtrees. -bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default) { +bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default) { // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -1973,10 +1751,10 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR, /// handleBitTestsSwitchCase - if current case range has few destination and /// range span less, than machine word bitwidth, encode case range into series /// of masks and emit bit tests with these masks. 
-bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, - CaseRecVector& WorkList, - Value* SV, - MachineBasicBlock* Default){ +bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, + CaseRecVector& WorkList, + Value* SV, + MachineBasicBlock* Default){ EVT PTy = TLI.getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); @@ -2104,8 +1882,8 @@ bool SelectionDAGLowering::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's -size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, - const SwitchInst& SI) { +size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, + const SwitchInst& SI) { size_t numCmps = 0; // Start with "simple" cases @@ -2146,7 +1924,7 @@ size_t SelectionDAGLowering::Clusterify(CaseVector& Cases, return numCmps; } -void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { +void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) { // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; @@ -2209,7 +1987,7 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { } } -void SelectionDAGLowering::visitIndirectBr(IndirectBrInst &I) { +void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) { // Update machine-CFG edges. 
for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]); @@ -2220,7 +1998,7 @@ void SelectionDAGLowering::visitIndirectBr(IndirectBrInst &I) { } -void SelectionDAGLowering::visitFSub(User &I) { +void SelectionDAGBuilder::visitFSub(User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); if (isa(Ty)) { @@ -2249,7 +2027,7 @@ void SelectionDAGLowering::visitFSub(User &I) { visitBinary(I, ISD::FSUB); } -void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { +void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); @@ -2257,7 +2035,7 @@ void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { Op1.getValueType(), Op1, Op2)); } -void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { +void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); if (!isa(I.getType()) && @@ -2291,7 +2069,7 @@ void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { Op1.getValueType(), Op1, Op2)); } -void SelectionDAGLowering::visitICmp(User &I) { +void SelectionDAGBuilder::visitICmp(User &I) { ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; if (ICmpInst *IC = dyn_cast(&I)) predicate = IC->getPredicate(); @@ -2305,7 +2083,7 @@ void SelectionDAGLowering::visitICmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); } -void SelectionDAGLowering::visitFCmp(User &I) { +void SelectionDAGBuilder::visitFCmp(User &I) { FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; if (FCmpInst *FC = dyn_cast(&I)) predicate = FC->getPredicate(); @@ -2318,7 +2096,7 @@ void SelectionDAGLowering::visitFCmp(User &I) { setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); } -void SelectionDAGLowering::visitSelect(User &I) { +void 
SelectionDAGBuilder::visitSelect(User &I) { SmallVector ValueVTs; ComputeValueVTs(TLI, I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); @@ -2341,14 +2119,14 @@ void SelectionDAGLowering::visitSelect(User &I) { } -void SelectionDAGLowering::visitTrunc(User &I) { +void SelectionDAGBuilder::visitTrunc(User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitZExt(User &I) { +void SelectionDAGBuilder::visitZExt(User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2356,7 +2134,7 @@ void SelectionDAGLowering::visitZExt(User &I) { setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitSExt(User &I) { +void SelectionDAGBuilder::visitSExt(User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. 
So, nothing much to do SDValue N = getValue(I.getOperand(0)); @@ -2364,7 +2142,7 @@ void SelectionDAGLowering::visitSExt(User &I) { setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitFPTrunc(User &I) { +void SelectionDAGBuilder::visitFPTrunc(User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2372,42 +2150,42 @@ void SelectionDAGLowering::visitFPTrunc(User &I) { DestVT, N, DAG.getIntPtrConstant(0))); } -void SelectionDAGLowering::visitFPExt(User &I){ +void SelectionDAGBuilder::visitFPExt(User &I){ // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitFPToUI(User &I) { +void SelectionDAGBuilder::visitFPToUI(User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitFPToSI(User &I) { +void SelectionDAGBuilder::visitFPToSI(User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitUIToFP(User &I) { +void SelectionDAGBuilder::visitUIToFP(User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitSIToFP(User &I){ +void SelectionDAGBuilder::visitSIToFP(User &I){ // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); EVT DestVT = 
TLI.getValueType(I.getType()); setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); } -void SelectionDAGLowering::visitPtrToInt(User &I) { +void SelectionDAGBuilder::visitPtrToInt(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2417,7 +2195,7 @@ void SelectionDAGLowering::visitPtrToInt(User &I) { setValue(&I, Result); } -void SelectionDAGLowering::visitIntToPtr(User &I) { +void SelectionDAGBuilder::visitIntToPtr(User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); @@ -2426,7 +2204,7 @@ void SelectionDAGLowering::visitIntToPtr(User &I) { setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } -void SelectionDAGLowering::visitBitCast(User &I) { +void SelectionDAGBuilder::visitBitCast(User &I) { SDValue N = getValue(I.getOperand(0)); EVT DestVT = TLI.getValueType(I.getType()); @@ -2439,7 +2217,7 @@ void SelectionDAGLowering::visitBitCast(User &I) { setValue(&I, N); // noop cast. 
} -void SelectionDAGLowering::visitInsertElement(User &I) { +void SelectionDAGBuilder::visitInsertElement(User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), @@ -2451,7 +2229,7 @@ void SelectionDAGLowering::visitInsertElement(User &I) { InVec, InVal, InIdx)); } -void SelectionDAGLowering::visitExtractElement(User &I) { +void SelectionDAGBuilder::visitExtractElement(User &I) { SDValue InVec = getValue(I.getOperand(0)); SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), TLI.getPointerTy(), @@ -2471,7 +2249,7 @@ static bool SequentialMask(SmallVectorImpl &Mask, unsigned SIndx) { return true; } -void SelectionDAGLowering::visitShuffleVector(User &I) { +void SelectionDAGBuilder::visitShuffleVector(User &I) { SmallVector Mask; SDValue Src1 = getValue(I.getOperand(0)); SDValue Src2 = getValue(I.getOperand(1)); @@ -2645,7 +2423,7 @@ void SelectionDAGLowering::visitShuffleVector(User &I) { VT, &Ops[0], Ops.size())); } -void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { +void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) { const Value *Op0 = I.getOperand(0); const Value *Op1 = I.getOperand(1); const Type *AggTy = I.getType(); @@ -2686,7 +2464,7 @@ void SelectionDAGLowering::visitInsertValue(InsertValueInst &I) { &Values[0], NumAggValues)); } -void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { +void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) { const Value *Op0 = I.getOperand(0); const Type *AggTy = Op0->getType(); const Type *ValTy = I.getType(); @@ -2715,7 +2493,7 @@ void SelectionDAGLowering::visitExtractValue(ExtractValueInst &I) { } -void SelectionDAGLowering::visitGetElementPtr(User &I) { +void SelectionDAGBuilder::visitGetElementPtr(User &I) { SDValue N = getValue(I.getOperand(0)); const Type *Ty = I.getOperand(0)->getType(); @@ -2784,7 +2562,7 @@ void 
SelectionDAGLowering::visitGetElementPtr(User &I) { setValue(&I, N); } -void SelectionDAGLowering::visitAlloca(AllocaInst &I) { +void SelectionDAGBuilder::visitAlloca(AllocaInst &I) { // If this is a fixed sized alloca in the entry block of the function, // allocate it statically on the stack. if (FuncInfo.StaticAllocaMap.count(&I)) @@ -2837,7 +2615,7 @@ void SelectionDAGLowering::visitAlloca(AllocaInst &I) { FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(); } -void SelectionDAGLowering::visitLoad(LoadInst &I) { +void SelectionDAGBuilder::visitLoad(LoadInst &I) { const Value *SV = I.getOperand(0); SDValue Ptr = getValue(SV); @@ -2895,7 +2673,7 @@ void SelectionDAGLowering::visitLoad(LoadInst &I) { } -void SelectionDAGLowering::visitStore(StoreInst &I) { +void SelectionDAGBuilder::visitStore(StoreInst &I) { Value *SrcV = I.getOperand(0); Value *PtrV = I.getOperand(1); @@ -2931,8 +2709,8 @@ void SelectionDAGLowering::visitStore(StoreInst &I) { /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC /// node. -void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, - unsigned Intrinsic) { +void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I, + unsigned Intrinsic) { bool HasChain = !I.doesNotAccessMemory(); bool OnlyLoad = HasChain && I.onlyReadsMemory(); @@ -3012,73 +2790,6 @@ void SelectionDAGLowering::visitTargetIntrinsic(CallInst &I, } } -/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V. -static GlobalVariable *ExtractTypeInfo(Value *V) { - V = V->stripPointerCasts(); - GlobalVariable *GV = dyn_cast(V); - assert ((GV || isa(V)) && - "TypeInfo must be a global variable or NULL"); - return GV; -} - -namespace llvm { - -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. 
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, - MachineBasicBlock *MBB) { - // Inform the MachineModuleInfo of the personality for this landing pad. - ConstantExpr *CE = cast(I.getOperand(2)); - assert(CE->getOpcode() == Instruction::BitCast && - isa(CE->getOperand(0)) && - "Personality should be a function"); - MMI->addPersonality(MBB, cast(CE->getOperand(0))); - - // Gather all the type infos for this landing pad and pass them along to - // MachineModuleInfo. - std::vector TyInfo; - unsigned N = I.getNumOperands(); - - for (unsigned i = N - 1; i > 2; --i) { - if (ConstantInt *CI = dyn_cast(I.getOperand(i))) { - unsigned FilterLength = CI->getZExtValue(); - unsigned FirstCatch = i + FilterLength + !FilterLength; - assert (FirstCatch <= N && "Invalid filter length"); - - if (FirstCatch < N) { - TyInfo.reserve(N - FirstCatch); - for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - if (!FilterLength) { - // Cleanup. - MMI->addCleanup(MBB); - } else { - // Filter. 
- TyInfo.reserve(FilterLength - 1); - for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); - MMI->addFilterTypeInfo(MBB, TyInfo); - TyInfo.clear(); - } - - N = i; - } - } - - if (N > 3) { - TyInfo.reserve(N - 3); - for (unsigned j = 3; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); - MMI->addCatchTypeInfo(MBB, TyInfo); - } -} - -} - /// GetSignificand - Get the significand and build it into a floating-point /// number with exponent of 1: /// @@ -3121,7 +2832,7 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { /// visitIntrinsicCall: I is a call instruction /// Op is the associated NodeType for I const char * -SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), @@ -3137,7 +2848,7 @@ SelectionDAGLowering::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) { // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * -SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { +SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { SDValue Op1 = getValue(I.getOperand(1)); SDValue Op2 = getValue(I.getOperand(2)); @@ -3151,7 +2862,7 @@ SelectionDAGLowering::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) { /// visitExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGLowering::visitExp(CallInst &I) { +SelectionDAGBuilder::visitExp(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3277,7 +2988,7 @@ SelectionDAGLowering::visitExp(CallInst &I) { /// visitLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. 
void -SelectionDAGLowering::visitLog(CallInst &I) { +SelectionDAGBuilder::visitLog(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3387,7 +3098,7 @@ SelectionDAGLowering::visitLog(CallInst &I) { /// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGLowering::visitLog2(CallInst &I) { +SelectionDAGBuilder::visitLog2(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3496,7 +3207,7 @@ SelectionDAGLowering::visitLog2(CallInst &I) { /// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGLowering::visitLog10(CallInst &I) { +SelectionDAGBuilder::visitLog10(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3598,7 +3309,7 @@ SelectionDAGLowering::visitLog10(CallInst &I) { /// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. void -SelectionDAGLowering::visitExp2(CallInst &I) { +SelectionDAGBuilder::visitExp2(CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); @@ -3712,7 +3423,7 @@ SelectionDAGLowering::visitExp2(CallInst &I) { /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. void -SelectionDAGLowering::visitPow(CallInst &I) { +SelectionDAGBuilder::visitPow(CallInst &I) { SDValue result; Value *Val = I.getOperand(1); DebugLoc dl = getCurDebugLoc(); @@ -3847,7 +3558,7 @@ SelectionDAGLowering::visitPow(CallInst &I) { /// we want to emit this as a call to a named external function, return the name /// otherwise lower it and return null. 
const char * -SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { +SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) { DebugLoc dl = getCurDebugLoc(); switch (Intrinsic) { default: @@ -4412,9 +4123,9 @@ isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr, return true; } -void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, - bool isTailCall, - MachineBasicBlock *LandingPad) { +void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, + bool isTailCall, + MachineBasicBlock *LandingPad) { const PointerType *PT = cast(CS.getCalledValue()->getType()); const FunctionType *FTy = cast(PT->getElementType()); const Type *RetTy = FTy->getReturnType(); @@ -4561,7 +4272,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee, } -void SelectionDAGLowering::visitCall(CallInst &I) { +void SelectionDAGBuilder::visitCall(CallInst &I) { const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { @@ -4956,7 +4667,7 @@ private: /// OpInfo describes the operand. /// Input and OutputRegs are the set of already allocated physical registers. /// -void SelectionDAGLowering:: +void SelectionDAGBuilder:: GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set &OutputRegs, std::set &InputRegs) { @@ -5150,7 +4861,7 @@ hasInlineAsmMemConstraint(std::vector &CInfos, /// visitInlineAsm - Handle a call to an InlineAsm object. /// -void SelectionDAGLowering::visitInlineAsm(CallSite CS) { +void SelectionDAGBuilder::visitInlineAsm(CallSite CS) { InlineAsm *IA = cast(CS.getCalledValue()); /// ConstraintOperands - Information about all of the constraints. 
@@ -5572,14 +5283,14 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) { DAG.setRoot(Chain); } -void SelectionDAGLowering::visitVAStart(CallInst &I) { +void SelectionDAGBuilder::visitVAStart(CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGLowering::visitVAArg(VAArgInst &I) { +void SelectionDAGBuilder::visitVAArg(VAArgInst &I) { SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0))); @@ -5587,14 +5298,14 @@ void SelectionDAGLowering::visitVAArg(VAArgInst &I) { DAG.setRoot(V.getValue(1)); } -void SelectionDAGLowering::visitVAEnd(CallInst &I) { +void SelectionDAGBuilder::visitVAEnd(CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), DAG.getSrcValue(I.getOperand(1)))); } -void SelectionDAGLowering::visitVACopy(CallInst &I) { +void SelectionDAGBuilder::visitVACopy(CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), getValue(I.getOperand(1)), @@ -5787,7 +5498,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { } -void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { +void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) { SDValue Op = getValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && @@ -5805,9 +5516,9 @@ void SelectionDAGLowering::CopyValueToVirtualRegister(Value *V, unsigned Reg) { void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. 
Function &F = *LLVMBB->getParent(); - SelectionDAG &DAG = SDL->DAG; + SelectionDAG &DAG = SDB->DAG; SDValue OldRoot = DAG.getRoot(); - DebugLoc dl = SDL->getCurDebugLoc(); + DebugLoc dl = SDB->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); SmallVector Ins; @@ -5923,11 +5634,11 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, AssertOp); - MachineFunction& MF = SDL->DAG.getMachineFunction(); + MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); FLI.DemoteRegister = SRetReg; - NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue); + NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); // i indexes lowered arguments. Bump it past the hidden sret argument. @@ -5958,18 +5669,18 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) { i += NumParts; } if (!I->use_empty()) { - SDL->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues, - SDL->getCurDebugLoc())); + SDB->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues, + SDB->getCurDebugLoc())); // If this argument is live outside of the entry block, insert a copy from // whereever we got it to the vreg that other BB's will reference it as. - SDL->CopyToExportRegsIfNeeded(I); + SDB->CopyToExportRegsIfNeeded(I); } } assert(i == InVals.size() && "Argument register count mismatch!"); // Finally, if the target has anything special to do, allow it to do so. // FIXME: this should insert code into the DAG! - EmitFunctionEntryCode(F, SDL->DAG.getMachineFunction()); + EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction()); } /// Handle PHI nodes in successor blocks. 
Emit code into the SelectionDAG to @@ -6011,10 +5722,10 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); if (Constant *C = dyn_cast(PHIOp)) { - unsigned &RegOut = SDL->ConstantsOut[C]; + unsigned &RegOut = SDB->ConstantsOut[C]; if (RegOut == 0) { RegOut = FuncInfo->CreateRegForValue(C); - SDL->CopyValueToVirtualRegister(C, RegOut); + SDB->CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { @@ -6024,7 +5735,7 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { FuncInfo->StaticAllocaMap.count(cast(PHIOp)) && "Didn't codegen value into a register!??"); Reg = FuncInfo->CreateRegForValue(PHIOp); - SDL->CopyValueToVirtualRegister(PHIOp, Reg); + SDB->CopyValueToVirtualRegister(PHIOp, Reg); } } @@ -6036,12 +5747,12 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) { EVT VT = ValueVTs[vti]; unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) - SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); + SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } - SDL->ConstantsOut.clear(); + SDB->ConstantsOut.clear(); } /// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only @@ -6054,7 +5765,7 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; - unsigned OrigNumPHINodesToUpdate = SDL->PHINodesToUpdate.size(); + unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. 
@@ -6090,7 +5801,7 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT); else { - SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } } @@ -6099,10 +5810,10 @@ SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB, unsigned Reg = F->getRegForValue(PHIOp); if (Reg == 0) { - SDL->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } - SDL->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h similarity index 81% rename from lib/CodeGen/SelectionDAG/SelectionDAGBuild.h rename to lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 10f256c15306..244f9b5019e1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuild.h - Selection-DAG building ----------------------===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef SELECTIONDAGBUILD_H -#define SELECTIONDAGBUILD_H +#ifndef SELECTIONDAGBUILDER_H +#define SELECTIONDAGBUILDER_H #include "llvm/Constants.h" #include "llvm/CodeGen/SelectionDAG.h" @@ -25,7 +25,6 @@ #include "llvm/CodeGen/ValueTypes.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetMachine.h" #include #include @@ -45,6 +44,7 @@ class FPToSIInst; class FPToUIInst; class FPTruncInst; class Function; +class FunctionLoweringInfo; class GetElementPtrInst; class GCFunctionInfo; class ICmpInst; @@ 
-58,7 +58,6 @@ class LoadInst; class MachineBasicBlock; class MachineFunction; class MachineInstr; -class MachineModuleInfo; class MachineRegisterInfo; class PHINode; class PtrToIntInst; @@ -79,98 +78,12 @@ class UnwindInst; class VAArgInst; class ZExtInst; -//===--------------------------------------------------------------------===// -/// FunctionLoweringInfo - This contains information that is global to a -/// function that is used when lowering a region of the function. -/// -class FunctionLoweringInfo { -public: - TargetLowering &TLI; - Function *Fn; - MachineFunction *MF; - MachineRegisterInfo *RegInfo; - - /// CanLowerReturn - true iff the function's return value can be lowered to - /// registers. - bool CanLowerReturn; - - /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg - /// allocated to hold a pointer to the hidden sret parameter. - unsigned DemoteRegister; - - explicit FunctionLoweringInfo(TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(Function &Fn, MachineFunction &MF, SelectionDAG &DAG, - bool EnableFastISel); - - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. - DenseMap MBBMap; - - /// ValueMap - Since we emit code for the function a basic block at a time, - /// we must remember which virtual registers hold the values for - /// cross-basic-block values. - DenseMap ValueMap; - - /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in - /// the entry block. This allows the allocas to be efficiently referenced - /// anywhere in the function. - DenseMap StaticAllocaMap; - -#ifndef NDEBUG - SmallSet CatchInfoLost; - SmallSet CatchInfoFound; -#endif - - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. 
- bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } - - struct LiveOutInfo { - unsigned NumSignBits; - APInt KnownOne, KnownZero; - LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} - }; - - /// LiveOutRegInfo - Information about live out vregs, indexed by their - /// register number offset by 'FirstVirtualRegister'. - std::vector LiveOutRegInfo; - - /// clear - Clear out all the function-specific state. This returns this - /// FunctionLoweringInfo to an empty state, ready to be used for a - /// different function. - void clear() { - MBBMap.clear(); - ValueMap.clear(); - StaticAllocaMap.clear(); -#ifndef NDEBUG - CatchInfoLost.clear(); - CatchInfoFound.clear(); -#endif - LiveOutRegInfo.clear(); - } -}; - //===----------------------------------------------------------------------===// -/// SelectionDAGLowering - This is the common target-independent lowering +/// SelectionDAGBuilder - This is the common target-independent lowering /// implementation that is parameterized by a TargetLowering object. /// Also, targets can overload any lowering method. /// -class SelectionDAGLowering { +class SelectionDAGBuilder { MachineBasicBlock *CurMBB; /// CurDebugLoc - current file + line number. Changes as we build the DAG. @@ -260,9 +173,9 @@ class SelectionDAGLowering { size_t Clusterify(CaseVector& Cases, const SwitchInst &SI); - /// CaseBlock - This structure is used to communicate between SDLowering and - /// SDISel for the code generation of additional basic blocks needed by multi- - /// case switch statements. + /// CaseBlock - This structure is used to communicate between + /// SelectionDAGBuilder and SDISel for the code generation of additional basic + /// blocks needed by multi-case switch statements. 
struct CaseBlock { CaseBlock(ISD::CondCode cc, Value *cmplhs, Value *cmprhs, Value *cmpmiddle, MachineBasicBlock *truebb, MachineBasicBlock *falsebb, @@ -384,9 +297,9 @@ public: LLVMContext *Context; - SelectionDAGLowering(SelectionDAG &dag, TargetLowering &tli, - FunctionLoweringInfo &funcinfo, - CodeGenOpt::Level ol) + SelectionDAGBuilder(SelectionDAG &dag, TargetLowering &tli, + FunctionLoweringInfo &funcinfo, + CodeGenOpt::Level ol) : CurDebugLoc(DebugLoc::getUnknownLoc()), TLI(tli), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false), @@ -396,7 +309,7 @@ public: void init(GCFunctionInfo *gfi, AliasAnalysis &aa); /// clear - Clear out the curret SelectionDAG and the associated - /// state and prepare this SelectionDAGLowering object to be used + /// state and prepare this SelectionDAGBuilder object to be used /// for a new block. This doesn't clear out information about /// additional blocks that are needed to complete switch lowering /// or PHI node updating; that information is cleared out as it is @@ -569,11 +482,6 @@ private: const char *implVisitAluOverflow(CallInst &I, ISD::NodeType Op); }; -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. 
-void AddCatchInfo(CallInst &I, MachineModuleInfo *MMI, - MachineBasicBlock *MBB); - } // end namespace llvm #endif diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index ab5f21e4337c..c39437f98647 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -13,7 +13,8 @@ #define DEBUG_TYPE "isel" #include "ScheduleDAGSDNodes.h" -#include "SelectionDAGBuild.h" +#include "SelectionDAGBuilder.h" +#include "FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" @@ -279,14 +280,14 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), CurDAG(new SelectionDAG(TLI, *FuncInfo)), - SDL(new SelectionDAGLowering(*CurDAG, TLI, *FuncInfo, OL)), + SDB(new SelectionDAGBuilder(*CurDAG, TLI, *FuncInfo, OL)), GFI(), OptLevel(OL), DAGSize(0) {} SelectionDAGISel::~SelectionDAGISel() { - delete SDL; + delete SDB; delete CurDAG; delete FuncInfo; } @@ -331,8 +332,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { MachineModuleInfo *MMI = getAnalysisIfAvailable(); DwarfWriter *DW = getAnalysisIfAvailable(); CurDAG->init(*MF, MMI, DW); - FuncInfo->set(Fn, *MF, *CurDAG, EnableFastISel); - SDL->init(GFI, *AA); + FuncInfo->set(Fn, *MF, EnableFastISel); + SDB->init(GFI, *AA); for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) if (InvokeInst *Invoke = dyn_cast(I->getTerminator())) @@ -361,29 +362,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -static void copyCatchInfo(BasicBlock *SrcBB, BasicBlock *DestBB, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - for (BasicBlock::iterator I = SrcBB->begin(), E = --SrcBB->end(); I != E; ++I) - if (EHSelectorInst *EHSel = dyn_cast(I)) { - // 
Apply the catch info to DestBB. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); -#ifndef NDEBUG - if (!FLI.MBBMap[SrcBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); -#endif - } -} - void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, BasicBlock::iterator Begin, - BasicBlock::iterator End) { - SDL->setCurrentBasicBlock(BB); + BasicBlock::iterator End, + bool &HadTailCall) { + SDB->setCurrentBasicBlock(BB); MetadataContext &TheMetadata = LLVMBB->getParent()->getContext().getMetadata(); unsigned MDDbgKind = TheMetadata.getMDKind("dbg"); // Lower all of the non-terminator instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. - for (BasicBlock::iterator I = Begin; I != End && !SDL->HasTailCall; ++I) { + for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) { if (MDDbgKind) { // Update DebugLoc if debug information is attached with this // instruction. @@ -391,37 +380,38 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) { DILocation DILoc(Dbg); DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo()); - SDL->setCurDebugLoc(Loc); + SDB->setCurDebugLoc(Loc); if (MF->getDefaultDebugLoc().isUnknown()) MF->setDefaultDebugLoc(Loc); } } if (!isa(I)) - SDL->visit(*I); + SDB->visit(*I); } - if (!SDL->HasTailCall) { + if (!SDB->HasTailCall) { // Ensure that all instructions which are used outside of their defining // blocks are available as virtual registers. Invoke is handled elsewhere. for (BasicBlock::iterator I = Begin; I != End; ++I) if (!isa(I) && !isa(I)) - SDL->CopyToExportRegsIfNeeded(I); + SDB->CopyToExportRegsIfNeeded(I); // Handle PHI nodes in successor blocks. if (End == LLVMBB->end()) { HandlePHINodesInSuccessorBlocks(LLVMBB); // Lower the terminator after the copies are emitted. - SDL->visit(*LLVMBB->getTerminator()); + SDB->visit(*LLVMBB->getTerminator()); } } // Make sure the root of the DAG is up-to-date. 
- CurDAG->setRoot(SDL->getControlRoot()); + CurDAG->setRoot(SDB->getControlRoot()); // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); - SDL->clear(); + HadTailCall = SDB->HasTailCall; + SDB->clear(); } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -629,9 +619,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // inserted into. if (TimePassesIsEnabled) { NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); + BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); } else { - BB = Scheduler->EmitSchedule(&SDL->EdgeMapping); + BB = Scheduler->EmitSchedule(&SDB->EdgeMapping); } // Free the scheduler state. @@ -701,7 +691,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, unsigned LabelID = MMI->addLandingPad(BB); const TargetInstrDesc &II = TII.get(TargetInstrInfo::EH_LABEL); - BuildMI(BB, SDL->getCurDebugLoc(), II).addImm(LabelID); + BuildMI(BB, SDB->getCurDebugLoc(), II).addImm(LabelID); // Mark exception register as live in. unsigned Reg = TLI.getExceptionAddressRegister(); @@ -732,7 +722,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, if (I == E) // No catch info found - try to extract some from the successor. - copyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo); + CopyCatchInfo(Br->getSuccessor(0), LLVMBB, MMI, *FuncInfo); } } @@ -741,9 +731,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { - CurDAG->setRoot(SDL->getControlRoot()); + CurDAG->setRoot(SDB->getControlRoot()); CodeGenAndEmitDAG(); - SDL->clear(); + SDB->clear(); } FastIS->startNewBlock(BB); // Do FastISel on as many instructions as possible. 
@@ -796,8 +786,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, R = FuncInfo->CreateRegForValue(BI); } - SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); - SelectBasicBlock(LLVMBB, BI, next(BI)); + SDB->setCurDebugLoc(FastIS->getCurDebugLoc()); + + bool HadTailCall = false; + SelectBasicBlock(LLVMBB, BI, next(BI), HadTailCall); + + // If the call was emitted as a tail call, we're done with the block. + if (HadTailCall) { + BI = End; + break; + } + // If the instruction was codegen'd with multiple blocks, // inform the FastISel object where to resume inserting. FastIS->setCurrentBlock(BB); @@ -826,8 +825,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn, if (BI != End) { // If FastISel is run and it has known DebugLoc then use it. if (FastIS && !FastIS->getCurDebugLoc().isUnknown()) - SDL->setCurDebugLoc(FastIS->getCurDebugLoc()); - SelectBasicBlock(LLVMBB, BI, End); + SDB->setCurDebugLoc(FastIS->getCurDebugLoc()); + bool HadTailCall; + SelectBasicBlock(LLVMBB, BI, End, HadTailCall); } FinishBasicBlock(); @@ -843,150 +843,150 @@ SelectionDAGISel::FinishBasicBlock() { DEBUG(BB->dump()); DEBUG(errs() << "Total amount of phi nodes to update: " - << SDL->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) + << SDB->PHINodesToUpdate.size() << "\n"); + DEBUG(for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) errs() << "Node " << i << " : (" - << SDL->PHINodesToUpdate[i].first - << ", " << SDL->PHINodesToUpdate[i].second << ")\n"); + << SDB->PHINodesToUpdate[i].first + << ", " << SDB->PHINodesToUpdate[i].second << ")\n"); // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. 
- if (SDL->SwitchCases.empty() && - SDL->JTCases.empty() && - SDL->BitTestCases.empty()) { - for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + if (SDB->SwitchCases.empty() && + SDB->JTCases.empty() && + SDB->BitTestCases.empty()) { + for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; assert(PHI->getOpcode() == TargetInstrInfo::PHI && "This is not a machine PHI node that we are updating!"); - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } - SDL->PHINodesToUpdate.clear(); + SDB->PHINodesToUpdate.clear(); return; } - for (unsigned i = 0, e = SDL->BitTestCases.size(); i != e; ++i) { + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered - if (!SDL->BitTestCases[i].Emitted) { + if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDL->BitTestCases[i].Parent; - SDL->setCurrentBasicBlock(BB); + BB = SDB->BitTestCases[i].Parent; + SDB->setCurrentBasicBlock(BB); // Emit the code - SDL->visitBitTestHeader(SDL->BitTestCases[i]); - CurDAG->setRoot(SDL->getRoot()); + SDB->visitBitTestHeader(SDB->BitTestCases[i]); + CurDAG->setRoot(SDB->getRoot()); CodeGenAndEmitDAG(); - SDL->clear(); + SDB->clear(); } - for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); j != ej; ++j) { + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDL->BitTestCases[i].Cases[j].ThisBB; - SDL->setCurrentBasicBlock(BB); + BB = SDB->BitTestCases[i].Cases[j].ThisBB; + SDB->setCurrentBasicBlock(BB); // Emit the code if (j+1 != ej) - 
SDL->visitBitTestCase(SDL->BitTestCases[i].Cases[j+1].ThisBB, - SDL->BitTestCases[i].Reg, - SDL->BitTestCases[i].Cases[j]); + SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j]); else - SDL->visitBitTestCase(SDL->BitTestCases[i].Default, - SDL->BitTestCases[i].Reg, - SDL->BitTestCases[i].Cases[j]); + SDB->visitBitTestCase(SDB->BitTestCases[i].Default, + SDB->BitTestCases[i].Reg, + SDB->BitTestCases[i].Cases[j]); - CurDAG->setRoot(SDL->getRoot()); + CurDAG->setRoot(SDB->getRoot()); CodeGenAndEmitDAG(); - SDL->clear(); + SDB->clear(); } // Update PHI Nodes - for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->getOpcode() == TargetInstrInfo::PHI && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB. - if (PHIBB == SDL->BitTestCases[i].Default) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + if (PHIBB == SDB->BitTestCases[i].Default) { + PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Parent)); - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); + PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(SDL->BitTestCases[i].Cases. + PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. back().ThisBB)); } // One of "cases" BB. 
- for (unsigned j = 0, ej = SDL->BitTestCases[i].Cases.size(); + for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { - MachineBasicBlock* cBB = SDL->BitTestCases[i].Cases[j].ThisBB; + MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; if (cBB->succ_end() != std::find(cBB->succ_begin(),cBB->succ_end(), PHIBB)) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, + PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(cBB)); } } } } - SDL->BitTestCases.clear(); + SDB->BitTestCases.clear(); // If the JumpTable record is filled in, then we need to emit a jump table. // Updating the PHI nodes is tricky in this case, since we need to determine // whether the PHI is a successor of the range check MBB or the jump table MBB - for (unsigned i = 0, e = SDL->JTCases.size(); i != e; ++i) { + for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered - if (!SDL->JTCases[i].first.Emitted) { + if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDL->JTCases[i].first.HeaderBB; - SDL->setCurrentBasicBlock(BB); + BB = SDB->JTCases[i].first.HeaderBB; + SDB->setCurrentBasicBlock(BB); // Emit the code - SDL->visitJumpTableHeader(SDL->JTCases[i].second, SDL->JTCases[i].first); - CurDAG->setRoot(SDL->getRoot()); + SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first); + CurDAG->setRoot(SDB->getRoot()); CodeGenAndEmitDAG(); - SDL->clear(); + SDB->clear(); } // Set the current basic block to the mbb we wish to insert the code into - BB = SDL->JTCases[i].second.MBB; - SDL->setCurrentBasicBlock(BB); + BB = SDB->JTCases[i].second.MBB; + SDB->setCurrentBasicBlock(BB); // Emit the code - SDL->visitJumpTable(SDL->JTCases[i].second); - CurDAG->setRoot(SDL->getRoot()); + SDB->visitJumpTable(SDB->JTCases[i].second); + 
CurDAG->setRoot(SDB->getRoot()); CodeGenAndEmitDAG(); - SDL->clear(); + SDB->clear(); // Update PHI Nodes - for (unsigned pi = 0, pe = SDL->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = SDL->PHINodesToUpdate[pi].first; + for (unsigned pi = 0, pe = SDB->PHINodesToUpdate.size(); pi != pe; ++pi) { + MachineInstr *PHI = SDB->PHINodesToUpdate[pi].first; MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->getOpcode() == TargetInstrInfo::PHI && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. - if (PHIBB == SDL->JTCases[i].second.Default) { + if (PHIBB == SDB->JTCases[i].second.Default) { PHI->addOperand - (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand - (MachineOperand::CreateMBB(SDL->JTCases[i].first.HeaderBB)); + (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here if (BB->succ_end() != std::find(BB->succ_begin(),BB->succ_end(), PHIBB)) { PHI->addOperand - (MachineOperand::CreateReg(SDL->PHINodesToUpdate[pi].second, false)); + (MachineOperand::CreateReg(SDB->PHINodesToUpdate[pi].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } } } - SDL->JTCases.clear(); + SDB->JTCases.clear(); // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. 
- for (unsigned i = 0, e = SDL->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = SDL->PHINodesToUpdate[i].first; + for (unsigned i = 0, e = SDB->PHINodesToUpdate.size(); i != e; ++i) { + MachineInstr *PHI = SDB->PHINodesToUpdate[i].first; assert(PHI->getOpcode() == TargetInstrInfo::PHI && "This is not a machine PHI node that we are updating!"); if (BB->isSuccessor(PHI->getParent())) { - PHI->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[i].second, + PHI->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[i].second, false)); PHI->addOperand(MachineOperand::CreateMBB(BB)); } @@ -994,36 +994,36 @@ SelectionDAGISel::FinishBasicBlock() { // If we generated any switch lowering information, build and codegen any // additional DAGs necessary. - for (unsigned i = 0, e = SDL->SwitchCases.size(); i != e; ++i) { + for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = BB = SDL->SwitchCases[i].ThisBB; - SDL->setCurrentBasicBlock(BB); + MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; + SDB->setCurrentBasicBlock(BB); // Emit the code - SDL->visitSwitchCase(SDL->SwitchCases[i]); - CurDAG->setRoot(SDL->getRoot()); + SDB->visitSwitchCase(SDB->SwitchCases[i]); + CurDAG->setRoot(SDB->getRoot()); CodeGenAndEmitDAG(); // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. - while ((BB = SDL->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. + while ((BB = SDB->SwitchCases[i].TrueBB)) { // Handle LHS and RHS. // If new BB's are created during scheduling, the edges may have been // updated. That is, the edge from ThisBB to BB may have been split and // BB's predecessor is now another block. 
DenseMap::iterator EI = - SDL->EdgeMapping.find(BB); - if (EI != SDL->EdgeMapping.end()) + SDB->EdgeMapping.find(BB); + if (EI != SDB->EdgeMapping.end()) ThisBB = EI->second; for (MachineBasicBlock::iterator Phi = BB->begin(); Phi != BB->end() && Phi->getOpcode() == TargetInstrInfo::PHI; ++Phi){ // This value for this PHI node is recorded in PHINodesToUpdate, get it. for (unsigned pn = 0; ; ++pn) { - assert(pn != SDL->PHINodesToUpdate.size() && + assert(pn != SDB->PHINodesToUpdate.size() && "Didn't find PHI entry!"); - if (SDL->PHINodesToUpdate[pn].first == Phi) { - Phi->addOperand(MachineOperand::CreateReg(SDL->PHINodesToUpdate[pn]. + if (SDB->PHINodesToUpdate[pn].first == Phi) { + Phi->addOperand(MachineOperand::CreateReg(SDB->PHINodesToUpdate[pn]. second, false)); Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); break; @@ -1032,19 +1032,19 @@ SelectionDAGISel::FinishBasicBlock() { } // Don't process RHS if same block as LHS. - if (BB == SDL->SwitchCases[i].FalseBB) - SDL->SwitchCases[i].FalseBB = 0; + if (BB == SDB->SwitchCases[i].FalseBB) + SDB->SwitchCases[i].FalseBB = 0; // If we haven't handled the RHS, do so now. Otherwise, we're done. 
- SDL->SwitchCases[i].TrueBB = SDL->SwitchCases[i].FalseBB; - SDL->SwitchCases[i].FalseBB = 0; + SDB->SwitchCases[i].TrueBB = SDB->SwitchCases[i].FalseBB; + SDB->SwitchCases[i].FalseBB = 0; } - assert(SDL->SwitchCases[i].TrueBB == 0 && SDL->SwitchCases[i].FalseBB == 0); - SDL->clear(); + assert(SDB->SwitchCases[i].TrueBB == 0 && SDB->SwitchCases[i].FalseBB == 0); + SDB->clear(); } - SDL->SwitchCases.clear(); + SDB->SwitchCases.clear(); - SDL->PHINodesToUpdate.clear(); + SDB->PHINodesToUpdate.clear(); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index ccc5e3c75c99..c5adc5000dba 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -35,6 +35,9 @@ using namespace llvm; namespace llvm { template<> struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + static bool hasEdgeDestLabels() { return true; } @@ -48,8 +51,8 @@ namespace llvm { } /// edgeTargetsEdgeSource - This method returns true if this outgoing edge - /// should actually target another edge source, not a node. If this method is - /// implemented, getEdgeTarget should be implemented. + /// should actually target another edge source, not a node. If this method + /// is implemented, getEdgeTarget should be implemented. 
template static bool edgeTargetsEdgeSource(const void *Node, EdgeIter I) { return true; @@ -93,9 +96,16 @@ namespace llvm { } - static std::string getNodeLabel(const SDNode *Node, - const SelectionDAG *Graph, - bool ShortNames); + static std::string getSimpleNodeLabel(const SDNode *Node, + const SelectionDAG *G) { + std::string Result = Node->getOperationName(G); + { + raw_string_ostream OS(Result); + Node->print_details(OS, G); + } + return Result; + } + std::string getNodeLabel(const SDNode *Node, const SelectionDAG *Graph); static std::string getNodeAttributes(const SDNode *N, const SelectionDAG *Graph) { #ifndef NDEBUG @@ -121,14 +131,8 @@ namespace llvm { } std::string DOTGraphTraits::getNodeLabel(const SDNode *Node, - const SelectionDAG *G, - bool ShortNames) { - std::string Result = Node->getOperationName(G); - { - raw_string_ostream OS(Result); - Node->print_details(OS, G); - } - return Result; + const SelectionDAG *G) { + return DOTGraphTraits::getSimpleNodeLabel (Node, G); } @@ -269,8 +273,8 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { - O << DOTGraphTraits::getNodeLabel(FlaggedNodes.back(), - DAG, false); + O << DOTGraphTraits + ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); FlaggedNodes.pop_back(); if (!FlaggedNodes.empty()) O << "\n "; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 2ca52a48c2a9..68bc2d6306b2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -532,11 +532,6 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof) InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); - - // Tell Legalize whether the assembler supports DEBUG_LOC. 
- const MCAsmInfo *TASM = TM.getMCAsmInfo(); - if (!TASM || !TASM->hasDotLocAndDotFile()) - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); } TargetLowering::~TargetLowering() { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 3909c56bdbb2..58763718f9b5 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -2371,16 +2371,26 @@ namespace { struct DepthMBBCompare { typedef std::pair DepthMBBPair; bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { - if (LHS.first > RHS.first) return true; // Deeper loops first - return LHS.first == RHS.first && - LHS.second->getNumber() < RHS.second->getNumber(); + // Deeper loops first + if (LHS.first != RHS.first) + return LHS.first > RHS.first; + + // Prefer blocks that are more connected in the CFG. This takes care of + // the most difficult copies first while intervals are short. + unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); + unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); + if (cl != cr) + return cl > cr; + + // As a last resort, sort by block number. 
+ return LHS.second->getNumber() < RHS.second->getNumber(); } }; } void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, std::vector &TryAgain) { - DEBUG(errs() << ((Value*)MBB->getBasicBlock())->getName() << ":\n"); + DEBUG(errs() << MBB->getName() << ":\n"); std::vector VirtCopies; std::vector PhysCopies; diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 20c4a28b1f3d..237d0b5f4658 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -12,7 +12,6 @@ #include "Spiller.h" #include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -47,16 +46,14 @@ protected: MachineFunction *mf; LiveIntervals *lis; - LiveStacks *ls; MachineFrameInfo *mfi; MachineRegisterInfo *mri; const TargetInstrInfo *tii; VirtRegMap *vrm; /// Construct a spiller base. - SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, - VirtRegMap *vrm) : - mf(mf), lis(lis), ls(ls), vrm(vrm) + SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) + : mf(mf), lis(lis), vrm(vrm) { mfi = mf->getFrameInfo(); mri = &mf->getRegInfo(); @@ -169,9 +166,8 @@ protected: class TrivialSpiller : public SpillerBase { public: - TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, - VirtRegMap *vrm) - : SpillerBase(mf, lis, ls, vrm) {} + TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) + : SpillerBase(mf, lis, vrm) {} std::vector spill(LiveInterval *li, SmallVectorImpl &spillIs) { @@ -188,7 +184,7 @@ private: const MachineLoopInfo *loopInfo; VirtRegMap *vrm; public: - StandardSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, + StandardSpiller(MachineFunction *mf, LiveIntervals *lis, const MachineLoopInfo *loopInfo, VirtRegMap *vrm) : lis(lis), loopInfo(loopInfo), vrm(vrm) {} @@ -203,12 +199,11 @@ 
public: } llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, - LiveStacks *ls, const MachineLoopInfo *loopInfo, VirtRegMap *vrm) { switch (spillerOpt) { - case trivial: return new TrivialSpiller(mf, lis, ls, vrm); break; - case standard: return new StandardSpiller(mf, lis, ls, loopInfo, vrm); break; + case trivial: return new TrivialSpiller(mf, lis, vrm); break; + case standard: return new StandardSpiller(mf, lis, loopInfo, vrm); break; default: llvm_unreachable("Unreachable!"); break; } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 7ec8e6d7ffb5..c6bd9857dbaf 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -41,8 +41,7 @@ namespace llvm { /// Create and return a spiller object, as specified on the command line. Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li, - LiveStacks *ls, const MachineLoopInfo *loopInfo, - VirtRegMap *vrm); + const MachineLoopInfo *loopInfo, VirtRegMap *vrm); } #endif diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp new file mode 100644 index 000000000000..9c0b596c33f5 --- /dev/null +++ b/lib/CodeGen/TailDuplication.cpp @@ -0,0 +1,249 @@ +//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass duplicates basic blocks ending in unconditional branches into +// the tails of their predecessors. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "tailduplication" +#include "llvm/Function.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" +using namespace llvm; + +STATISTIC(NumTailDups , "Number of tail duplicated blocks"); +STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); +STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); + +// Heuristic for tail duplication. +static cl::opt +TailDuplicateSize("tail-dup-size", + cl::desc("Maximum instructions to consider tail duplicating"), + cl::init(2), cl::Hidden); + +namespace { + /// TailDuplicatePass - Perform tail duplication. 
+ class TailDuplicatePass : public MachineFunctionPass { + const TargetInstrInfo *TII; + MachineModuleInfo *MMI; + + public: + static char ID; + explicit TailDuplicatePass() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF); + virtual const char *getPassName() const { return "Tail Duplication"; } + + private: + bool TailDuplicateBlocks(MachineFunction &MF); + bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF); + void RemoveDeadBlock(MachineBasicBlock *MBB); + }; + + char TailDuplicatePass::ID = 0; +} + +FunctionPass *llvm::createTailDuplicatePass() { + return new TailDuplicatePass(); +} + +bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + MMI = getAnalysisIfAvailable(); + + bool MadeChange = false; + bool MadeChangeThisIteration = true; + while (MadeChangeThisIteration) { + MadeChangeThisIteration = false; + MadeChangeThisIteration |= TailDuplicateBlocks(MF); + MadeChange |= MadeChangeThisIteration; + } + + return MadeChange; +} + +/// TailDuplicateBlocks - Look for small blocks that are unconditionally +/// branched to and do not fall through. Tail-duplicate their instructions +/// into their predecessors to eliminate (dynamic) branches. +bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { + bool MadeChange = false; + + for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { + MachineBasicBlock *MBB = I++; + + // Only duplicate blocks that end with unconditional branches. + if (MBB->canFallThrough()) + continue; + + MadeChange |= TailDuplicate(MBB, MF); + + // If it is dead, remove it. + if (MBB->pred_empty()) { + NumInstrDups -= MBB->size(); + RemoveDeadBlock(MBB); + MadeChange = true; + ++NumDeadBlocks; + } + } + return MadeChange; +} + +/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each +/// of its predecessors. 
+bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, + MachineFunction &MF) { + // Don't try to tail-duplicate single-block loops. + if (TailBB->isSuccessor(TailBB)) + return false; + + // Set the limit on the number of instructions to duplicate, with a default + // of one less than the tail-merge threshold. When optimizing for size, + // duplicate only one, because one branch instruction can be eliminated to + // compensate for the duplication. + unsigned MaxDuplicateCount; + if (!TailBB->empty() && TailBB->back().getDesc().isIndirectBranch()) + // If the target has hardware branch prediction that can handle indirect + // branches, duplicating them can often make them predictable when there + // are common paths through the code. The limit needs to be high enough + // to allow undoing the effects of tail merging. + MaxDuplicateCount = 20; + else if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + MaxDuplicateCount = 1; + else + MaxDuplicateCount = TailDuplicateSize; + + // Check the instructions in the block to determine whether tail-duplication + // is invalid or unlikely to be profitable. + unsigned i = 0; + bool HasCall = false; + for (MachineBasicBlock::iterator I = TailBB->begin(); + I != TailBB->end(); ++I, ++i) { + // Non-duplicable things shouldn't be tail-duplicated. + if (I->getDesc().isNotDuplicable()) return false; + // Don't duplicate more than the threshold. + if (i == MaxDuplicateCount) return false; + // Remember if we saw a call. + if (I->getDesc().isCall()) HasCall = true; + } + // Heuristically, don't tail-duplicate calls if it would expand code size, + // as it's less likely to be worth the extra cost. + if (i > 1 && HasCall) + return false; + + // Iterate through all the unique predecessors and tail-duplicate this + // block into them, if possible. Copying the list ahead of time also + // avoids trouble with the predecessor list reallocating. 
+ bool Changed = false; + SmallSetVector Preds(TailBB->pred_begin(), + TailBB->pred_end()); + for (SmallSetVector::iterator PI = Preds.begin(), + PE = Preds.end(); PI != PE; ++PI) { + MachineBasicBlock *PredBB = *PI; + + assert(TailBB != PredBB && + "Single-block loop should have been rejected earlier!"); + if (PredBB->succ_size() > 1) continue; + + MachineBasicBlock *PredTBB, *PredFBB; + SmallVector PredCond; + if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) + continue; + if (!PredCond.empty()) + continue; + // EH edges are ignored by AnalyzeBranch. + if (PredBB->succ_size() != 1) + continue; + // Don't duplicate into a fall-through predecessor (at least for now). + if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) + continue; + + DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB + << "From Succ: " << *TailBB); + + // Remove PredBB's unconditional branch. + TII->RemoveBranch(*PredBB); + // Clone the contents of TailBB into PredBB. + for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); + I != E; ++I) { + MachineInstr *NewMI = MF.CloneMachineInstr(I); + PredBB->insert(PredBB->end(), NewMI); + } + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch + + // Update the CFG. + PredBB->removeSuccessor(PredBB->succ_begin()); + assert(PredBB->succ_empty() && + "TailDuplicate called on block with multiple successors!"); + for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), + E = TailBB->succ_end(); I != E; ++I) + PredBB->addSuccessor(*I); + + Changed = true; + ++NumTailDups; + } + + // If TailBB was duplicated into all its predecessors except for the prior + // block, which falls through unconditionally, move the contents of this + // block into the prior block. 
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB)); + MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; + SmallVector PriorCond; + bool PriorUnAnalyzable = + TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true); + // This has to check PrevBB->succ_size() because EH edges are ignored by + // AnalyzeBranch. + if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB && + TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 && + !TailBB->hasAddressTaken()) { + DEBUG(errs() << "\nMerging into block: " << PrevBB + << "From MBB: " << *TailBB); + PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end()); + PrevBB.removeSuccessor(PrevBB.succ_begin());; + assert(PrevBB.succ_empty()); + PrevBB.transferSuccessors(TailBB); + Changed = true; + } + + return Changed; +} + +/// RemoveDeadBlock - Remove the specified dead machine basic block from the +/// function, updating the CFG. +void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { + assert(MBB->pred_empty() && "MBB must be dead!"); + DEBUG(errs() << "\nRemoving MBB: " << *MBB); + + // Remove all successors. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_end()-1); + + // If there are any labels in the basic block, unregister them from + // MachineModuleInfo. + if (MMI && !MBB->empty()) { + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->isLabel()) + // The label ID # is always operand #0, an immediate. + MMI->InvalidateLabel(I->getOperand(0).getImm()); + } + } + + // Remove the block. 
+ MBB->eraseFromParent(); +} + diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index c836286f60ff..10c806677c93 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -1600,7 +1600,7 @@ private: std::vector &KillOps) { DEBUG(errs() << "\n**** Local spiller rewriting MBB '" - << MBB.getBasicBlock()->getName() << "':\n"); + << MBB.getName() << "':\n"); MachineFunction &MF = *MBB.getParent(); diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp index bb0eb7bcf197..3e6e050d6862 100644 --- a/lib/CompilerDriver/CompilationGraph.cpp +++ b/lib/CompilerDriver/CompilationGraph.cpp @@ -471,10 +471,10 @@ namespace llvm { struct DOTGraphTraits : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} template - static std::string getNodeLabel(const Node* N, const GraphType&, - bool ShortNames) + static std::string getNodeLabel(const Node* N, const GraphType&) { if (N->ToolPtr) if (N->ToolPtr->IsJoin()) diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 5f195ee8b10e..bbac762b4592 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -83,15 +83,15 @@ namespace { class JITResolverState { public: typedef ValueMap > - FunctionToStubMapTy; + FunctionToLazyStubMapTy; typedef std::map > CallSiteToFunctionMapTy; typedef ValueMap, CallSiteValueMapConfig> FunctionToCallSitesMapTy; typedef std::map, void*> GlobalToIndirectSymMapTy; private: - /// FunctionToStubMap - Keep track of the stub created for a particular - /// function so that we can reuse them if necessary. - FunctionToStubMapTy FunctionToStubMap; + /// FunctionToLazyStubMap - Keep track of the lazy stub created for a + /// particular function so that we can reuse them if necessary. 
+ FunctionToLazyStubMapTy FunctionToLazyStubMap; /// CallSiteToFunctionMap - Keep track of the function that each lazy call /// site corresponds to, and vice versa. @@ -103,12 +103,13 @@ namespace { GlobalToIndirectSymMapTy GlobalToIndirectSymMap; public: - JITResolverState() : FunctionToStubMap(this), + JITResolverState() : FunctionToLazyStubMap(this), FunctionToCallSitesMap(this) {} - FunctionToStubMapTy& getFunctionToStubMap(const MutexGuard& locked) { + FunctionToLazyStubMapTy& getFunctionToLazyStubMap( + const MutexGuard& locked) { assert(locked.holds(TheJIT->lock)); - return FunctionToStubMap; + return FunctionToLazyStubMap; } GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) { @@ -154,11 +155,11 @@ namespace { Function *const F = C2F_I->second; #ifndef NDEBUG - void *RealStub = FunctionToStubMap.lookup(F); + void *RealStub = FunctionToLazyStubMap.lookup(F); assert(RealStub == Stub && "Call-site that wasn't a stub pass in to EraseStub"); #endif - FunctionToStubMap.erase(F); + FunctionToLazyStubMap.erase(F); CallSiteToFunctionMap.erase(C2F_I); // Remove the stub from the function->call-sites map, and remove the whole @@ -196,7 +197,7 @@ namespace { /// JITResolver - Keep track of, and resolve, call sites for functions that /// have not yet been compiled. class JITResolver { - typedef JITResolverState::FunctionToStubMapTy FunctionToStubMapTy; + typedef JITResolverState::FunctionToLazyStubMapTy FunctionToLazyStubMapTy; typedef JITResolverState::CallSiteToFunctionMapTy CallSiteToFunctionMapTy; typedef JITResolverState::GlobalToIndirectSymMapTy GlobalToIndirectSymMapTy; @@ -206,8 +207,11 @@ namespace { JITResolverState state; - /// ExternalFnToStubMap - This is the equivalent of FunctionToStubMap for - /// external functions. + /// ExternalFnToStubMap - This is the equivalent of FunctionToLazyStubMap + /// for external functions. TODO: Of course, external functions don't need + /// a lazy stub. 
It's actually here to make it more likely that far calls + /// succeed, but no single stub can guarantee that. I'll remove this in a + /// subsequent checkin when I actually fix far calls. std::map ExternalFnToStubMap; /// revGOTMap - map addresses to indexes in the GOT @@ -230,14 +234,13 @@ namespace { TheJITResolver = 0; } - /// getFunctionStubIfAvailable - This returns a pointer to a function stub - /// if it has already been created. - void *getFunctionStubIfAvailable(Function *F); + /// getLazyFunctionStubIfAvailable - This returns a pointer to a function's + /// lazy-compilation stub if it has already been created. + void *getLazyFunctionStubIfAvailable(Function *F); - /// getFunctionStub - This returns a pointer to a function stub, creating - /// one on demand as needed. If empty is true, create a function stub - /// pointing at address 0, to be filled in later. - void *getFunctionStub(Function *F); + /// getLazyFunctionStub - This returns a pointer to a function's + /// lazy-compilation stub, creating one on demand as needed. + void *getLazyFunctionStub(Function *F); /// getExternalFunctionStub - Return a stub for the function at the /// specified address, created lazily on demand. @@ -268,10 +271,6 @@ namespace { class JITEmitter : public JITCodeEmitter { JITMemoryManager *MemMgr; - // When outputting a function stub in the context of some other function, we - // save BufferBegin/BufferEnd/CurBufferPtr here. - uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; - // When reattempting to JIT a function after running out of space, we store // the estimated size of the function we're trying to JIT here, so we can // ask the memory manager for at least this much space. 
When we @@ -397,11 +396,11 @@ namespace { void initJumpTableInfo(MachineJumpTableInfo *MJTI); void emitJumpTableInfo(MachineJumpTableInfo *MJTI); - virtual void startGVStub(const GlobalValue* GV, unsigned StubSize, - unsigned Alignment = 1); - virtual void startGVStub(const GlobalValue* GV, void *Buffer, + virtual void startGVStub(BufferState &BS, const GlobalValue* GV, + unsigned StubSize, unsigned Alignment = 1); + virtual void startGVStub(BufferState &BS, void *Buffer, unsigned StubSize); - virtual void* finishGVStub(const GlobalValue *GV); + virtual void* finishGVStub(BufferState &BS); /// allocateSpace - Reserves space in the current block if any, or /// allocate a new one of the given size. @@ -489,22 +488,22 @@ void CallSiteValueMapConfig::onDelete(JITResolverState *JRS, Function *F) { JRS->EraseAllCallSitesPrelocked(F); } -/// getFunctionStubIfAvailable - This returns a pointer to a function stub +/// getLazyFunctionStubIfAvailable - This returns a pointer to a function stub /// if it has already been created. -void *JITResolver::getFunctionStubIfAvailable(Function *F) { +void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { MutexGuard locked(TheJIT->lock); // If we already have a stub for this function, recycle it. - return state.getFunctionToStubMap(locked).lookup(F); + return state.getFunctionToLazyStubMap(locked).lookup(F); } /// getFunctionStub - This returns a pointer to a function stub, creating /// one on demand as needed. -void *JITResolver::getFunctionStub(Function *F) { +void *JITResolver::getLazyFunctionStub(Function *F) { MutexGuard locked(TheJIT->lock); - // If we already have a stub for this function, recycle it. - void *&Stub = state.getFunctionToStubMap(locked)[F]; + // If we already have a lazy stub for this function, recycle it. + void *&Stub = state.getFunctionToLazyStubMap(locked)[F]; if (Stub) return Stub; // Call the lazy resolver function if we are JIT'ing lazily. 
Otherwise we @@ -522,9 +521,13 @@ void *JITResolver::getFunctionStub(Function *F) { if (!Actual) return 0; } + MachineCodeEmitter::BufferState BS; + TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); + JE.startGVStub(BS, F, SL.Size, SL.Alignment); // Codegen a new stub, calling the lazy resolver or the actual address of the // external function, if it was resolved. Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE); + JE.finishGVStub(BS); if (Actual != (void*)(intptr_t)LazyResolverFn) { // If we are getting the stub for an external function, we really want the @@ -533,7 +536,7 @@ void *JITResolver::getFunctionStub(Function *F) { TheJIT->updateGlobalMapping(F, Stub); } - DEBUG(errs() << "JIT: Stub emitted at [" << Stub << "] for function '" + DEBUG(errs() << "JIT: Lazy stub emitted at [" << Stub << "] for function '" << F->getName() << "'\n"); // Finally, keep track of the stub-to-Function mapping so that the @@ -576,7 +579,11 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) { void *&Stub = ExternalFnToStubMap[FnAddr]; if (Stub) return Stub; + MachineCodeEmitter::BufferState BS; + TargetJITInfo::StubLayout SL = TheJIT->getJITInfo().getStubLayout(); + JE.startGVStub(BS, 0, SL.Size, SL.Alignment); Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE); + JE.finishGVStub(BS); DEBUG(errs() << "JIT: Stub emitted at [" << Stub << "] for external function at '" << FnAddr << "'\n"); @@ -598,10 +605,10 @@ void JITResolver::getRelocatableGVs(SmallVectorImpl &GVs, SmallVectorImpl &Ptrs) { MutexGuard locked(TheJIT->lock); - const FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked); + const FunctionToLazyStubMapTy &FM = state.getFunctionToLazyStubMap(locked); GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked); - for (FunctionToStubMapTy::const_iterator i = FM.begin(), e = FM.end(); + for (FunctionToLazyStubMapTy::const_iterator i = FM.begin(), e = FM.end(); i != e; ++i){ Function *F = i->first; if 
(F->isDeclaration() && F->hasExternalLinkage()) { @@ -727,32 +734,37 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, // If we have already compiled the function, return a pointer to its body. Function *F = cast(V); - void *FnStub = Resolver.getFunctionStubIfAvailable(F); + void *FnStub = Resolver.getLazyFunctionStubIfAvailable(F); if (FnStub) { - // Return the function stub if it's already created. We do this first - // so that we're returning the same address for the function as any - // previous call. + // Return the function stub if it's already created. We do this first so + // that we're returning the same address for the function as any previous + // call. TODO: Yes, this is wrong. The lazy stub isn't guaranteed to be + // close enough to call. AddStubToCurrentFunction(FnStub); return FnStub; } - // Otherwise if we have code, go ahead and return that. - void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); - if (ResultPtr) return ResultPtr; + // If we know the target can handle arbitrary-distance calls, try to + // return a direct pointer. + if (!MayNeedFarStub) { + // If we have code, go ahead and return that. + void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F); + if (ResultPtr) return ResultPtr; - // If this is an external function pointer, we can force the JIT to - // 'compile' it, which really just adds it to the map. - if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode() && - !MayNeedFarStub) - return TheJIT->getPointerToFunction(F); + // If this is an external function pointer, we can force the JIT to + // 'compile' it, which really just adds it to the map. + if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode()) + return TheJIT->getPointerToFunction(F); + } - // Otherwise, we have to emit a stub. - void *StubAddr = Resolver.getFunctionStub(F); + // Otherwise, we may need to emit a stub, and, conservatively, we + // always do so. 
+ void *StubAddr = Resolver.getLazyFunctionStub(F); // Add the stub to the current function's list of referenced stubs, so we can // deallocate them if the current function is ever freed. It's possible to - // return null from getFunctionStub in the case of a weak extern that fails - // to resolve. + // return null from getLazyFunctionStub in the case of a weak extern that + // fails to resolve. if (StubAddr) AddStubToCurrentFunction(StubAddr); @@ -1203,9 +1215,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) { if (DwarfExceptionHandling || JITEmitDebugInfo) { uintptr_t ActualSize = 0; - SavedBufferBegin = BufferBegin; - SavedBufferEnd = BufferEnd; - SavedCurBufferPtr = CurBufferPtr; + BufferState BS; + SaveStateTo(BS); if (MemMgr->NeedsExactSize()) { ActualSize = DE->GetDwarfTableSizeInBytes(F, *this, FnStart, FnEnd); @@ -1221,9 +1232,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, FrameRegister); uint8_t *EhEnd = CurBufferPtr; - BufferBegin = SavedBufferBegin; - BufferEnd = SavedBufferEnd; - CurBufferPtr = SavedCurBufferPtr; + RestoreStateFrom(BS); if (DwarfExceptionHandling) { TheJIT->RegisterTable(FrameRegister); @@ -1429,32 +1438,27 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { } } -void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize, - unsigned Alignment) { - SavedBufferBegin = BufferBegin; - SavedBufferEnd = BufferEnd; - SavedCurBufferPtr = CurBufferPtr; +void JITEmitter::startGVStub(BufferState &BS, const GlobalValue* GV, + unsigned StubSize, unsigned Alignment) { + SaveStateTo(BS); BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment); BufferEnd = BufferBegin+StubSize+1; } -void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer, - unsigned StubSize) { - SavedBufferBegin = BufferBegin; - SavedBufferEnd = BufferEnd; - SavedCurBufferPtr = CurBufferPtr; +void JITEmitter::startGVStub(BufferState &BS, 
void *Buffer, unsigned StubSize) { + SaveStateTo(BS); BufferBegin = CurBufferPtr = (uint8_t *)Buffer; BufferEnd = BufferBegin+StubSize+1; } -void *JITEmitter::finishGVStub(const GlobalValue* GV) { +void *JITEmitter::finishGVStub(BufferState &BS) { + assert(CurBufferPtr != BufferEnd && "Stub overflowed allocated space."); NumBytes += getCurrentPCOffset(); - std::swap(SavedBufferBegin, BufferBegin); - BufferEnd = SavedBufferEnd; - CurBufferPtr = SavedCurBufferPtr; - return SavedBufferBegin; + void *Result = BufferBegin; + RestoreStateFrom(BS); + return Result; } // getConstantPoolEntryAddress - Return the address of the 'ConstantNum' entry @@ -1530,19 +1534,23 @@ void *JIT::getPointerToFunctionOrStub(Function *F) { // Get a stub if the target supports it. assert(isa(JCE) && "Unexpected MCE?"); JITEmitter *JE = cast(getCodeEmitter()); - return JE->getJITResolver().getFunctionStub(F); + return JE->getJITResolver().getLazyFunctionStub(F); } void JIT::updateFunctionStub(Function *F) { // Get the empty stub we generated earlier. assert(isa(JCE) && "Unexpected MCE?"); JITEmitter *JE = cast(getCodeEmitter()); - void *Stub = JE->getJITResolver().getFunctionStub(F); + void *Stub = JE->getJITResolver().getLazyFunctionStub(F); + void *Addr = getPointerToGlobalIfAvailable(F); // Tell the target jit info to rewrite the stub at the specified address, // rather than creating a new one. - void *Addr = getPointerToGlobalIfAvailable(F); - getJITInfo().emitFunctionStubAtAddr(F, Addr, Stub, *getCodeEmitter()); + MachineCodeEmitter::BufferState BS; + TargetJITInfo::StubLayout layout = getJITInfo().getStubLayout(); + JE->startGVStub(BS, Stub, layout.Size); + getJITInfo().emitFunctionStub(F, Addr, *getCodeEmitter()); + JE->finishGVStub(BS); } /// freeMachineCodeForFunction - release machine code memory for given Function. 
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 59340d4d5bc5..9cf9c894d410 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -39,6 +39,7 @@ using namespace cl; //===----------------------------------------------------------------------===// // Template instantiations and anchors. // +namespace llvm { namespace cl { TEMPLATE_INSTANTIATION(class basic_parser); TEMPLATE_INSTANTIATION(class basic_parser); TEMPLATE_INSTANTIATION(class basic_parser); @@ -53,6 +54,7 @@ TEMPLATE_INSTANTIATION(class opt); TEMPLATE_INSTANTIATION(class opt); TEMPLATE_INSTANTIATION(class opt); TEMPLATE_INSTANTIATION(class opt); +} } // end namespace llvm::cl void Option::anchor() {} void basic_parser_impl::anchor() {} @@ -156,9 +158,9 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value, const StringMap &OptionsMap) { // Reject all dashes. if (Arg.empty()) return 0; - + size_t EqualPos = Arg.find('='); - + // If we have an equals sign, remember the value. if (EqualPos == StringRef::npos) { // Look up the option. @@ -171,13 +173,43 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value, StringMap::const_iterator I = OptionsMap.find(Arg.substr(0, EqualPos)); if (I == OptionsMap.end()) return 0; - + Value = Arg.substr(EqualPos+1); Arg = Arg.substr(0, EqualPos); return I->second; } +/// CommaSeparateAndAddOccurence - A wrapper around Handler->addOccurrence() that +/// does special handling of cl::CommaSeparated options. +static bool CommaSeparateAndAddOccurence(Option *Handler, unsigned pos, + StringRef ArgName, + StringRef Value, bool MultiArg = false) +{ + // Check to see if this option accepts a comma separated list of values. If + // it does, we have to split up the value into multiple values. + if (Handler->getMiscFlags() & CommaSeparated) { + StringRef Val(Value); + StringRef::size_type Pos = Val.find(','); + while (Pos != StringRef::npos) { + // Process the portion before the comma. 
+ if (Handler->addOccurrence(pos, ArgName, Val.substr(0, Pos), MultiArg)) + return true; + // Erase the portion before the comma, AND the comma. + Val = Val.substr(Pos+1); + Value.substr(Pos+1); // Increment the original value pointer as well. + // Check for another comma. + Pos = Val.find(','); + } + + Value = Val; + } + + if (Handler->addOccurrence(pos, ArgName, Value, MultiArg)) + return true; + + return false; +} /// ProvideOption - For Value, this differentiates between an empty value ("") /// and a null value (StringRef()). The later is accepted for arguments that @@ -209,7 +241,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, break; case ValueOptional: break; - + default: errs() << ProgramName << ": Bad ValueMask flag! CommandLine usage error:" @@ -219,13 +251,13 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, // If this isn't a multi-arg option, just run the handler. if (NumAdditionalVals == 0) - return Handler->addOccurrence(i, ArgName, Value); + return CommaSeparateAndAddOccurence(Handler, i, ArgName, Value); // If it is, run the handle several times. 
bool MultiArg = false; if (Value.data()) { - if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg)) return true; --NumAdditionalVals; MultiArg = true; @@ -235,8 +267,8 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName, if (i+1 >= argc) return Handler->error("not enough values!"); Value = argv[++i]; - - if (Handler->addOccurrence(i, ArgName, Value, MultiArg)) + + if (CommaSeparateAndAddOccurence(Handler, i, ArgName, Value, MultiArg)) return true; MultiArg = true; --NumAdditionalVals; @@ -298,7 +330,7 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, size_t Length = 0; Option *PGOpt = getOptionPred(Arg, Length, isPrefixedOrGrouping, OptionsMap); if (PGOpt == 0) return 0; - + // If the option is a prefixed option, then the value is simply the // rest of the name... so fall through to later processing, by // setting up the argument name flags and value fields. @@ -308,16 +340,16 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, assert(OptionsMap.count(Arg) && OptionsMap.find(Arg)->second == PGOpt); return PGOpt; } - + // This must be a grouped option... handle them now. Grouping options can't // have values. assert(isGrouping(PGOpt) && "Broken getOptionPred!"); - + do { // Move current arg name out of Arg into OneArgName. StringRef OneArgName = Arg.substr(0, Length); Arg = Arg.substr(Length); - + // Because ValueRequired is an invalid flag for grouped arguments, // we don't need to pass argc/argv in. assert(PGOpt->getValueExpectedFlag() != cl::ValueRequired && @@ -325,11 +357,11 @@ static Option *HandlePrefixedOrGroupedOption(StringRef &Arg, StringRef &Value, int Dummy; ErrorParsing |= ProvideOption(PGOpt, OneArgName, StringRef(), 0, 0, Dummy); - + // Get the next grouping option. 
PGOpt = getOptionPred(Arg, Length, isGrouping, OptionsMap); } while (PGOpt && Length != Arg.size()); - + // Return the last option with Arg cut down to just the last one. return PGOpt; } @@ -366,17 +398,17 @@ static void ParseCStringVector(std::vector &OutputVector, WorkStr = WorkStr.substr(Pos); continue; } - + // Find position of first delimiter. size_t Pos = WorkStr.find_first_of(Delims); if (Pos == StringRef::npos) Pos = WorkStr.size(); - + // Everything from 0 to Pos is the next word to copy. char *NewStr = (char*)malloc(Pos+1); memcpy(NewStr, WorkStr.data(), Pos); NewStr[Pos] = 0; OutputVector.push_back(NewStr); - + WorkStr = WorkStr.substr(Pos); } } @@ -563,7 +595,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, ProvidePositionalOption(ActivePositionalArg, argv[i], i); continue; // We are done! } - + if (!PositionalOpts.empty()) { PositionalVals.push_back(std::make_pair(argv[i],i)); @@ -593,7 +625,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Eat leading dashes. while (!ArgName.empty() && ArgName[0] == '-') ArgName = ArgName.substr(1); - + Handler = LookupOption(ArgName, Value, Opts); if (!Handler || Handler->getFormattingFlag() != cl::Positional) { ProvidePositionalOption(ActivePositionalArg, argv[i], i); @@ -605,7 +637,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, // Eat leading dashes. while (!ArgName.empty() && ArgName[0] == '-') ArgName = ArgName.substr(1); - + Handler = LookupOption(ArgName, Value, Opts); // Check to see if this "option" is really a prefixed or grouped argument. @@ -627,26 +659,6 @@ void cl::ParseCommandLineOptions(int argc, char **argv, continue; } - // Check to see if this option accepts a comma separated list of values. If - // it does, we have to split up the value into multiple values. - if (Handler->getMiscFlags() & CommaSeparated) { - StringRef Val(Value); - StringRef::size_type Pos = Val.find(','); - - while (Pos != StringRef::npos) { - // Process the portion before the comma. 
- ErrorParsing |= ProvideOption(Handler, ArgName, Val.substr(0, Pos), - argc, argv, i); - // Erase the portion before the comma, AND the comma. - Val = Val.substr(Pos+1); - Value.substr(Pos+1); // Increment the original value pointer as well. - - // Check for another comma. - Pos = Val.find(','); - } - Value = Val; - } - // If this is a named positional argument, just remember that it is the // active one... if (Handler->getFormattingFlag() == cl::Positional) @@ -881,7 +893,7 @@ bool parser::parse(Option &O, StringRef ArgName, Value = true; return false; } - + if (Arg == "false" || Arg == "FALSE" || Arg == "False" || Arg == "0") { Value = false; return false; @@ -903,7 +915,7 @@ bool parser::parse(Option &O, StringRef ArgName, Value = BOU_FALSE; return false; } - + return O.error("'" + Arg + "' is invalid value for boolean argument! Try 0 or 1"); } @@ -1020,7 +1032,7 @@ void generic_parser_base::printOptionInfo(const Option &O, static int OptNameCompare(const void *LHS, const void *RHS) { typedef std::pair pair_ty; - + return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first); } @@ -1054,11 +1066,11 @@ public: // Ignore really-hidden options. if (I->second->getOptionHiddenFlag() == ReallyHidden) continue; - + // Unless showhidden is set, ignore hidden flags. if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) continue; - + // If we've already seen this option, don't add it to the list again. if (!OptionSet.insert(I->second)) continue; @@ -1066,7 +1078,7 @@ public: Opts.push_back(std::pair(I->getKey().data(), I->second)); } - + // Sort the options list alphabetically. 
qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); @@ -1164,7 +1176,7 @@ public: std::vector > Targets; size_t Width = 0; - for (TargetRegistry::iterator it = TargetRegistry::begin(), + for (TargetRegistry::iterator it = TargetRegistry::begin(), ie = TargetRegistry::end(); it != ie; ++it) { Targets.push_back(std::make_pair(it->getName(), &*it)); Width = std::max(Width, strlen(Targets.back().first)); @@ -1183,7 +1195,7 @@ public: } void operator=(bool OptionWasSpecified) { if (!OptionWasSpecified) return; - + if (OverrideVersionPrinter == 0) { print(); exit(1); diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index 4b93f7f99a24..7dd42f4df8f7 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -136,7 +136,7 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { /// @param Type - If non-null, the kind of message (e.g., "error") which is /// prefixed to the message. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, - const char *Type) const { + const char *Type, bool ShowLine) const { // First thing to do: find the current buffer containing the specified // location. @@ -144,18 +144,22 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, assert(CurBuf != -1 && "Invalid or unspecified location!"); MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; - - + // Scan backward to find the start of the line. const char *LineStart = Loc.getPointer(); - while (LineStart != CurMB->getBufferStart() && + while (LineStart != CurMB->getBufferStart() && LineStart[-1] != '\n' && LineStart[-1] != '\r') --LineStart; - // Get the end of the line. - const char *LineEnd = Loc.getPointer(); - while (LineEnd != CurMB->getBufferEnd() && - LineEnd[0] != '\n' && LineEnd[0] != '\r') - ++LineEnd; + + std::string LineStr; + if (ShowLine) { + // Get the end of the line. 
+ const char *LineEnd = Loc.getPointer(); + while (LineEnd != CurMB->getBufferEnd() && + LineEnd[0] != '\n' && LineEnd[0] != '\r') + ++LineEnd; + LineStr = std::string(LineStart, LineEnd); + } std::string PrintedMsg; if (Type) { @@ -163,22 +167,21 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, const std::string &Msg, PrintedMsg += ": "; } PrintedMsg += Msg; - - // Print out the line. + return SMDiagnostic(CurMB->getBufferIdentifier(), FindLineNumber(Loc, CurBuf), Loc.getPointer()-LineStart, PrintedMsg, - std::string(LineStart, LineEnd)); + LineStr, ShowLine); } void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg, - const char *Type) const { + const char *Type, bool ShowLine) const { raw_ostream &OS = errs(); int CurBuf = FindBufferContainingLoc(Loc); assert(CurBuf != -1 && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); - GetMessage(Loc, Msg, Type).Print(0, OS); + GetMessage(Loc, Msg, Type, ShowLine).Print(0, OS); } //===----------------------------------------------------------------------===// @@ -201,8 +204,8 @@ void SMDiagnostic::Print(const char *ProgName, raw_ostream &S) { } S << ": " << Message << '\n'; - - if (LineNo != -1 && ColumnNo != -1) { + + if (LineNo != -1 && ColumnNo != -1 && ShowLine) { S << LineContents << '\n'; // Print out spaces/tabs before the caret. diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 51e11004f322..2d023e4895d0 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -23,7 +23,7 @@ static char ascii_tolower(char x) { /// compare_lower - Compare strings, ignoring case. 
int StringRef::compare_lower(StringRef RHS) const { - for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) { + for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { char LHC = ascii_tolower(Data[I]); char RHC = ascii_tolower(RHS.Data[I]); if (LHC != RHC) @@ -48,7 +48,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { size_t N = Str.size(); if (N > Length) return npos; - for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i) + for (size_t e = Length - N + 1, i = min(From, e); i != e; ++i) if (substr(i, N).equals(Str)) return i; return npos; @@ -76,7 +76,7 @@ size_t StringRef::rfind(StringRef Str) const { /// Note: O(size() * Chars.size()) StringRef::size_type StringRef::find_first_of(StringRef Chars, size_t From) const { - for (size_type i = std::min(From, Length), e = Length; i != e; ++i) + for (size_type i = min(From, Length), e = Length; i != e; ++i) if (Chars.find(Data[i]) != npos) return i; return npos; @@ -85,7 +85,7 @@ StringRef::size_type StringRef::find_first_of(StringRef Chars, /// find_first_not_of - Find the first character in the string that is not /// \arg C or npos if not found. 
StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { - for (size_type i = std::min(From, Length), e = Length; i != e; ++i) + for (size_type i = min(From, Length), e = Length; i != e; ++i) if (Data[i] != C) return i; return npos; @@ -97,7 +97,7 @@ StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const { /// Note: O(size() * Chars.size()) StringRef::size_type StringRef::find_first_not_of(StringRef Chars, size_t From) const { - for (size_type i = std::min(From, Length), e = Length; i != e; ++i) + for (size_type i = min(From, Length), e = Length; i != e; ++i) if (Chars.find(Data[i]) == npos) return i; return npos; diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 840fb98fe9f9..2fec094d79f0 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -90,6 +90,7 @@ const char *Triple::getOSTypeName(OSType Kind) { case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; case Linux: return "linux"; + case Lv2: return "lv2"; case MinGW32: return "mingw32"; case MinGW64: return "mingw64"; case NetBSD: return "netbsd"; @@ -227,7 +228,7 @@ void Triple::Parse() const { Arch = pic16; else if (ArchName == "powerpc") Arch = ppc; - else if (ArchName == "powerpc64") + else if ((ArchName == "powerpc64") || (ArchName == "ppu")) Arch = ppc64; else if (ArchName == "arm" || ArchName.startswith("armv") || @@ -293,6 +294,8 @@ void Triple::Parse() const { OS = FreeBSD; else if (OSName.startswith("linux")) OS = Linux; + else if (OSName.startswith("lv2")) + OS = Lv2; else if (OSName.startswith("mingw32")) OS = MinGW32; else if (OSName.startswith("mingw64")) diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt index 2945e33d5b1c..a56a1f78bb72 100644 --- a/lib/System/CMakeLists.txt +++ b/lib/System/CMakeLists.txt @@ -42,5 +42,5 @@ add_llvm_library(LLVMSystem ) if( BUILD_SHARED_LIBS AND NOT WIN32 ) - target_link_libraries(LLVMSystem dl) + target_link_libraries(LLVMSystem ${CMAKE_DL_LIBS}) endif() 
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp index 37591a57b070..e112698349ee 100644 --- a/lib/System/Host.cpp +++ b/lib/System/Host.cpp @@ -22,6 +22,9 @@ #ifdef LLVM_ON_WIN32 #include "Win32/Host.inc" #endif +#ifdef _MSC_VER +#include +#endif //===----------------------------------------------------------------------===// // diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc index 89285b48132f..ff1497a5c6df 100644 --- a/lib/System/Unix/Path.inc +++ b/lib/System/Unix/Path.inc @@ -335,7 +335,7 @@ getprogpath(char ret[PATH_MAX], const char *bin) free(pv); return (NULL); } -#endif +#endif // __FreeBSD__ /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. @@ -348,7 +348,8 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { uint32_t size = sizeof(exe_path); if (_NSGetExecutablePath(exe_path, &size) == 0) { char link_path[MAXPATHLEN]; - return Path(std::string(realpath(exe_path, link_path))); + if (realpath(exe_path, link_path)) + return Path(std::string(link_path)); } #elif defined(__FreeBSD__) char exe_path[PATH_MAX]; @@ -370,7 +371,8 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { // If the filename is a symlink, we need to resolve and return the location of // the actual executable. 
char link_path[MAXPATHLEN]; - return Path(std::string(realpath(DLInfo.dli_fname, link_path))); + if (realpath(DLInfo.dli_fname, link_path)) + return Path(std::string(link_path)); #endif return Path(); } @@ -453,6 +455,20 @@ Path::canWrite() const { return 0 == access(path.c_str(), W_OK); } +bool +Path::isRegularFile() const { + // Get the status so we can determine if its a file or directory + struct stat buf; + + if (0 != stat(path.c_str(), &buf)) + return false; + + if (S_ISREG(buf.st_mode)) + return true; + + return false; +} + bool Path::canExecute() const { if (0 != access(path.c_str(), R_OK | X_OK )) @@ -723,7 +739,7 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { bool Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { - // Get the status so we can determin if its a file or directory + // Get the status so we can determine if its a file or directory struct stat buf; if (0 != stat(path.c_str(), &buf)) { MakeErrMsg(ErrStr, path + ": can't get status of file"); diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc index 573369e97d49..634fbc7650b3 100644 --- a/lib/System/Win32/Path.inc +++ b/lib/System/Win32/Path.inc @@ -357,6 +357,13 @@ Path::canExecute() const { return attr != INVALID_FILE_ATTRIBUTES; } +bool +Path::isRegularFile() const { + if (isDirectory()) + return false; + return true; +} + std::string Path::getLast() const { // Find the last slash diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index b50b6098dd24..c95d4c8f3f41 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -402,6 +402,21 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI, return Found; } +/// isPredicable - Return true if the specified instruction can be predicated. +/// By default, this returns true for every instruction with a +/// PredicateOperand. 
+bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { + const TargetInstrDesc &TID = MI->getDesc(); + if (!TID.isPredicable()) + return false; + + if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { + ARMFunctionInfo *AFI = + MI->getParent()->getParent()->getInfo<ARMFunctionInfo>(); + return AFI->isThumb2Function(); + } + return true; +} /// FIXME: Works around a gcc miscompilation with -fstrict-aliasing static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT, @@ -647,11 +662,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, SrcRC == ARM::DPR_VFP2RegisterClass || SrcRC == ARM::DPR_8RegisterClass) { // Always use neon reg-reg move if source or dest is NEON-only regclass. - BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon), + DestReg).addReg(SrcReg)); } else if (DestRC == ARM::QPRRegisterClass || DestRC == ARM::QPR_VFP2RegisterClass || DestRC == ARM::QPR_8RegisterClass) { - BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ), + DestReg).addReg(SrcReg)); } else { return false; } @@ -695,13 +712,14 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().needsStackRealignment(MF))) { - BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO) - .addReg(SrcReg, getKillRegState(isKill)); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addMemOperand(MMO) + .addReg(SrcReg, getKillRegState(isKill))); } else { - BuildMI(MBB, I, DL, get(ARM::VSTRQ)). - addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRQ)). 
+ addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } } @@ -740,11 +758,12 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // FIXME: Neon instructions should support predicates if (Align >= 16 && (getRegisterInfo().needsStackRealignment(MF))) { - BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addMemOperand(MMO)); } else { - BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0). - addMemOperand(MMO); + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } } } @@ -978,7 +997,10 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, const MachineInstr *MI1, const MachineRegisterInfo *MRI) const { int Opcode = MI0->getOpcode(); - if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) { + if (Opcode == ARM::t2LDRpci || + Opcode == ARM::t2LDRpci_pic || + Opcode == ARM::tLDRpci || + Opcode == ARM::tLDRpci_pic) { if (MI1->getOpcode() != Opcode) return false; if (MI0->getNumOperands() != MI1->getNumOperands()) @@ -1005,16 +1027,6 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); } -unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB, - unsigned DefaultLimit) const { - // If the target processor can predict indirect branches, it is highly - // desirable to duplicate them, since it can often make them predictable. - if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) && - getSubtarget().hasBranchTargetBuffer()) - return DefaultLimit + 2; - return DefaultLimit; -} - /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. 
It also returns the condition code /// register by reference. diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 73e854faf2fc..282e30c6f9f9 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -162,6 +162,22 @@ namespace ARMII { I_BitShift = 25, CondShift = 28 }; + + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // ARM Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_LO16 - On a symbol operand, this represents a relocation containing + /// lower 16 bit of the address. Used only via movw instruction. + MO_LO16, + + /// MO_HI16 - On a symbol operand, this represents a relocation containing + /// higher 16 bit of the address. Used only via movt instruction. + MO_HI16 + }; } class ARMBaseInstrInfo : public TargetInstrInfoImpl { @@ -220,6 +236,8 @@ public: virtual bool DefinesPredicate(MachineInstr *MI, std::vector &Pred) const; + virtual bool isPredicable(MachineInstr *MI) const; + /// GetInstSize - Returns the size of the specified MachineInstr. 
/// virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const; @@ -272,9 +290,6 @@ public: virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, const MachineRegisterInfo *MRI) const; - - virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB, - unsigned DefaultLimit) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 19762ee5cfb0..653328d8d022 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -799,6 +799,54 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return ARM::SP; } +int +ARMBaseRegisterInfo::getFrameIndexReference(MachineFunction &MF, int FI, + unsigned &FrameReg) const { + const MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo(); + int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); + bool isFixed = MFI->isFixedObjectIndex(FI); + + FrameReg = ARM::SP; + if (AFI->isGPRCalleeSavedArea1Frame(FI)) + Offset -= AFI->getGPRCalleeSavedArea1Offset(); + else if (AFI->isGPRCalleeSavedArea2Frame(FI)) + Offset -= AFI->getGPRCalleeSavedArea2Offset(); + else if (AFI->isDPRCalleeSavedAreaFrame(FI)) + Offset -= AFI->getDPRCalleeSavedAreaOffset(); + else if (needsStackRealignment(MF)) { + // When dynamically realigning the stack, use the frame pointer for + // parameters, and the stack pointer for locals. + assert (hasFP(MF) && "dynamic stack realignment without a FP!"); + if (isFixed) { + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } + } else if (hasFP(MF) && AFI->hasStackFrame()) { + if (isFixed || MFI->hasVarSizedObjects()) { + // Use frame pointer to reference fixed objects unless this is a + // frameless function. + FrameReg = getFrameRegister(MF); + Offset -= AFI->getFramePtrSpillOffset(); + } else if (AFI->isThumb2Function()) { + // In Thumb2 mode, the negative offset is very limited. 
+ int FPOffset = Offset - AFI->getFramePtrSpillOffset(); + if (FPOffset >= -255 && FPOffset < 0) { + FrameReg = getFrameRegister(MF); + Offset = FPOffset; + } + } + } + return Offset; +} + + +int +ARMBaseRegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const { + unsigned FrameReg; + return getFrameIndexReference(MF, FI, FrameReg); +} + unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { llvm_unreachable("What is the exception register"); return 0; @@ -1115,45 +1163,13 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } - unsigned FrameReg = ARM::SP; int FrameIndex = MI.getOperand(i).getIndex(); int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj; - bool isFixed = MFI->isFixedObjectIndex(FrameIndex); + unsigned FrameReg; - // When doing dynamic stack realignment, all of these need to change(?) - if (AFI->isGPRCalleeSavedArea1Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex)) - Offset -= AFI->getGPRCalleeSavedArea2Offset(); - else if (AFI->isDPRCalleeSavedAreaFrame(FrameIndex)) - Offset -= AFI->getDPRCalleeSavedAreaOffset(); - else if (needsStackRealignment(MF)) { - // When dynamically realigning the stack, use the frame pointer for - // parameters, and the stack pointer for locals. - assert (hasFP(MF) && "dynamic stack realignment without a FP!"); - if (isFixed) { - FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - // When referencing from the frame pointer, stack pointer adjustments - // don't matter. - SPAdj = 0; - } - } else if (hasFP(MF) && AFI->hasStackFrame()) { - assert(SPAdj == 0 && "Unexpected stack offset!"); - if (isFixed || MFI->hasVarSizedObjects()) { - // Use frame pointer to reference fixed objects unless this is a - // frameless function. 
- FrameReg = getFrameRegister(MF); - Offset -= AFI->getFramePtrSpillOffset(); - } else if (AFI->isThumb2Function()) { - // In Thumb2 mode, the negative offset is very limited. - int FPOffset = Offset - AFI->getFramePtrSpillOffset(); - if (FPOffset >= -255 && FPOffset < 0) { - FrameReg = getFrameRegister(MF); - Offset = FPOffset; - } - } - } + Offset = getFrameIndexReference(MF, FrameIndex, FrameReg); + if (FrameReg != ARM::SP) + SPAdj = 0; // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 4b267b092c31..2788d0733d1a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -106,6 +106,9 @@ public: // Debug information queries. unsigned getRARegister() const; unsigned getFrameRegister(const MachineFunction &MF) const; + int getFrameIndexReference(MachineFunction &MF, int FI, + unsigned &FrameReg) const; + int getFrameIndexOffset(MachineFunction &MF, int FI) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 766acff797e2..17e7d4479db5 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -613,7 +613,6 @@ void Emitter::emitPseudoInstruction(const MachineInstr &MI) { break; case TargetInstrInfo::IMPLICIT_DEF: case TargetInstrInfo::KILL: - case ARM::DWARF_LOC: // Do nothing. break; case ARM::CONSTPOOL_ENTRY: diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index d22c43afc5f2..e59a315a483c 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -162,6 +162,9 @@ namespace { /// the branch fix up pass. bool HasFarJump; + /// HasInlineAsm - True if the function contains inline assembly. 
+ bool HasInlineAsm; + const TargetInstrInfo *TII; const ARMSubtarget *STI; ARMFunctionInfo *AFI; @@ -236,10 +239,19 @@ void ARMConstantIslands::verify(MachineFunction &MF) { if (!MBB->empty() && MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { unsigned MBBId = MBB->getNumber(); - assert((BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || + assert(HasInlineAsm || + (BBOffsets[MBBId]%4 == 0 && BBSizes[MBBId]%4 == 0) || (BBOffsets[MBBId]%4 != 0 && BBSizes[MBBId]%4 != 0)); } } + for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) { + CPUser &U = CPUsers[i]; + unsigned UserOffset = GetOffsetOf(U.MI) + (isThumb ? 4 : 8); + unsigned CPEOffset = GetOffsetOf(U.CPEMI); + unsigned Disp = UserOffset < CPEOffset ? CPEOffset - UserOffset : + UserOffset - CPEOffset; + assert(Disp <= U.MaxDisp && "Constant pool entry out of range!"); + } #endif } @@ -269,6 +281,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { isThumb2 = AFI->isThumb2Function(); HasFarJump = false; + HasInlineAsm = false; // Renumber all of the machine basic blocks in the function, guaranteeing that // the numbers agree with the position of the block in the function. @@ -452,6 +465,19 @@ void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { /// and finding all of the constant pool users. void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, const std::vector<MachineInstr*> &CPEMIs) { + // First thing, see if the function has any inline assembly in it. If so, + // we have to be conservative about alignment assumptions, as we don't + // know for sure the size of any instructions in the inline assembly. 
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) + if (I->getOpcode() == ARM::INLINEASM) + HasInlineAsm = true; + } + + // Now go back through the instructions and build up our data structures unsigned Offset = 0; for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { @@ -481,7 +507,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, // A Thumb1 table jump may involve padding; for the offsets to // be right, functions containing these must be 4-byte aligned. AFI->setAlign(2U); - if ((Offset+MBBSize)%4 != 0) + if ((Offset+MBBSize)%4 != 0 || HasInlineAsm) // FIXME: Add a pseudo ALIGN instruction instead. MBBSize += 2; // padding continue; // Does not get an entry in ImmBranches @@ -550,7 +576,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, case ARM::LEApcrel: // This takes a SoImm, which is 8 bit immediate rotated. We'll // pretend the maximum offset is 255 * 4. Since each instruction - // 4 byte wide, this is always correct. We'llc heck for other + // 4 byte wide, this is always correct. We'll check for other // displacements that fits in a SoImm as well. Bits = 8; Scale = 4; @@ -609,7 +635,7 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, if (isThumb && !MBB.empty() && MBB.begin()->getOpcode() == ARM::CONSTPOOL_ENTRY && - (Offset%4) != 0) + ((Offset%4) != 0 || HasInlineAsm)) MBBSize += 2; BBSizes.push_back(MBBSize); @@ -633,7 +659,7 @@ unsigned ARMConstantIslands::GetOffsetOf(MachineInstr *MI) const { // alignment padding, and compensate if so. if (isThumb && MI->getOpcode() == ARM::CONSTPOOL_ENTRY && - Offset%4 != 0) + (Offset%4 != 0 || HasInlineAsm)) Offset += 2; // Sum instructions before MI in MBB. 
@@ -829,7 +855,7 @@ bool ARMConstantIslands::CPEIsInRange(MachineInstr *MI, unsigned UserOffset, MachineInstr *CPEMI, unsigned MaxDisp, bool NegOk, bool DoDump) { unsigned CPEOffset = GetOffsetOf(CPEMI); - assert(CPEOffset%4 == 0 && "Misaligned CPE"); + assert((CPEOffset%4 == 0 || HasInlineAsm) && "Misaligned CPE"); if (DoDump) { DEBUG(errs() << "User of CPE#" << CPEMI->getOperand(0).getImm() @@ -870,7 +896,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB, if (!isThumb) continue; MachineBasicBlock *MBB = MBBI; - if (!MBB->empty()) { + if (!MBB->empty() && !HasInlineAsm) { // Constant pool entries require padding. if (MBB->begin()->getOpcode() == ARM::CONSTPOOL_ENTRY) { unsigned OldOffset = BBOffsets[i] - delta; @@ -1226,7 +1252,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, BBOffsets[NewIsland->getNumber()] = BBOffsets[NewMBB->getNumber()]; // Compensate for .align 2 in thumb mode. - if (isThumb && BBOffsets[NewIsland->getNumber()]%4 != 0) + if (isThumb && (BBOffsets[NewIsland->getNumber()]%4 != 0 || HasInlineAsm)) Size += 2; // Increase the size of the island block to account for the new entry. 
BBSizes[NewIsland->getNumber()] += Size; diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 4d0f8993e002..c929c54d489f 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -75,17 +75,30 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { } case ARM::t2MOVi32imm: { unsigned DstReg = MI.getOperand(0).getReg(); - unsigned Imm = MI.getOperand(1).getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; if (!MI.getOperand(0).isDead()) { - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::t2MOVi16), DstReg) - .addImm(Lo16)); - AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true)) - .addReg(DstReg).addImm(Hi16)); + const MachineOperand &MO = MI.getOperand(1); + MachineInstrBuilder LO16, HI16; + + LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVi16), + DstReg); + HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true)).addReg(DstReg); + + if (MO.isImm()) { + unsigned Imm = MO.getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + LO16 = LO16.addImm(Lo16); + HI16 = HI16.addImm(Hi16); + } else { + GlobalValue *GV = MO.getGlobal(); + unsigned TF = MO.getTargetFlags(); + LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); + HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); + // FIXME: What's about memoperands? + } + AddDefaultPred(LO16); + AddDefaultPred(HI16); } MI.eraseFromParent(); Modified = true; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 696a8e1674dc..d63f3e66fa4f 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -149,6 +149,21 @@ private: /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. 
SDNode *SelectV6T2BitfieldExtractOp(SDValue Op, unsigned Opc); + /// SelectCMOVOp - Select CMOV instructions for ARM. + SDNode *SelectCMOVOp(SDValue Op); + SDNode *SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + SDNode *SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, + SDValue InFlag); + /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, @@ -246,7 +261,9 @@ bool ARMDAGToDAGISel::SelectAddrMode2(SDValue Op, SDValue N, if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } else if (N.getOpcode() == ARMISD::Wrapper) { + } else if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -448,7 +465,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); - } else if (N.getOpcode() == ARMISD::Wrapper) { + } else if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); } Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), @@ -543,7 +562,13 @@ ARMDAGToDAGISel::SelectThumbAddrModeRI5(SDValue Op, SDValue N, } if (N.getOpcode() != ISD::ADD) { - Base = (N.getOpcode() == ARMISD::Wrapper) ? 
N.getOperand(0) : N; + if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { + Base = N.getOperand(0); + } else + Base = N; + Offset = CurDAG->getRegister(0, MVT::i32); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -666,7 +691,9 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue Op, SDValue N, Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; - } else if (N.getOpcode() == ARMISD::Wrapper) { + } else if (N.getOpcode() == ARMISD::Wrapper && + !(Subtarget->useMovt() && + N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { Base = N.getOperand(0); if (Base.getOpcode() == ISD::TargetConstantPool) return false; // We want to select t2LDRpci instead. @@ -1034,12 +1061,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, case MVT::v4i32: OpcodeIndex = 2; break; } + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; std::vector ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); } EVT RegVT = GetNEONSubregVT(VT); @@ -1047,10 +1077,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. 
unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; std::vector ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1071,15 +1102,16 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, + Pred, PredReg, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, - Align, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5); + Align, Pred, PredReg, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. 
@@ -1123,6 +1155,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, case MVT::v4i32: OpcodeIndex = 2; break; } + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SmallVector Ops; Ops.push_back(MemAddr); Ops.push_back(MemUpdate); @@ -1133,8 +1168,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, unsigned Opc = DOpcodes[OpcodeIndex]; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); } EVT RegVT = GetNEONSubregVT(VT); @@ -1148,8 +1185,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3))); } + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); } // Otherwise, quad registers are stored with two separate instructions, @@ -1162,10 +1201,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, N->getOperand(Vec+3))); + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+5); + MVT::Other, Ops.data(), NumVecs+7); Chain = SDValue(VStA, 1); // Store the odd subregs. 
@@ -1173,10 +1214,12 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+4] = Chain; + Ops[NumVecs+4] = Pred; + Ops[NumVecs+5] = PredReg; + Ops[NumVecs+6] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+5); + MVT::Other, Ops.data(), NumVecs+7); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1224,6 +1267,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, case MVT::v4i32: OpcodeIndex = 1; break; } + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SmallVector Ops; Ops.push_back(MemAddr); Ops.push_back(MemUpdate); @@ -1249,15 +1295,17 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, N->getOperand(Vec+3))); } Ops.push_back(getI32Imm(Lane)); + Ops.push_back(Pred); + Ops.push_back(PredReg); Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); std::vector ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+5); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+7); // For a 64-bit vector load to D registers, nothing more needs to be done. 
if (is64BitVector) return VLdLn; @@ -1282,7 +1330,7 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, return NULL; unsigned Shl_imm = 0; - if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)){ + if (isOpcWithIntImmediate(Op.getOperand(0).getNode(), ISD::SHL, Shl_imm)) { assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); unsigned Srl_imm = 0; if (isInt32Immediate(Op.getOperand(1), Srl_imm)) { @@ -1302,6 +1350,173 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDValue Op, return NULL; } +SDNode *ARMDAGToDAGISel:: +SelectT2CMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + SDValue CPTmp0; + SDValue CPTmp1; + if (SelectT2ShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1)) { + unsigned SOVal = cast(CPTmp1)->getZExtValue(); + unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); + unsigned Opc = 0; + switch (SOShOp) { + case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; + case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; + case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; + case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; + default: + llvm_unreachable("Unknown so_reg opcode!"); + break; + } + SDValue SOShImm = + CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectARMCMOVShiftOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (SelectShifterOperandReg(Op, TrueVal, CPTmp0, CPTmp1, CPTmp2)) { + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), ARM::MOVCCs, MVT::i32, Ops, 
7); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectT2CMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + ConstantSDNode *T = dyn_cast(TrueVal); + if (!T) + return 0; + + if (Predicate_t2_so_imm(TrueVal.getNode())) { + SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), + ARM::t2MOVCCi, MVT::i32, Ops, 5); + } + return 0; +} + +SDNode *ARMDAGToDAGISel:: +SelectARMCMOVSoImmOp(SDValue Op, SDValue FalseVal, SDValue TrueVal, + ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { + ConstantSDNode *T = dyn_cast(TrueVal); + if (!T) + return 0; + + if (Predicate_so_imm(TrueVal.getNode())) { + SDValue True = CurDAG->getTargetConstant(T->getZExtValue(), MVT::i32); + SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; + return CurDAG->SelectNodeTo(Op.getNode(), + ARM::MOVCCi, MVT::i32, Ops, 5); + } + return 0; +} + +SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDValue Op) { + EVT VT = Op.getValueType(); + SDValue FalseVal = Op.getOperand(0); + SDValue TrueVal = Op.getOperand(1); + SDValue CC = Op.getOperand(2); + SDValue CCR = Op.getOperand(3); + SDValue InFlag = Op.getOperand(4); + assert(CC.getOpcode() == ISD::Constant); + assert(CCR.getOpcode() == ISD::Register); + ARMCC::CondCodes CCVal = + (ARMCC::CondCodes)cast(CC)->getZExtValue(); + + if (!Subtarget->isThumb1Only() && VT == MVT::i32) { + // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) + // Pattern complexity = 18 cost = 1 size = 0 + SDValue CPTmp0; + SDValue CPTmp1; + SDValue CPTmp2; + if (Subtarget->isThumb()) { + SDNode *Res = SelectT2CMOVShiftOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = 
SelectT2CMOVShiftOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } else { + SDNode *Res = SelectARMCMOVShiftOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectARMCMOVShiftOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, + // (imm:i32)<>:$true, + // (imm:i32):$cc) + // Emits: (MOVCCi:i32 GPR:i32:$false, + // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) + // Pattern complexity = 10 cost = 1 size = 0 + if (Subtarget->isThumb()) { + SDNode *Res = SelectT2CMOVSoImmOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectT2CMOVSoImmOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } else { + SDNode *Res = SelectARMCMOVSoImmOp(Op, FalseVal, TrueVal, + CCVal, CCR, InFlag); + if (!Res) + Res = SelectARMCMOVSoImmOp(Op, TrueVal, FalseVal, + ARMCC::getOppositeCondition(CCVal), CCR, InFlag); + if (Res) + return Res; + } + } + + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 1 size = 0 + // + // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) + // Pattern complexity = 6 cost = 11 size = 0 + // + // Also FCPYScc and FCPYDcc. + SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); + SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; + unsigned Opc = 0; + switch (VT.getSimpleVT().SimpleTy) { + default: assert(false && "Illegal conditional move type!"); + break; + case MVT::i32: + Opc = Subtarget->isThumb() + ? (Subtarget->hasThumb2() ? 
ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) + : ARM::MOVCCr; + break; + case MVT::f32: + Opc = ARM::VMOVScc; + break; + case MVT::f64: + Opc = ARM::VMOVDcc; + break; + } + return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); +} + SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); @@ -1337,7 +1552,7 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *ResNode; if (Subtarget->isThumb1Only()) { - SDValue Pred = CurDAG->getTargetConstant(0xEULL, MVT::i32); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); SDValue PredReg = CurDAG->getRegister(0, MVT::i32); SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() }; ResNode = CurDAG->getMachineNode(ARM::tLDRcp, dl, MVT::i32, MVT::Other, @@ -1549,122 +1764,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } - case ARMISD::CMOV: { - EVT VT = Op.getValueType(); - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - SDValue N2 = Op.getOperand(2); - SDValue N3 = Op.getOperand(3); - SDValue InFlag = Op.getOperand(4); - assert(N2.getOpcode() == ISD::Constant); - assert(N3.getOpcode() == ISD::Register); - - if (!Subtarget->isThumb1Only() && VT == MVT::i32) { - // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Pattern complexity = 18 cost = 1 size = 0 - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (Subtarget->isThumb()) { - if (SelectT2ShifterOperandReg(Op, N1, CPTmp0, CPTmp1)) { - unsigned SOVal = cast(CPTmp1)->getZExtValue(); - unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); - unsigned Opc = 0; - switch (SOShOp) { - case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; - case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; - case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; - case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; - default: - llvm_unreachable("Unknown so_reg opcode!"); - break; 
- } - SDValue SOShImm = - CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, CPTmp0, SOShImm, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), Opc, MVT::i32,Ops, 6); - } - } else { - if (SelectShifterOperandReg(Op, N1, CPTmp0, CPTmp1, CPTmp2)) { - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, CPTmp0, CPTmp1, CPTmp2, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), - ARM::MOVCCs, MVT::i32, Ops, 7); - } - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<>:$true, - // (imm:i32):$cc) - // Emits: (MOVCCi:i32 GPR:i32:$false, - // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) - // Pattern complexity = 10 cost = 1 size = 0 - if (N3.getOpcode() == ISD::Constant) { - if (Subtarget->isThumb()) { - if (Predicate_t2_so_imm(N3.getNode())) { - SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) - cast(N1)->getZExtValue()), - MVT::i32); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), - ARM::t2MOVCCi, MVT::i32, Ops, 5); - } - } else { - if (Predicate_so_imm(N3.getNode())) { - SDValue Tmp1 = CurDAG->getTargetConstant(((unsigned) - cast(N1)->getZExtValue()), - MVT::i32); - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, Tmp1, Tmp2, N3, InFlag }; - return CurDAG->SelectNodeTo(Op.getNode(), - ARM::MOVCCi, MVT::i32, Ops, 5); - } - } - } - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 1 size = 0 - // - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: 
(tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 11 size = 0 - // - // Also FCPYScc and FCPYDcc. - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Illegal conditional move type!"); - break; - case MVT::i32: - Opc = Subtarget->isThumb() - ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) - : ARM::MOVCCr; - break; - case MVT::f32: - Opc = ARM::VMOVScc; - break; - case MVT::f64: - Opc = ARM::VMOVDcc; - break; - } - return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); - } + case ARMISD::CMOV: + return SelectCMOVOp(Op); case ARMISD::CNEG: { EVT VT = Op.getValueType(); SDValue N0 = Op.getOperand(0); @@ -1707,8 +1808,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VZIPq32; break; } - return CurDAG->getMachineNode(Opc, dl, VT, VT, - N->getOperand(0), N->getOperand(1)); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } case ARMISD::VUZP: { unsigned Opc = 0; @@ -1724,8 +1827,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VUZPq32; break; } - return CurDAG->getMachineNode(Opc, dl, VT, VT, - N->getOperand(0), N->getOperand(1)); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } case ARMISD::VTRN: { unsigned Opc = 0; @@ -1741,8 +1846,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case MVT::v4f32: case MVT::v4i32: Opc = ARM::VTRNq32; break; } - return 
CurDAG->getMachineNode(Opc, dl, VT, VT, - N->getOperand(0), N->getOperand(1)); + SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); + SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; + return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops, 4); } case ISD::INTRINSIC_VOID: diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c3af8e695fc8..c839fc65606a 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -39,6 +39,7 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include @@ -355,10 +356,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - // Support label based line numbers. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); @@ -1360,10 +1357,17 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, PseudoSourceValue::getGOT(), 0); return Result; } else { - SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); - CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); - return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, - PseudoSourceValue::getConstantPool(), 0); + // If we have T2 ops, we can materialize the address directly via movt/movw + // pair. This is always cheaper. 
+ if (Subtarget->useMovt()) { + return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, + DAG.getTargetGlobalAddress(GV, PtrVT)); + } else { + SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); + CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); + return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, + PseudoSourceValue::getConstantPool(), 0); + } } } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 83b5cb4cac97..e76e93cf671c 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1217,27 +1217,45 @@ class AVConv5I opcod1, bits<4> opcod2, dag oops, dag iops, // class NeonI pattern> + string opc, string dt, string asm, string cstr, list pattern> : InstARM { let OutOperandList = oops; - let InOperandList = iops; - let AsmString = asm; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat( + !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), + !strconcat("\t", asm)); let Pattern = pattern; list Predicates = [HasNEON]; } -class NI pattern> - : NeonI { +// Same as NeonI except it does not have a "data type" specifier. 
+class NeonXI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm)); + let Pattern = pattern; + list Predicates = [HasNEON]; } -class NI4 pattern> - : NeonI { +class NI pattern> + : NeonXI { +} + +class NI4 pattern> + : NeonXI { } class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list pattern> - : NeonI { + string opc, string dt, string asm, string cstr, list pattern> + : NeonI { let Inst{31-24} = 0b11110100; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1246,8 +1264,16 @@ class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, } class NDataI pattern> - : NeonI { + string opc, string dt, string asm, string cstr, list pattern> + : NeonI { + let Inst{31-25} = 0b1111001; +} + +class NDataXI pattern> + : NeonXI { let Inst{31-25} = 0b1111001; } @@ -1255,8 +1281,8 @@ class NDataI op21_19, bits<4> op11_8, bit op7, bit op6, bit op5, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list pattern> - : NDataI { + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { let Inst{23} = op23; let Inst{21-19} = op21_19; let Inst{11-8} = op11_8; @@ -1270,8 +1296,8 @@ class N1ModImm op21_19, bits<4> op11_8, bit op7, bit op6, class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list pattern> - : NDataI { + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { let Inst{24-23} = op24_23; let Inst{21-20} = op21_20; let Inst{19-18} = op19_18; @@ -1281,14 +1307,16 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{4} = op4; } -// NEON Vector Duplicate (scalar). -// Inst{19-16} is specified by subclasses. 
-class N2VDup op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list pattern> - : NDataI { +// Same as N2V except it doesn't have a datatype suffix. +class N2VX op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, + bits<5> op11_7, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : NDataXI { let Inst{24-23} = op24_23; let Inst{21-20} = op21_20; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; let Inst{11-7} = op11_7; let Inst{6} = op6; let Inst{4} = op4; @@ -1297,8 +1325,8 @@ class N2VDup op24_23, bits<2> op21_20, bits<5> op11_7, bit op6, bit op4, // NEON 2 vector register with immediate. class N2VImm op11_8, bit op7, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list pattern> - : NDataI { + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { let Inst{24} = op24; let Inst{23} = op23; let Inst{11-8} = op11_8; @@ -1310,8 +1338,8 @@ class N2VImm op11_8, bit op7, bit op6, bit op4, // NEON 3 vector register format. class N3V op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list pattern> - : NDataI { + string opc, string dt, string asm, string cstr, list pattern> + : NDataI { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1320,16 +1348,15 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{4} = op4; } -// NEON 3 vector register with immediate. This is only used for VEXT where -// op11_8 represents the starting byte index of the extracted result in the -// concatenation of the operands and is left unspecified. -class N3VImm op21_20, bit op6, bit op4, - dag oops, dag iops, InstrItinClass itin, - string asm, string cstr, list pattern> - : NDataI { +// Same as N3VX except it doesn't have a data type suffix. 
+class N3VX op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, InstrItinClass itin, + string opc, string asm, string cstr, list pattern> + : NDataXI { let Inst{24} = op24; let Inst{23} = op23; let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; let Inst{6} = op6; let Inst{4} = op4; } @@ -1337,29 +1364,37 @@ class N3VImm op21_20, bit op6, bit op4, // NEON VMOVs between scalar and core registers. class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, Format f, InstrItinClass itin, - string opc, string asm, list pattern> - : AI { + string opc, string dt, string asm, list pattern> + : InstARM { let Inst{27-20} = opcod1; let Inst{11-8} = opcod2; let Inst{6-5} = opcod3; let Inst{4} = 1; + + let OutOperandList = oops; + let InOperandList = !con(iops, (ops pred:$p)); + let AsmString = !strconcat( + !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), + !strconcat("\t", asm)); + let Pattern = pattern; list Predicates = [HasNEON]; } class NVGetLane opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> + string opc, string dt, string asm, list pattern> : NVLaneOp; + opc, dt, asm, pattern>; class NVSetLane opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> + string opc, string dt, string asm, list pattern> : NVLaneOp; + opc, dt, asm, pattern>; class NVDup opcod1, bits<4> opcod2, bits<2> opcod3, dag oops, dag iops, InstrItinClass itin, - string opc, string asm, list pattern> + string opc, string dt, string asm, list pattern> : NVLaneOp; + opc, dt, asm, pattern>; // NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON // for single-precision FP. 
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 79bde29cd859..7516d3c85b97 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -116,6 +116,10 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">; def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">; def CarryDefIsUsed : Predicate<"N.getNode()->hasAnyUseOfValue(1)">; +// FIXME: Eventually this will be just "hasV6T2Ops". +def UseMovt : Predicate<"Subtarget->useMovt()">; +def DontUseMovt : Predicate<"!Subtarget->useMovt()">; + //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -204,7 +208,7 @@ def hi16 : SDNodeXFormgetZExtValue()) & 0xFFFFUL) == 0; - }], hi16>; +}], hi16>; /// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. @@ -284,6 +288,22 @@ def so_imm2part_2 : SDNodeXFormgetTargetConstant(V, MVT::i32); }]>; +def so_neg_imm2part : Operand, PatLeaf<(imm), [{ + return ARM_AM::isSOImmTwoPartVal(-(int)N->getZExtValue()); + }]> { + let PrintMethod = "printSOImm2PartOperand"; +} + +def so_neg_imm2part_1 : SDNodeXFormgetZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def so_neg_imm2part_2 : SDNodeXFormgetZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. def imm0_31 : Operand, PatLeaf<(imm), [{ return (int32_t)N->getZExtValue() < 32; @@ -568,12 +588,6 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -def DWARF_LOC : -PseudoInst<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), NoItinerary, - ".loc $file, $line, $col", - [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; - - // Address computation and loads and stores in PIC mode. 
let isNotDuplicable = 1 in { def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), @@ -581,25 +595,24 @@ def PICADD : AXI1<0b0100, (outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; let AddedComplexity = 10 in { -let canFoldAsLoad = 1 in def PICLDR : AXI2ldw<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr$p\t$dst, $addr", [(set GPR:$dst, (load addrmodepc:$addr))]>; def PICLDRH : AXI3ldh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}h\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrh${p}\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmodepc:$addr))]>; def PICLDRB : AXI2ldb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}b\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrb${p}\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmodepc:$addr))]>; def PICLDRSH : AXI3ldsh<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sh\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsh${p}\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmodepc:$addr))]>; def PICLDRSB : AXI3ldsb<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), - Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldr${p}sb\t$dst, $addr", + Pseudo, IIC_iLoadr, "\n${addr:label}:\n\tldrsb${p}\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmodepc:$addr))]>; } let AddedComplexity = 10 in { @@ -801,13 +814,14 @@ let isBranch = 1, isTerminator = 1 in { // // Load -let canFoldAsLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", [(set GPR:$dst, (load addrmode2:$addr))]>; // Special LDR for loads from non-pc-relative constpools. 
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, + mayHaveSideEffects = 1 in def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, "ldr", "\t$dst, $addr", []>; @@ -992,7 +1006,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src), let Constraints = "$src = $dst" in def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm), DPFrm, IIC_iMOVi, - "movt", "\t$dst, $imm", + "movt", "\t$dst, $imm", [(set GPR:$dst, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, @@ -1593,12 +1607,6 @@ let Defs = // Non-Instruction Patterns // -// ConstantPool, GlobalAddress, and JumpTable -def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>; -def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; -def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), - (LEApcrelJT tjumptable:$dst, imm:$id)>; - // Large immediate handling. // Two piece so_imms. @@ -1618,9 +1626,9 @@ def : ARMPat<(xor GPR:$LHS, so_imm2part:$RHS), def : ARMPat<(add GPR:$LHS, so_imm2part:$RHS), (ADDri (ADDri GPR:$LHS, (so_imm2part_1 imm:$RHS)), (so_imm2part_2 imm:$RHS))>; -def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), - (SUBri (SUBri GPR:$LHS, (so_imm2part_1 imm:$RHS)), - (so_imm2part_2 imm:$RHS))>; +def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS), + (SUBri (SUBri GPR:$LHS, (so_neg_imm2part_1 imm:$RHS)), + (so_neg_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. // This is a single pseudo instruction, the benefit is that it can be remat'd @@ -1628,10 +1636,19 @@ def : ARMPat<(sub GPR:$LHS, so_imm2part:$RHS), // FIXME: Remove this when we can do generalized remat. 
let isReMaterializable = 1 in def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi, - "movw", "\t$dst, ${src:lo16}\n\tmovt${p} $dst, ${src:hi16}", + "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>, Requires<[IsARM, HasV6T2]>; +// ConstantPool, GlobalAddress, and JumpTable +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (LEApcrel tglobaladdr :$dst)>, + Requires<[IsARM, DontUseMovt]>; +def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>; +def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>, + Requires<[IsARM, UseMovt]>; +def : ARMPat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (LEApcrelJT tjumptable:$dst, imm:$id)>; + // TODO: add,sub,and, 3-instr forms? diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index e1353b788add..3166931325d2 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -124,7 +124,7 @@ let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { def VLDMD : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), IIC_fpLoadm, - "vldm${addr:submode} ${addr:base}, $dst1", + "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; @@ -134,7 +134,7 @@ def VLDMD : NI<(outs), def VLDMS : NI<(outs), (ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops), IIC_fpLoadm, - "vldm${addr:submode} ${addr:base}, $dst1", + "vldm", "${addr:submode} ${addr:base}, $dst1", []> { let Inst{27-25} = 0b110; let Inst{20} = 1; @@ -146,7 +146,7 @@ def VLDMS : NI<(outs), // Use vldmia to load a Q register as a D register pair. 
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, - "vldmia\t$addr, ${dst:dregpair}", + "vldmia", "$addr, ${dst:dregpair}", [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -158,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), // Use vstmia to store a Q register as a D register pair. def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, - "vstmia\t$addr, ${src:dregpair}", + "vstmia", "$addr, ${src:dregpair}", [(store (v2f64 QPR:$src), addrmode4:$addr)]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -168,178 +168,221 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), } // VLD1 : Vector Load (multiple single elements) -class VLD1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VLD1D op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - !strconcat(OpcodeStr, "\t\\{$dst\\}, $addr"), "", + OpcodeStr, Dt, "\\{$dst\\}, $addr", "", [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -class VLD1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VLD1Q op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - !strconcat(OpcodeStr, "\t${dst:dregpair}, $addr"), "", + OpcodeStr, Dt, "${dst:dregpair}, $addr", "", [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -def VLD1d8 : VLD1D<0b0000, "vld1.8", v8i8, int_arm_neon_vld1>; -def VLD1d16 : VLD1D<0b0100, "vld1.16", v4i16, int_arm_neon_vld1>; -def VLD1d32 : VLD1D<0b1000, "vld1.32", v2i32, int_arm_neon_vld1>; -def VLD1df : VLD1D<0b1000, "vld1.32", v2f32, int_arm_neon_vld1>; -def VLD1d64 : VLD1D<0b1100, "vld1.64", v1i64, int_arm_neon_vld1>; +def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>; +def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>; +def VLD1d32 : 
VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>; +def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>; +def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>; -def VLD1q8 : VLD1Q<0b0000, "vld1.8", v16i8, int_arm_neon_vld1>; -def VLD1q16 : VLD1Q<0b0100, "vld1.16", v8i16, int_arm_neon_vld1>; -def VLD1q32 : VLD1Q<0b1000, "vld1.32", v4i32, int_arm_neon_vld1>; -def VLD1qf : VLD1Q<0b1000, "vld1.32", v4f32, int_arm_neon_vld1>; -def VLD1q64 : VLD1Q<0b1100, "vld1.64", v2i64, int_arm_neon_vld1>; +def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>; +def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>; +def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; +def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; +def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D op7_4, string OpcodeStr> +class VLD2D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, - !strconcat(OpcodeStr, "\t\\{$dst1,$dst2\\}, $addr"), "", []>; -class VLD2Q op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$dst1,$dst2\\}, $addr", "", []>; +class VLD2Q op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0011,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD2, - !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; -def VLD2d8 : VLD2D<0b0000, "vld2.8">; -def VLD2d16 : VLD2D<0b0100, "vld2.16">; -def VLD2d32 : VLD2D<0b1000, "vld2.32">; +def VLD2d8 : VLD2D<0b0000, "vld2", "8">; +def VLD2d16 : VLD2D<0b0100, "vld2", "16">; +def VLD2d32 : VLD2D<0b1000, "vld2", "32">; def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD1, - "vld1.64\t\\{$dst1,$dst2\\}, $addr", 
"", []>; + "vld1", "64", "\\{$dst1,$dst2\\}, $addr", "", []>; -def VLD2q8 : VLD2Q<0b0000, "vld2.8">; -def VLD2q16 : VLD2Q<0b0100, "vld2.16">; -def VLD2q32 : VLD2Q<0b1000, "vld2.32">; +def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; +def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; +def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; // VLD3 : Vector Load (multiple 3-element structures) -class VLD3D op7_4, string OpcodeStr> +class VLD3D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD3, - !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), "", []>; -class VLD3WB op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; +class VLD3WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), (ins addrmode6:$addr), IIC_VLD3, - !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3\\}, $addr"), + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3\\}, $addr", "$addr.addr = $wb", []>; -def VLD3d8 : VLD3D<0b0000, "vld3.8">; -def VLD3d16 : VLD3D<0b0100, "vld3.16">; -def VLD3d32 : VLD3D<0b1000, "vld3.32">; +def VLD3d8 : VLD3D<0b0000, "vld3", "8">; +def VLD3d16 : VLD3D<0b0100, "vld3", "16">; +def VLD3d32 : VLD3D<0b1000, "vld3", "32">; def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, - "vld1.64\t\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; + "vld1", "64", "\\{$dst1,$dst2,$dst3\\}, $addr", "", []>; // vld3 to double-spaced even registers. -def VLD3q8a : VLD3WB<0b0000, "vld3.8">; -def VLD3q16a : VLD3WB<0b0100, "vld3.16">; -def VLD3q32a : VLD3WB<0b1000, "vld3.32">; +def VLD3q8a : VLD3WB<0b0000, "vld3", "8">; +def VLD3q16a : VLD3WB<0b0100, "vld3", "16">; +def VLD3q32a : VLD3WB<0b1000, "vld3", "32">; // vld3 to double-spaced odd registers. 
-def VLD3q8b : VLD3WB<0b0000, "vld3.8">; -def VLD3q16b : VLD3WB<0b0100, "vld3.16">; -def VLD3q32b : VLD3WB<0b1000, "vld3.32">; +def VLD3q8b : VLD3WB<0b0000, "vld3", "8">; +def VLD3q16b : VLD3WB<0b0100, "vld3", "16">; +def VLD3q32b : VLD3WB<0b1000, "vld3", "32">; // VLD4 : Vector Load (multiple 4-element structures) -class VLD4D op7_4, string OpcodeStr> +class VLD4D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0000,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD4, - !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; -class VLD4WB op7_4, string OpcodeStr> +class VLD4WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b10,0b0001,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), (ins addrmode6:$addr), IIC_VLD4, - !strconcat(OpcodeStr, "\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr"), + OpcodeStr, Dt, "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "$addr.addr = $wb", []>; -def VLD4d8 : VLD4D<0b0000, "vld4.8">; -def VLD4d16 : VLD4D<0b0100, "vld4.16">; -def VLD4d32 : VLD4D<0b1000, "vld4.32">; +def VLD4d8 : VLD4D<0b0000, "vld4", "8">; +def VLD4d16 : VLD4D<0b0100, "vld4", "16">; +def VLD4d32 : VLD4D<0b1000, "vld4", "32">; def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD1, - "vld1.64\t\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; + "vld1", "64", "\\{$dst1,$dst2,$dst3,$dst4\\}, $addr", "", []>; // vld4 to double-spaced even registers. -def VLD4q8a : VLD4WB<0b0000, "vld4.8">; -def VLD4q16a : VLD4WB<0b0100, "vld4.16">; -def VLD4q32a : VLD4WB<0b1000, "vld4.32">; +def VLD4q8a : VLD4WB<0b0000, "vld4", "8">; +def VLD4q16a : VLD4WB<0b0100, "vld4", "16">; +def VLD4q32a : VLD4WB<0b1000, "vld4", "32">; // vld4 to double-spaced odd registers. 
-def VLD4q8b : VLD4WB<0b0000, "vld4.8">; -def VLD4q16b : VLD4WB<0b0100, "vld4.16">; -def VLD4q32b : VLD4WB<0b1000, "vld4.32">; +def VLD4q8b : VLD4WB<0b0000, "vld4", "8">; +def VLD4q16b : VLD4WB<0b0100, "vld4", "16">; +def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN op11_8, string OpcodeStr> - : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, - !strconcat(OpcodeStr, "\t\\{$dst1[$lane],$dst2[$lane]\\}, $addr"), - "$src1 = $dst1, $src2 = $dst2", []>; +class VLD2LN op11_8, string OpcodeStr, string Dt> + : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2, + OpcodeStr, Dt, "\\{$dst1[$lane],$dst2[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2", []>; -def VLD2LNd8 : VLD2LN<0b0001, "vld2.8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2.16">; -def VLD2LNd32 : VLD2LN<0b1001, "vld2.32">; +// vld2 to single-spaced registers. +def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; +def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { + let Inst{5} = 0; +} +def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { + let Inst{6} = 0; +} // vld2 to double-spaced even registers. -def VLD2LNq16a: VLD2LN<0b0101, "vld2.16">; -def VLD2LNq32a: VLD2LN<0b1001, "vld2.32">; +def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { + let Inst{5} = 1; +} +def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { + let Inst{6} = 1; +} // vld2 to double-spaced odd registers. 
-def VLD2LNq16b: VLD2LN<0b0101, "vld2.16">; -def VLD2LNq32b: VLD2LN<0b1001, "vld2.32">; +def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { + let Inst{5} = 1; +} +def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { + let Inst{6} = 1; +} // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN op11_8, string OpcodeStr> - : NLdSt<1,0b10,op11_8,0b0000, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, - !strconcat(OpcodeStr, - "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr"), - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; +class VLD3LN op11_8, string OpcodeStr, string Dt> + : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3, + OpcodeStr, Dt, + "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; -def VLD3LNd8 : VLD3LN<0b0010, "vld3.8">; -def VLD3LNd16 : VLD3LN<0b0110, "vld3.16">; -def VLD3LNd32 : VLD3LN<0b1010, "vld3.32">; +// vld3 to single-spaced registers. +def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { + let Inst{4} = 0; +} +def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { + let Inst{5-4} = 0b00; +} +def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { + let Inst{6-4} = 0b000; +} // vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3.16">; -def VLD3LNq32a: VLD3LN<0b1010, "vld3.32">; +def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { + let Inst{5-4} = 0b10; +} +def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { + let Inst{6-4} = 0b100; +} // vld3 to double-spaced odd registers. 
-def VLD3LNq16b: VLD3LN<0b0110, "vld3.16">; -def VLD3LNq32b: VLD3LN<0b1010, "vld3.32">; +def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { + let Inst{5-4} = 0b10; +} +def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { + let Inst{6-4} = 0b100; +} // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN op11_8, string OpcodeStr> - : NLdSt<1,0b10,op11_8,0b0000, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, - !strconcat(OpcodeStr, - "\t\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr"), - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; +class VLD4LN op11_8, string OpcodeStr, string Dt> + : NLdSt<1,0b10,op11_8,{?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4, + OpcodeStr, Dt, + "\\{$dst1[$lane],$dst2[$lane],$dst3[$lane],$dst4[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; -def VLD4LNd8 : VLD4LN<0b0011, "vld4.8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4.16">; -def VLD4LNd32 : VLD4LN<0b1011, "vld4.32">; +// vld4 to single-spaced registers. +def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; +def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { + let Inst{5} = 0; +} +def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { + let Inst{6} = 0; +} // vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4.16">; -def VLD4LNq32a: VLD4LN<0b1011, "vld4.32">; +def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { + let Inst{5} = 1; +} +def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { + let Inst{6} = 1; +} // vld4 to double-spaced odd registers. 
-def VLD4LNq16b: VLD4LN<0b0111, "vld4.16">; -def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">; +def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { + let Inst{5} = 1; +} +def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { + let Inst{6} = 1; +} // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -349,178 +392,221 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4.32">; } // mayLoad = 1, hasExtraDefRegAllocReq = 1 // VST1 : Vector Store (multiple single elements) -class VST1D op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VST1D op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src\\}, $addr"), "", + OpcodeStr, Dt, "\\{$src\\}, $addr", "", [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q op7_4, string OpcodeStr, ValueType Ty, Intrinsic IntOp> +class VST1Q op7_4, string OpcodeStr, string Dt, + ValueType Ty, Intrinsic IntOp> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - !strconcat(OpcodeStr, "\t${src:dregpair}, $addr"), "", + OpcodeStr, Dt, "${src:dregpair}, $addr", "", [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "vst1.8", v8i8, int_arm_neon_vst1>; -def VST1d16 : VST1D<0b0100, "vst1.16", v4i16, int_arm_neon_vst1>; -def VST1d32 : VST1D<0b1000, "vst1.32", v2i32, int_arm_neon_vst1>; -def VST1df : VST1D<0b1000, "vst1.32", v2f32, int_arm_neon_vst1>; -def VST1d64 : VST1D<0b1100, "vst1.64", v1i64, int_arm_neon_vst1>; +def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>; +def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>; +def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>; +def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>; +def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>; -def VST1q8 : VST1Q<0b0000, 
"vst1.8", v16i8, int_arm_neon_vst1>; -def VST1q16 : VST1Q<0b0100, "vst1.16", v8i16, int_arm_neon_vst1>; -def VST1q32 : VST1Q<0b1000, "vst1.32", v4i32, int_arm_neon_vst1>; -def VST1qf : VST1Q<0b1000, "vst1.32", v4f32, int_arm_neon_vst1>; -def VST1q64 : VST1Q<0b1100, "vst1.64", v2i64, int_arm_neon_vst1>; +def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>; +def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>; +def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>; +def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; +def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; } // hasExtraSrcRegAllocReq let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { // VST2 : Vector Store (multiple 2-element structures) -class VST2D op7_4, string OpcodeStr> +class VST2D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b1000,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src1,$src2\\}, $addr"), "", []>; -class VST2Q op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$src1,$src2\\}, $addr", "", []>; +class VST2Q op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0011,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; -def VST2d8 : VST2D<0b0000, "vst2.8">; -def VST2d16 : VST2D<0b0100, "vst2.16">; -def VST2d32 : VST2D<0b1000, "vst2.32">; +def VST2d8 : VST2D<0b0000, "vst2", "8">; +def VST2d16 : VST2D<0b0100, "vst2", "16">; +def VST2d32 : VST2D<0b1000, "vst2", "32">; def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - "vst1.64\t\\{$src1,$src2\\}, $addr", "", []>; + "vst1", "64", "\\{$src1,$src2\\}, $addr", "", []>; -def VST2q8 : VST2Q<0b0000, "vst2.8">; -def VST2q16 : VST2Q<0b0100, "vst2.16">; -def VST2q32 : VST2Q<0b1000, "vst2.32">; +def 
VST2q8 : VST2Q<0b0000, "vst2", "8">; +def VST2q16 : VST2Q<0b0100, "vst2", "16">; +def VST2q32 : VST2Q<0b1000, "vst2", "32">; // VST3 : Vector Store (multiple 3-element structures) -class VST3D op7_4, string OpcodeStr> +class VST3D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0100,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), "", []>; -class VST3WB op7_4, string OpcodeStr> + OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr", "", []>; +class VST3WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3\\}, $addr"), + OpcodeStr, Dt, "\\{$src1,$src2,$src3\\}, $addr", "$addr.addr = $wb", []>; -def VST3d8 : VST3D<0b0000, "vst3.8">; -def VST3d16 : VST3D<0b0100, "vst3.16">; -def VST3d32 : VST3D<0b1000, "vst3.32">; +def VST3d8 : VST3D<0b0000, "vst3", "8">; +def VST3d16 : VST3D<0b0100, "vst3", "16">; +def VST3d32 : VST3D<0b1000, "vst3", "32">; def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - "vst1.64\t\\{$src1,$src2,$src3\\}, $addr", "", []>; + "vst1", "64", "\\{$src1,$src2,$src3\\}, $addr", "", []>; // vst3 to double-spaced even registers. -def VST3q8a : VST3WB<0b0000, "vst3.8">; -def VST3q16a : VST3WB<0b0100, "vst3.16">; -def VST3q32a : VST3WB<0b1000, "vst3.32">; +def VST3q8a : VST3WB<0b0000, "vst3", "8">; +def VST3q16a : VST3WB<0b0100, "vst3", "16">; +def VST3q32a : VST3WB<0b1000, "vst3", "32">; // vst3 to double-spaced odd registers. 
-def VST3q8b : VST3WB<0b0000, "vst3.8">; -def VST3q16b : VST3WB<0b0100, "vst3.16">; -def VST3q32b : VST3WB<0b1000, "vst3.32">; +def VST3q8b : VST3WB<0b0000, "vst3", "8">; +def VST3q16b : VST3WB<0b0100, "vst3", "16">; +def VST3q32b : VST3WB<0b1000, "vst3", "32">; // VST4 : Vector Store (multiple 4-element structures) -class VST4D op7_4, string OpcodeStr> +class VST4D op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0000,op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; -class VST4WB op7_4, string OpcodeStr> +class VST4WB op7_4, string OpcodeStr, string Dt> : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src1,$src2,$src3,$src4\\}, $addr"), + OpcodeStr, Dt, "\\{$src1,$src2,$src3,$src4\\}, $addr", "$addr.addr = $wb", []>; -def VST4d8 : VST4D<0b0000, "vst4.8">; -def VST4d16 : VST4D<0b0100, "vst4.16">; -def VST4d32 : VST4D<0b1000, "vst4.32">; +def VST4d8 : VST4D<0b0000, "vst4", "8">; +def VST4d16 : VST4D<0b0100, "vst4", "16">; +def VST4d32 : VST4D<0b1000, "vst4", "32">; def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, - "vst1.64\t\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; + "vst1", "64", "\\{$src1,$src2,$src3,$src4\\}, $addr", "", []>; // vst4 to double-spaced even registers. -def VST4q8a : VST4WB<0b0000, "vst4.8">; -def VST4q16a : VST4WB<0b0100, "vst4.16">; -def VST4q32a : VST4WB<0b1000, "vst4.32">; +def VST4q8a : VST4WB<0b0000, "vst4", "8">; +def VST4q16a : VST4WB<0b0100, "vst4", "16">; +def VST4q32a : VST4WB<0b1000, "vst4", "32">; // vst4 to double-spaced odd registers. 
-def VST4q8b : VST4WB<0b0000, "vst4.8">; -def VST4q16b : VST4WB<0b0100, "vst4.16">; -def VST4q32b : VST4WB<0b1000, "vst4.32">; +def VST4q8b : VST4WB<0b0000, "vst4", "8">; +def VST4q16b : VST4WB<0b0100, "vst4", "16">; +def VST4q32b : VST4WB<0b1000, "vst4", "32">; // VST1LN : Vector Store (single element from one lane) // FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN op11_8, string OpcodeStr> - : NLdSt<1,0b00,op11_8,0b0000, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, - !strconcat(OpcodeStr, "\t\\{$src1[$lane],$src2[$lane]\\}, $addr"), - "", []>; +class VST2LN op11_8, string OpcodeStr, string Dt> + : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VST, + OpcodeStr, Dt, "\\{$src1[$lane],$src2[$lane]\\}, $addr", + "", []>; -def VST2LNd8 : VST2LN<0b0001, "vst2.8">; -def VST2LNd16 : VST2LN<0b0101, "vst2.16">; -def VST2LNd32 : VST2LN<0b1001, "vst2.32">; +// vst2 to single-spaced registers. +def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; +def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { + let Inst{5} = 0; +} +def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { + let Inst{6} = 0; +} // vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2.16">; -def VST2LNq32a: VST2LN<0b1001, "vst2.32">; +def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { + let Inst{5} = 1; +} +def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { + let Inst{6} = 1; +} // vst2 to double-spaced odd registers. 
-def VST2LNq16b: VST2LN<0b0101, "vst2.16">; -def VST2LNq32b: VST2LN<0b1001, "vst2.32">; +def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { + let Inst{5} = 1; +} +def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { + let Inst{6} = 1; +} // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LN op11_8, string OpcodeStr> - : NLdSt<1,0b00,op11_8,0b0000, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, - !strconcat(OpcodeStr, - "\t\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr"), "", []>; +class VST3LN op11_8, string OpcodeStr, string Dt> + : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VST, + OpcodeStr, Dt, + "\\{$src1[$lane],$src2[$lane],$src3[$lane]\\}, $addr", "", []>; -def VST3LNd8 : VST3LN<0b0010, "vst3.8">; -def VST3LNd16 : VST3LN<0b0110, "vst3.16">; -def VST3LNd32 : VST3LN<0b1010, "vst3.32">; +// vst3 to single-spaced registers. +def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { + let Inst{4} = 0; +} +def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { + let Inst{5-4} = 0b00; +} +def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { + let Inst{6-4} = 0b000; +} // vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3.16">; -def VST3LNq32a: VST3LN<0b1010, "vst3.32">; +def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { + let Inst{5-4} = 0b10; +} +def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { + let Inst{6-4} = 0b100; +} // vst3 to double-spaced odd registers. 
-def VST3LNq16b: VST3LN<0b0110, "vst3.16">; -def VST3LNq32b: VST3LN<0b1010, "vst3.32">; +def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { + let Inst{5-4} = 0b10; +} +def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { + let Inst{6-4} = 0b100; +} // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN op11_8, string OpcodeStr> - : NLdSt<1,0b00,op11_8,0b0000, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, - !strconcat(OpcodeStr, - "\t\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr"), - "", []>; +class VST4LN op11_8, string OpcodeStr, string Dt> + : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VST, + OpcodeStr, Dt, + "\\{$src1[$lane],$src2[$lane],$src3[$lane],$src4[$lane]\\}, $addr", + "", []>; -def VST4LNd8 : VST4LN<0b0011, "vst4.8">; -def VST4LNd16 : VST4LN<0b0111, "vst4.16">; -def VST4LNd32 : VST4LN<0b1011, "vst4.32">; +// vst4 to single-spaced registers. +def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; +def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { + let Inst{5} = 0; +} +def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { + let Inst{6} = 0; +} // vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4.16">; -def VST4LNq32a: VST4LN<0b1011, "vst4.32">; +def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { + let Inst{5} = 1; +} +def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { + let Inst{6} = 1; +} // vst4 to double-spaced odd registers. 
-def VST4LNq16b: VST4LN<0b0111, "vst4.16">; -def VST4LNq32b: VST4LN<0b1011, "vst4.32">; +def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { + let Inst{5} = 1; +} +def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { + let Inst{6} = 1; +} } // mayStore = 1, hasExtraSrcRegAllocReq = 1 @@ -570,25 +656,25 @@ def SubReg_i32_lane : SDNodeXForm op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; class N2VQ op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; // Basic 2-register operations, scalar single-precision. class N2VDs op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2V; + IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>; class N2VDsPat : NEONFPPat<(ResTy (OpNode SPR:$a)), @@ -599,27 +685,27 @@ class N2VDsPat // Basic 2-register intrinsics, both double- and quad-register. 
class N2VDInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Basic 2-register intrinsics, scalar single-precision class N2VDInts op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; + OpcodeStr, Dt, "$dst, $src", "", []>; class N2VDIntsPat : NEONFPPat<(f32 (OpNode SPR:$a)), @@ -630,49 +716,62 @@ class N2VDIntsPat // Narrow 2-register intrinsics. class N2VNInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp> : N2V; // Long 2-register intrinsics (currently only used for VMOVL). class N2VLInt op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N2V; // 2-register shuffles (VTRN/VZIP/VUZP), both double- and quad-register. 
-class N2VDShuffle op19_18, bits<5> op11_7, string OpcodeStr> +class N2VDShuffle op19_18, bits<5> op11_7, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 0, 0, (outs DPR:$dst1, DPR:$dst2), (ins DPR:$src1, DPR:$src2), IIC_VPERMD, - !strconcat(OpcodeStr, "\t$dst1, $dst2"), + OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; class N2VQShuffle op19_18, bits<5> op11_7, - InstrItinClass itin, string OpcodeStr> + InstrItinClass itin, string OpcodeStr, string Dt> : N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2), (ins QPR:$src1, QPR:$src2), itin, - !strconcat(OpcodeStr, "\t$dst1, $dst2"), + OpcodeStr, Dt, "$dst1, $dst2", "$src1 = $dst1, $src2 = $dst2", []>; // Basic 3-register operations, both double- and quad-register. class N3VD op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + let isCommutable = Commutable; +} +// Same as N3VD but no data type. 
+class N3VDX op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3VX { let isCommutable = Commutable; } class N3VDSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode ShOp> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -680,11 +779,11 @@ class N3VDSL op21_20, bits<4> op11_8, let isCommutable = 0; } class N3VDSL16 op21_20, bits<4> op11_8, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), IIC_VMULi16D, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), @@ -693,20 +792,31 @@ class N3VDSL16 op21_20, bits<4> op11_8, } class N3VQ op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + let isCommutable = Commutable; +} +class N3VQX op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, + ValueType ResTy, ValueType OpTy, + SDNode OpNode, bit Commutable> + : N3VX { let isCommutable = Commutable; } class N3VQSL op21_20, bits<4> op11_8, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs 
QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -714,11 +824,12 @@ class N3VQSL op21_20, bits<4> op11_8, let isCommutable = 0; } class N3VQSL16 op21_20, bits<4> op11_8, - string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), IIC_VMULi16Q, - !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -728,11 +839,11 @@ class N3VQSL16 op21_20, bits<4> op11_8, // Basic 3-register operations, scalar single-precision class N3VDs op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable> : N3V { + OpcodeStr, Dt, "$dst, $src1, $src2", "", []> { let isCommutable = Commutable; } class N3VDsPat @@ -744,19 +855,20 @@ class N3VDsPat // Basic 3-register intrinsics, both double- and quad-register. 
class N3VDInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_VFP2:$src2), @@ -764,10 +876,10 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, Intrinsic IntOp> + string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (Ty DPR:$dst), (Ty (IntOp (Ty DPR:$src1), (Ty (NEONvduplane (Ty DPR_8:$src2), @@ -776,19 +888,21 @@ class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VQInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType ResTy, ValueType OpTy, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), 
(ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_VFP2:$src2), @@ -796,10 +910,11 @@ class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, let isCommutable = 0; } class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane), - itin, !strconcat(OpcodeStr, "\t$dst, $src1, $src2[$lane]"), "", + itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "", [(set (ResTy QPR:$dst), (ResTy (IntOp (ResTy QPR:$src1), (ResTy (NEONvduplane (OpTy DPR_8:$src2), @@ -809,30 +924,32 @@ class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, // Multiply-Add/Sub operations, both double- and quad-register. 
class N3VDMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, (Ty (NEONvduplane (Ty DPR_VFP2:$src3), imm:$lane)))))))]>; class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType Ty, SDNode MulOp, SDNode ShOp> + string OpcodeStr, string Dt, + ValueType Ty, SDNode MulOp, SDNode ShOp> : N3V<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (Ty DPR:$dst), (Ty (ShOp (Ty DPR:$src1), (Ty (MulOp DPR:$src2, @@ -840,32 +957,33 @@ class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))))]>; class N3VQMulOp op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType Ty, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; class N3VQMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), 
"$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3), imm:$lane)))))))]>; class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> : N3V<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin, - !strconcat(OpcodeStr, "\t$dst, $src2, $src3[$lane]"), "$src1 = $dst", + OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst", [(set (ResTy QPR:$dst), (ResTy (ShOp (ResTy QPR:$src1), (ResTy (MulOp QPR:$src2, @@ -874,12 +992,12 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, // Multiply-Add/Sub operations, scalar single-precision class N3VDMulOps op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode OpNode> : N3V; + OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>; class N3VDMulOpsPat : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))), @@ -892,50 +1010,51 @@ class N3VDMulOpsPat // Neon 3-argument intrinsics, both double- and quad-register. // The destination register is also used as the first source operand register. class N3VDInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; class N3VQInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; // Neon Long 3-argument intrinsic. The destination register is // a quad-register and is also used as the first source operand register. 
class N3VLInt3 op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp> : N3V; class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; class N3VLInt3SL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, InstrItinClass iti // Narrowing 3-register intrinsics. class N3VNInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyD, ValueType TyQ, + string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } // Long 3-register intrinsics. class N3VLInt op21_20, bits<4> op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType TyQ, ValueType TyD, - Intrinsic IntOp, bit Commutable> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V; class N3VLIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N3V op21_20, bits<4> op11_8, InstrItinClass itin // Wide 3-register intrinsics. 
class N3VWInt op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, ValueType TyQ, ValueType TyD, + string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable> : N3V { let isCommutable = Commutable; } // Pairwise long 2-register intrinsics, both double- and quad-register. class N2VDPLInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQPLInt op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Pairwise long 2-register accumulate intrinsics, // both double- and quad-register. // The destination register is also used as the first source operand register. class N2VDPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; class N2VQPLInt2 op24_23, bits<2> op21_20, bits<2> op19_18, - bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr, + bits<2> op17_16, bits<5> op11_7, bit op4, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2V; // Shift by immediate, // both double- and quad-register. class N2VDSh op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode OpNode> : N2VImm; class N2VQSh op11_8, bit op7, bit op4, - InstrItinClass itin, string OpcodeStr, ValueType Ty, SDNode OpNode> + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType Ty, SDNode OpNode> : N2VImm; // Long shift by immediate. 
class N2VLSh op11_8, bit op7, bit op6, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, SDNode OpNode> + string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; // Narrow shift by immediate. class N2VNSh op11_8, bit op7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> : N2VImm; // Shift right by immediate and accumulate, // both double- and quad-register. class N2VDShAdd op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; class N2VQShAdd op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; // Shift by immediate and insert, // both double- and quad-register. class N2VDShIns op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; class N2VQShIns op11_8, bit op7, bit op4, - string OpcodeStr, ValueType Ty, SDNode ShOp> + string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> : N2VImm; // Convert, with fractional bits immediate, // both double- and quad-register. 
class N2VCvtD op11_8, bit op7, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; class N2VCvtQ op11_8, bit op7, bit op4, - string OpcodeStr, ValueType ResTy, ValueType OpTy, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> : N2VImm; //===----------------------------------------------------------------------===// @@ -1126,41 +1253,55 @@ class N2VCvtQ op11_8, bit op7, bit op4, multiclass N3V_QHS op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, SDNode OpNode, bit Commutable = 0> { + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> { // 64-bit vector types. def v8i8 : N3VD; + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, OpNode, Commutable>; def v4i16 : N3VD; + OpcodeStr, !strconcat(Dt, "16"), + v4i16, v4i16, OpNode, Commutable>; def v2i32 : N3VD; + OpcodeStr, !strconcat(Dt, "32"), + v2i32, v2i32, OpNode, Commutable>; // 128-bit vector types. 
def v16i8 : N3VQ; + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, OpNode, Commutable>; def v8i16 : N3VQ; + OpcodeStr, !strconcat(Dt, "16"), + v8i16, v8i16, OpNode, Commutable>; def v4i32 : N3VQ; + OpcodeStr, !strconcat(Dt, "32"), + v4i32, v4i32, OpNode, Commutable>; } -multiclass N3VSL_HS op11_8, string OpcodeStr, SDNode ShOp> { - def v4i16 : N3VDSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v4i16, ShOp>; - def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, !strconcat(OpcodeStr, "32"), v2i32, ShOp>; - def v8i16 : N3VQSL16<0b01, op11_8, !strconcat(OpcodeStr, "16"), v8i16, v4i16, ShOp>; - def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, !strconcat(OpcodeStr, "32"), v4i32, v2i32, ShOp>; +multiclass N3VSL_HS op11_8, string OpcodeStr, string Dt, SDNode ShOp> { + def v4i16 : N3VDSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), + v4i16, ShOp>; + def v2i32 : N3VDSL<0b10, op11_8, IIC_VMULi32D, OpcodeStr, !strconcat(Dt,"32"), + v2i32, ShOp>; + def v8i16 : N3VQSL16<0b01, op11_8, OpcodeStr, !strconcat(Dt, "16"), + v8i16, v4i16, ShOp>; + def v4i32 : N3VQSL<0b10, op11_8, IIC_VMULi32Q, OpcodeStr, !strconcat(Dt,"32"), + v4i32, v2i32, ShOp>; } // ....then also with element size 64 bits: multiclass N3V_QHSD op11_8, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, SDNode OpNode, bit Commutable = 0> + string OpcodeStr, string Dt, + SDNode OpNode, bit Commutable = 0> : N3V_QHS { + OpcodeStr, Dt, OpNode, Commutable> { def v1i64 : N3VD; + OpcodeStr, !strconcat(Dt, "64"), + v1i64, v1i64, OpNode, Commutable>; def v2i64 : N3VQ; + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, OpNode, Commutable>; } @@ -1168,27 +1309,30 @@ multiclass N3V_QHSD op11_8, bit op4, // source operand element sizes of 16, 32 and 64 bits: multiclass N2VNInt_HSD op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, Intrinsic IntOp> { def v8i8 : N2VNInt; + itin, OpcodeStr, 
!strconcat(Dt, "16"), + v8i8, v8i16, IntOp>; def v4i16 : N2VNInt; + itin, OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, IntOp>; def v2i32 : N2VNInt; + itin, OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, IntOp>; } // Neon Lengthening 2-register vector intrinsic (currently specific to VMOVL). // source operand element sizes of 16, 32 and 64 bits: multiclass N2VLInt_QHS op24_23, bits<5> op11_7, bit op6, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { def v8i16 : N2VLInt; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; def v4i32 : N2VLInt; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; def v2i64 : N2VLInt; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } @@ -1198,66 +1342,85 @@ multiclass N2VLInt_QHS op24_23, bits<5> op11_7, bit op6, bit op4, multiclass N3VInt_HS op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { + string OpcodeStr, string Dt, + Intrinsic IntOp, bit Commutable = 0> { // 64-bit vector types. - def v4i16 : N3VDInt; - def v2i32 : N3VDInt; // 128-bit vector types. 
- def v8i16 : N3VQInt; - def v4i32 : N3VQInt; } multiclass N3VIntSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp> { - def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, !strconcat(OpcodeStr, "16"), v4i16, IntOp>; - def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, !strconcat(OpcodeStr, "32"), v2i32, IntOp>; - def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, !strconcat(OpcodeStr, "16"), v8i16, v4i16, IntOp>; - def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, !strconcat(OpcodeStr, "32"), v4i32, v2i32, IntOp>; + string OpcodeStr, string Dt, Intrinsic IntOp> { + def v4i16 : N3VDIntSL16<0b01, op11_8, itinD16, + OpcodeStr, !strconcat(Dt, "16"), v4i16, IntOp>; + def v2i32 : N3VDIntSL<0b10, op11_8, itinD32, + OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>; + def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16, + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>; + def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32, + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VInt_QHS op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> + string OpcodeStr, string Dt, + Intrinsic IntOp, bit Commutable = 0> : N3VInt_HS { + OpcodeStr, Dt, IntOp, Commutable> { def v8i8 : N3VDInt; + OpcodeStr, !strconcat(Dt, "8"), + v8i8, v8i8, IntOp, Commutable>; def v16i8 : N3VQInt; + OpcodeStr, !strconcat(Dt, "8"), + v16i8, v16i8, IntOp, Commutable>; } // ....then also with element size of 64 bits: multiclass N3VInt_QHSD op11_8, bit op4, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> + string OpcodeStr, string Dt, + Intrinsic IntOp, bit Commutable = 0> : N3VInt_QHS { + OpcodeStr, Dt, IntOp, Commutable> { def v1i64 : N3VDInt; + OpcodeStr, 
!strconcat(Dt, "64"), + v1i64, v1i64, IntOp, Commutable>; def v2i64 : N3VQInt; + OpcodeStr, !strconcat(Dt, "64"), + v2i64, v2i64, IntOp, Commutable>; } // Neon Narrowing 3-register vector intrinsics, // source operand element sizes of 16, 32 and 64 bits: multiclass N3VNInt_HSD op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { - def v8i8 : N3VNInt { + def v8i8 : N3VNInt; - def v4i16 : N3VNInt; - def v2i32 : N3VNInt; } @@ -1266,41 +1429,50 @@ multiclass N3VNInt_HSD op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt_HS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> { def v4i32 : N3VLInt; + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, IntOp, Commutable>; def v2i64 : N3VLInt; + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, IntOp, Commutable>; } multiclass N3VLIntSL_HS op11_8, - InstrItinClass itin, string OpcodeStr, Intrinsic IntOp> { + InstrItinClass itin, string OpcodeStr, string Dt, + Intrinsic IntOp> { def v4i16 : N3VLIntSL16; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; def v2i32 : N3VLIntSL; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VLInt_QHS op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, + InstrItinClass itin, string OpcodeStr, string Dt, Intrinsic IntOp, bit Commutable = 0> - : N3VLInt_HS { + : N3VLInt_HS { def v8i16 : N3VLInt; + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, IntOp, Commutable>; } // Neon Wide 3-register vector intrinsics, // source operand element sizes of 8, 16 and 32 bits: multiclass N3VWInt_QHS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp, bit Commutable = 0> { - def v8i16 : N3VWInt { + def v8i16 : N3VWInt; - def v4i32 : N3VWInt; - def v2i64 : N3VWInt; } @@ -1310,57 +1482,57 @@ multiclass N3VWInt_QHS op11_8, bit op4, multiclass N3VMulOp_QHS op11_8, bit op4, 
InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, SDNode OpNode> { + string OpcodeStr, string Dt, SDNode OpNode> { // 64-bit vector types. def v8i8 : N3VDMulOp; + OpcodeStr, !strconcat(Dt, "8"), v8i8, mul, OpNode>; def v4i16 : N3VDMulOp; + OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, OpNode>; def v2i32 : N3VDMulOp; + OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, OpNode>; // 128-bit vector types. def v16i8 : N3VQMulOp; + OpcodeStr, !strconcat(Dt, "8"), v16i8, mul, OpNode>; def v8i16 : N3VQMulOp; + OpcodeStr, !strconcat(Dt, "16"), v8i16, mul, OpNode>; def v4i32 : N3VQMulOp; + OpcodeStr, !strconcat(Dt, "32"), v4i32, mul, OpNode>; } multiclass N3VMulOpSL_HS op11_8, InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, - string OpcodeStr, SDNode ShOp> { + string OpcodeStr, string Dt, SDNode ShOp> { def v4i16 : N3VDMulOpSL16<0b01, op11_8, itinD16, - !strconcat(OpcodeStr, "16"), v4i16, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "16"), v4i16, mul, ShOp>; def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32, - !strconcat(OpcodeStr, "32"), v2i32, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>; def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16, - !strconcat(OpcodeStr, "16"), v8i16, v4i16, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>; def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32, - !strconcat(OpcodeStr, "32"), v4i32, v2i32, mul, ShOp>; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>; } // Neon 3-argument intrinsics, // element sizes of 8, 16 and 32 bits: multiclass N3VInt3_QHS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. 
def v8i8 : N3VDInt3; + OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N3VDInt3; + OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; def v2i32 : N3VDInt3; + OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. def v16i8 : N3VQInt3; + OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; def v8i16 : N3VQInt3; + OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; def v4i32 : N3VQInt3; + OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1368,27 +1540,27 @@ multiclass N3VInt3_QHS op11_8, bit op4, // First with only element sizes of 16 and 32 bits: multiclass N3VLInt3_HS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { def v4i32 : N3VLInt3; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, IntOp>; def v2i64 : N3VLInt3; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } multiclass N3VLInt3SL_HS op11_8, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { def v4i16 : N3VLInt3SL16; + OpcodeStr, !strconcat(Dt,"16"), v4i32, v4i16, IntOp>; def v2i32 : N3VLInt3SL; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, IntOp>; } // ....then also with element size of 8 bits: multiclass N3VLInt3_QHS op11_8, bit op4, - string OpcodeStr, Intrinsic IntOp> - : N3VLInt3_HS { + string OpcodeStr, string Dt, Intrinsic IntOp> + : N3VLInt3_HS { def v8i16 : N3VLInt3; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, IntOp>; } @@ -1397,22 +1569,22 @@ multiclass N3VLInt3_QHS op11_8, bit op4, multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, InstrItinClass itinD, InstrItinClass itinQ, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. 
def v8i8 : N2VDInt; + itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>; def v4i16 : N2VDInt; + itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>; def v2i32 : N2VDInt; + itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>; // 128-bit vector types. def v16i8 : N2VQInt; + itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>; def v8i16 : N2VQInt; + itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>; def v4i32 : N2VQInt; + itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>; } @@ -1420,22 +1592,22 @@ multiclass N2VInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt; + OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; def v4i16 : N2VDPLInt; + OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; def v2i32 : N2VDPLInt; + OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; // 128-bit vector types. def v16i8 : N2VQPLInt; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; def v8i16 : N2VQPLInt; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; def v4i32 : N2VQPLInt; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; } @@ -1443,61 +1615,62 @@ multiclass N2VPLInt_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // element sizes of 8, 16 and 32 bits: multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, - string OpcodeStr, Intrinsic IntOp> { + string OpcodeStr, string Dt, Intrinsic IntOp> { // 64-bit vector types. def v8i8 : N2VDPLInt2; + OpcodeStr, !strconcat(Dt, "8"), v4i16, v8i8, IntOp>; def v4i16 : N2VDPLInt2; + OpcodeStr, !strconcat(Dt, "16"), v2i32, v4i16, IntOp>; def v2i32 : N2VDPLInt2; + OpcodeStr, !strconcat(Dt, "32"), v1i64, v2i32, IntOp>; // 128-bit vector types. 
def v16i8 : N2VQPLInt2; + OpcodeStr, !strconcat(Dt, "8"), v8i16, v16i8, IntOp>; def v8i16 : N2VQPLInt2; + OpcodeStr, !strconcat(Dt, "16"), v4i32, v8i16, IntOp>; def v4i32 : N2VQPLInt2; + OpcodeStr, !strconcat(Dt, "32"), v2i64, v4i32, IntOp>; } // Neon 2-register vector shift by immediate, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VSh_QHSD op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, SDNode OpNode> { + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { // 64-bit vector types. def v8i8 : N2VDSh { + OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDSh { + OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDSh { + OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDSh; + OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>; // imm6 = xxxxxx // 128-bit vector types. def v16i8 : N2VQSh { + OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQSh { + OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQSh { + OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQSh; + OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>; // imm6 = xxxxxx } @@ -1505,39 +1678,39 @@ multiclass N2VSh_QHSD op11_8, bit op4, // Neon Shift-Accumulate vector operations, // element sizes of 8, 16, 32 and 64 bits: multiclass N2VShAdd_QHSD op11_8, bit op4, - string OpcodeStr, SDNode ShOp> { + string OpcodeStr, string Dt, SDNode ShOp> { // 64-bit vector types. 
def v8i8 : N2VDShAdd { + OpcodeStr, !strconcat(Dt, "8"), v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDShAdd { + OpcodeStr, !strconcat(Dt, "16"), v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDShAdd { + OpcodeStr, !strconcat(Dt, "32"), v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDShAdd; + OpcodeStr, !strconcat(Dt, "64"), v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. def v16i8 : N2VQShAdd { + OpcodeStr, !strconcat(Dt, "8"), v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQShAdd { + OpcodeStr, !strconcat(Dt, "16"), v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQShAdd { + OpcodeStr, !strconcat(Dt, "32"), v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQShAdd; + OpcodeStr, !strconcat(Dt, "64"), v2i64, ShOp>; // imm6 = xxxxxx } @@ -1548,53 +1721,53 @@ multiclass N2VShIns_QHSD op11_8, bit op4, string OpcodeStr, SDNode ShOp> { // 64-bit vector types. def v8i8 : N2VDShIns { + OpcodeStr, "8", v8i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VDShIns { + OpcodeStr, "16", v4i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VDShIns { + OpcodeStr, "32", v2i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v1i64 : N2VDShIns; + OpcodeStr, "64", v1i64, ShOp>; // imm6 = xxxxxx // 128-bit vector types. 
def v16i8 : N2VQShIns { + OpcodeStr, "8", v16i8, ShOp> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v8i16 : N2VQShIns { + OpcodeStr, "16", v8i16, ShOp> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v4i32 : N2VQShIns { + OpcodeStr, "32", v4i32, ShOp> { let Inst{21} = 0b1; // imm6 = 1xxxxx } def v2i64 : N2VQShIns; + OpcodeStr, "64", v2i64, ShOp>; // imm6 = xxxxxx } // Neon Shift Long operations, // element sizes of 8, 16, 32 bits: multiclass N2VLSh_QHS op11_8, bit op7, bit op6, - bit op4, string OpcodeStr, SDNode OpNode> { + bit op4, string OpcodeStr, string Dt, SDNode OpNode> { def v8i16 : N2VLSh { + OpcodeStr, !strconcat(Dt, "8"), v8i16, v8i8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i32 : N2VLSh { + OpcodeStr, !strconcat(Dt, "16"), v4i32, v4i16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i64 : N2VLSh { + OpcodeStr, !strconcat(Dt, "32"), v2i64, v2i32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -1602,18 +1775,18 @@ multiclass N2VLSh_QHS op11_8, bit op7, bit op6, // Neon Shift Narrow operations, // element sizes of 16, 32, 64 bits: multiclass N2VNSh_HSD op11_8, bit op7, bit op6, - bit op4, InstrItinClass itin, string OpcodeStr, + bit op4, InstrItinClass itin, string OpcodeStr, string Dt, SDNode OpNode> { def v8i8 : N2VNSh { + OpcodeStr, !strconcat(Dt, "16"), v8i8, v8i16, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VNSh { + OpcodeStr, !strconcat(Dt, "32"), v4i16, v4i32, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VNSh { + OpcodeStr, !strconcat(Dt, "64"), v2i32, v2i64, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -1625,49 +1798,58 @@ multiclass N2VNSh_HSD op11_8, bit op7, bit op6, // Vector Add Operations. 
// VADD : Vector Add (integer and floating-point) -defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd.i", add, 1>; -def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd.f32", v2f32, v2f32, fadd, 1>; -def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd.f32", v4f32, v4f32, fadd, 1>; +defm VADD : N3V_QHSD<0, 0, 0b1000, 0, IIC_VBINiD, IIC_VBINiQ, "vadd", "i", + add, 1>; +def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", + v2f32, v2f32, fadd, 1>; +def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", + v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl.s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl.u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, "vaddl", "s", + int_arm_neon_vaddls, 1>; +defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u", + int_arm_neon_vaddlu, 1>; // VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw.s", int_arm_neon_vaddws, 0>; -defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw.u", int_arm_neon_vaddwu, 0>; +defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>; +defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd.s", int_arm_neon_vhadds, 1>; + IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>; defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhadd.u", int_arm_neon_vhaddu, 1>; + IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>; // VRHADD : Vector Rounding Halving Add defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd.s", int_arm_neon_vrhadds, 1>; + IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>; defm VRHADDu : 
N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vrhadd.u", int_arm_neon_vrhaddu, 1>; + IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>; // VQADD : Vector Saturating Add defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd.s", int_arm_neon_vqadds, 1>; + IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>; defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqadd.u", int_arm_neon_vqaddu, 1>; + IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn.i", int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", + int_arm_neon_vaddhn, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) -defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn.i", int_arm_neon_vraddhn, 1>; +defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", + int_arm_neon_vraddhn, 1>; // Vector Multiply Operations. 
// VMUL : Vector Multiply (integer, polynomial and floating-point) -defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, - IIC_VMULi32Q, "vmul.i", mul, 1>; -def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul.p8", v8i8, v8i8, - int_arm_neon_vmulp, 1>; -def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul.p8", v16i8, v16i8, - int_arm_neon_vmulp, 1>; -def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul.f32", v2f32, v2f32, fmul, 1>; -def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul.f32", v4f32, v4f32, fmul, 1>; -defm VMULsl : N3VSL_HS<0b1000, "vmul.i", mul>; -def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul.f32", v2f32, fmul>; -def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul.f32", v4f32, v2f32, fmul>; +defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D, + IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>; +def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8", + v8i8, v8i8, int_arm_neon_vmulp, 1>; +def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8", + v16i8, v16i8, int_arm_neon_vmulp, 1>; +def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32", + v2f32, v2f32, fmul, 1>; +def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32", + v4f32, v4f32, fmul, 1>; +defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>; +def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>; +def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>; def : Pat<(v8i16 (mul (v8i16 QPR:$src1), (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), (v8i16 (VMULslv8i16 (v8i16 QPR:$src1), @@ -1690,66 +1872,80 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqdmulh.s", int_arm_neon_vqdmulh, 1>; + "vqdmulh", "s", int_arm_neon_vqdmulh, 1>; defm VQDMULHsl: 
N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqdmulh.s", int_arm_neon_vqdmulh>; + "vqdmulh", "s", int_arm_neon_vqdmulh>; def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1), - (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), + imm:$lane)))), (v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i16_reg imm:$lane))), + (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1), - (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), + imm:$lane)))), (v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqrdmulh.s", int_arm_neon_vqrdmulh, 1>; + "vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>; defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, - "vqrdmulh.s", int_arm_neon_vqrdmulh>; + "vqrdmulh", "s", int_arm_neon_vqrdmulh>; def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1), - (v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))), + (v8i16 (NEONvduplane (v8i16 QPR:$src2), + imm:$lane)))), (v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1), (v4i16 (EXTRACT_SUBREG QPR:$src2, (DSubReg_i16_reg imm:$lane))), (SubReg_i16_lane imm:$lane)))>; def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), - (v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))), + (v4i32 (NEONvduplane (v4i32 QPR:$src2), + imm:$lane)))), (v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1), (v2i32 (EXTRACT_SUBREG QPR:$src2, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long 
(integer and polynomial) (Q = D * D) -defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls, 1>; -defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull.p8", v8i16, v8i8, - int_arm_neon_vmullp, 1>; -defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull.s", int_arm_neon_vmulls>; -defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull.u", int_arm_neon_vmullu>; +defm VMULLs : N3VLInt_QHS<0,1,0b1100,0, IIC_VMULi16D, "vmull", "s", + int_arm_neon_vmulls, 1>; +defm VMULLu : N3VLInt_QHS<1,1,0b1100,0, IIC_VMULi16D, "vmull", "u", + int_arm_neon_vmullu, 1>; +def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; +defm VMULLsls : N3VLIntSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", + int_arm_neon_vmulls>; +defm VMULLslu : N3VLIntSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", + int_arm_neon_vmullu>; // VQDMULL : Vector Saturating Doubling Multiply Long (Q = D * D) -defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull, 1>; -defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull.s", int_arm_neon_vqdmull>; +defm VQDMULL : N3VLInt_HS<0,1,0b1101,0, IIC_VMULi16D, "vqdmull", "s", + int_arm_neon_vqdmull, 1>; +defm VQDMULLsl: N3VLIntSL_HS<0, 0b1011, IIC_VMULi16D, "vqdmull", "s", + int_arm_neon_vqdmull>; // Vector Multiply-Accumulate and Multiply-Subtract Operations. 
// VMLA : Vector Multiply Accumulate (integer and floating-point) defm VMLA : N3VMulOp_QHS<0, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; -def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; -def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla.f32", v4f32, fmul, fadd>; + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", + v2f32, fmul, fadd>; +def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", + v4f32, fmul, fadd>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmla.i", add>; -def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla.f32", v2f32, fmul, fadd>; -def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla.f32", v4f32, v2f32, fmul, fadd>; + IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; +def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", + v2f32, fmul, fadd>; +def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32", + v4f32, v2f32, fmul, fadd>; def : Pat<(v8i16 (add (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -1766,7 +1962,7 @@ def : Pat<(v4i32 (add (v4i32 QPR:$src1), (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, - (DSubReg_i32_reg imm:$lane))), + (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), @@ -1779,25 +1975,30 @@ def : Pat<(v4f32 (fadd (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLAL : Vector Multiply Accumulate Long (Q += D * D) -defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal.s", int_arm_neon_vmlals>; -defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal.u", int_arm_neon_vmlalu>; +defm VMLALs : N3VLInt3_QHS<0,1,0b1000,0, "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALu : N3VLInt3_QHS<1,1,0b1000,0, "vmlal", "u", int_arm_neon_vmlalu>; -defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal.s", 
int_arm_neon_vmlals>; -defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal.u", int_arm_neon_vmlalu>; +defm VMLALsls : N3VLInt3SL_HS<0, 0b0010, "vmlal", "s", int_arm_neon_vmlals>; +defm VMLALslu : N3VLInt3SL_HS<1, 0b0010, "vmlal", "u", int_arm_neon_vmlalu>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) -defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal.s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal.s", int_arm_neon_vqdmlal>; +defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, "vqdmlal", "s", + int_arm_neon_vqdmlal>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; // VMLS : Vector Multiply Subtract (integer and floating-point) defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; -def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>; -def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls.f32", v4f32, fmul, fsub>; + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", + v2f32, fmul, fsub>; +def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", + v4f32, fmul, fsub>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, - IIC_VMACi16Q, IIC_VMACi32Q, "vmls.i", sub>; -def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls.f32", v2f32, fmul, fsub>; -def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls.f32", v4f32, v2f32, fmul, fsub>; + IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; +def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", + v2f32, fmul, fsub>; +def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32", + v4f32, v2f32, fmul, fsub>; def : Pat<(v8i16 (sub (v8i16 QPR:$src1), (mul (v8i16 QPR:$src2), @@ -1810,7 +2011,7 @@ def : Pat<(v8i16 (sub (v8i16 QPR:$src1), def : Pat<(v4i32 (sub (v4i32 QPR:$src1), (mul (v4i32 QPR:$src2), - (v4i32 (NEONvduplane 
(v4i32 QPR:$src3), imm:$lane))))), + (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))), (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2), (v2i32 (EXTRACT_SUBREG QPR:$src3, @@ -1819,7 +2020,7 @@ def : Pat<(v4i32 (sub (v4i32 QPR:$src1), def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (fmul (v4f32 QPR:$src2), - (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), + (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))), (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2), (v2f32 (EXTRACT_SUBREG QPR:$src3, @@ -1827,146 +2028,170 @@ def : Pat<(v4f32 (fsub (v4f32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMLSL : Vector Multiply Subtract Long (Q -= D * D) -defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl.s", int_arm_neon_vmlsls>; -defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl.u", int_arm_neon_vmlslu>; +defm VMLSLs : N3VLInt3_QHS<0,1,0b1010,0, "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLu : N3VLInt3_QHS<1,1,0b1010,0, "vmlsl", "u", int_arm_neon_vmlslu>; -defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl.s", int_arm_neon_vmlsls>; -defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl.u", int_arm_neon_vmlslu>; +defm VMLSLsls : N3VLInt3SL_HS<0, 0b0110, "vmlsl", "s", int_arm_neon_vmlsls>; +defm VMLSLslu : N3VLInt3SL_HS<1, 0b0110, "vmlsl", "u", int_arm_neon_vmlslu>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) -defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl.s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl.s", int_arm_neon_vqdmlsl>; +defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, "vqdmlsl", "s", + int_arm_neon_vqdmlsl>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; // Vector Subtract Operations. 
// VSUB : Vector Subtract (integer and floating-point) -defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, "vsub.i", sub, 0>; -def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub.f32", v2f32, v2f32, fsub, 0>; -def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub.f32", v4f32, v4f32, fsub, 0>; +defm VSUB : N3V_QHSD<1, 0, 0b1000, 0, IIC_VSUBiD, IIC_VSUBiQ, + "vsub", "i", sub, 0>; +def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", + v2f32, v2f32, fsub, 0>; +def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", + v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl.s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl.u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, "vsubl", "s", + int_arm_neon_vsubls, 1>; +defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u", + int_arm_neon_vsublu, 1>; // VSUBW : Vector Subtract Wide (Q = Q - D) -defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw.s", int_arm_neon_vsubws, 0>; -defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw.u", int_arm_neon_vsubwu, 0>; +defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; +defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; // VHSUB : Vector Halving Subtract -defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhsub.s", int_arm_neon_vhsubs, 0>; -defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vhsub.u", int_arm_neon_vhsubu, 0>; +defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, + IIC_VBINi4Q, IIC_VBINi4Q, + "vhsub", "s", int_arm_neon_vhsubs, 0>; +defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D, + IIC_VBINi4Q, IIC_VBINi4Q, + "vhsub", "u", int_arm_neon_vhsubu, 0>; // VQSUB : Vector Saturing Subtract -defm VQSUBs : 
N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqsub.s", int_arm_neon_vqsubs, 0>; -defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vqsub.u", int_arm_neon_vqsubu, 0>; +defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, + IIC_VBINi4Q, IIC_VBINi4Q, + "vqsub", "s", int_arm_neon_vqsubs, 0>; +defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D, + IIC_VBINi4Q, IIC_VBINi4Q, + "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn.i", int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", + int_arm_neon_vsubhn, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) -defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn.i", int_arm_neon_vrsubhn, 0>; +defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", + int_arm_neon_vrsubhn, 0>; // Vector Comparisons. 
// VCEQ : Vector Compare Equal defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vceq.i", NEONvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq.f32", v2i32, v2f32, NEONvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq.f32", v4i32, v4f32, NEONvceq, 1>; + IIC_VBINi4Q, "vceq", "i", NEONvceq, 1>; +def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, + NEONvceq, 1>; +def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, + NEONvceq, 1>; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge.s", NEONvcge, 0>; + IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>; defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcge.u", NEONvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge.f32", v2i32, v2f32, NEONvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge.f32", v4i32, v4f32, NEONvcge, 0>; + IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>; +def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", + v2i32, v2f32, NEONvcge, 0>; +def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, + NEONvcge, 0>; // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt.s", NEONvcgt, 0>; + IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>; defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vcgt.u", NEONvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt.f32", v2i32, v2f32, NEONvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt.f32", v4i32, v4f32, NEONvcgt, 0>; + IIC_VBINi4Q, "vcgt", "u", NEONvcgtu, 0>; +def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, + NEONvcgt, 0>; +def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, + 
NEONvcgt, 0>; // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) -def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge.f32", v2i32, v2f32, - int_arm_neon_vacged, 0>; -def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge.f32", v4i32, v4f32, - int_arm_neon_vacgeq, 0>; +def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32", + v2i32, v2f32, int_arm_neon_vacged, 0>; +def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32", + v4i32, v4f32, int_arm_neon_vacgeq, 0>; // VACGT : Vector Absolute Compare Greater Than (aka VCAGT) -def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt.f32", v2i32, v2f32, - int_arm_neon_vacgtd, 0>; -def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt.f32", v4i32, v4f32, - int_arm_neon_vacgtq, 0>; +def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32", + v2i32, v2f32, int_arm_neon_vacgtd, 0>; +def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32", + v4i32, v4f32, int_arm_neon_vacgtq, 0>; // VTST : Vector Test Bits defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vtst.i", NEONvtst, 1>; + IIC_VBINi4Q, "vtst", "i", NEONvtst, 1>; // Vector Bitwise Operations. 
// VAND : Vector Bitwise AND -def VANDd : N3VD<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", v2i32, v2i32, and, 1>; -def VANDq : N3VQ<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", v4i32, v4i32, and, 1>; +def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand", + v2i32, v2i32, and, 1>; +def VANDq : N3VQX<0, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "vand", + v4i32, v4i32, and, 1>; // VEOR : Vector Bitwise Exclusive OR -def VEORd : N3VD<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", v2i32, v2i32, xor, 1>; -def VEORq : N3VQ<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", v4i32, v4i32, xor, 1>; +def VEORd : N3VDX<1, 0, 0b00, 0b0001, 1, IIC_VBINiD, "veor", + v2i32, v2i32, xor, 1>; +def VEORq : N3VQX<1, 0, 0b00, 0b0001, 1, IIC_VBINiQ, "veor", + v4i32, v4i32, xor, 1>; // VORR : Vector Bitwise OR -def VORRd : N3VD<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", v2i32, v2i32, or, 1>; -def VORRq : N3VQ<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", v4i32, v4i32, or, 1>; +def VORRd : N3VDX<0, 0, 0b10, 0b0001, 1, IIC_VBINiD, "vorr", + v2i32, v2i32, or, 1>; +def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr", + v4i32, v4i32, or, 1>; // VBIC : Vector Bitwise Bit Clear (AND NOT) -def VBICd : N3V<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), +def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vbic\t$dst, $src1, $src2", "", + "vbic", "$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (and DPR:$src1, (vnot_conv DPR:$src2))))]>; -def VBICq : N3V<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), +def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vbic\t$dst, $src1, $src2", "", + "vbic", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (and QPR:$src1, (vnot_conv QPR:$src2))))]>; // VORN : Vector Bitwise OR NOT -def VORNd : N3V<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), +def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), IIC_VBINiD, - "vorn\t$dst, $src1, $src2", "", + "vorn", 
"$dst, $src1, $src2", "", [(set DPR:$dst, (v2i32 (or DPR:$src1, (vnot_conv DPR:$src2))))]>; -def VORNq : N3V<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), +def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINiQ, - "vorn\t$dst, $src1, $src2", "", + "vorn", "$dst, $src1, $src2", "", [(set QPR:$dst, (v4i32 (or QPR:$src1, (vnot_conv QPR:$src2))))]>; // VMVN : Vector Bitwise NOT -def VMVNd : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, +def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD, - "vmvn\t$dst, $src", "", + "vmvn", "$dst, $src", "", [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>; -def VMVNq : N2V<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, +def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD, - "vmvn\t$dst, $src", "", + "vmvn", "$dst, $src", "", [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>; def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>; def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>; // VBSL : Vector Bitwise Select -def VBSLd : N3V<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), +def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD, - "vbsl\t$dst, $src2, $src3", "$src1 = $dst", + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set DPR:$dst, (v2i32 (or (and DPR:$src2, DPR:$src1), (and DPR:$src3, (vnot_conv DPR:$src1)))))]>; -def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), +def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ, - "vbsl\t$dst, $src2, $src3", "$src1 = $dst", + "vbsl", "$dst, $src2, $src3", "$src1 = $dst", [(set QPR:$dst, (v4i32 (or (and QPR:$src2, QPR:$src1), (and QPR:$src3, (vnot_conv QPR:$src1)))))]>; // VBIF : Vector Bitwise Insert if False -// like VBSL but with: "vbif\t$dst, $src3, $src1", "$src2 = $dst", +// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", // VBIT : 
Vector Bitwise Insert if True -// like VBSL but with: "vbit\t$dst, $src2, $src1", "$src3 = $dst", +// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", // These are not yet implemented. The TwoAddress pass will not go looking // for equivalent operations with different register constraints; it just // inserts copies. @@ -1974,259 +2199,270 @@ def VBSLq : N3V<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst), // Vector Absolute Differences. // VABD : Vector Absolute Difference -defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vabd.s", int_arm_neon_vabds, 0>; -defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vabd.u", int_arm_neon_vabdu, 0>; -def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, "vabd.f32", v2f32, v2f32, - int_arm_neon_vabds, 0>; -def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vabd.f32", v4f32, v4f32, - int_arm_neon_vabds, 0>; +defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, + IIC_VBINi4Q, IIC_VBINi4Q, + "vabd", "s", int_arm_neon_vabds, 0>; +defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D, + IIC_VBINi4Q, IIC_VBINi4Q, + "vabd", "u", int_arm_neon_vabdu, 0>; +def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND, + "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>; +def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ, + "vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>; // VABDL : Vector Absolute Difference Long (Q = | D - D |) -defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, "vabdl.s", int_arm_neon_vabdls, 0>; -defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, "vabdl.u", int_arm_neon_vabdlu, 0>; +defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VBINi4Q, + "vabdl", "s", int_arm_neon_vabdls, 0>; +defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VBINi4Q, + "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate -defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba.s", 
int_arm_neon_vabas>; -defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba.u", int_arm_neon_vabau>; +defm VABAs : N3VInt3_QHS<0,0,0b0111,1, "vaba", "s", int_arm_neon_vabas>; +defm VABAu : N3VInt3_QHS<1,0,0b0111,1, "vaba", "u", int_arm_neon_vabau>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) -defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal.s", int_arm_neon_vabals>; -defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal.u", int_arm_neon_vabalu>; +defm VABALs : N3VLInt3_QHS<0,1,0b0101,0, "vabal", "s", int_arm_neon_vabals>; +defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>; // Vector Maximum and Minimum. // VMAX : Vector Maximum defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax.s", int_arm_neon_vmaxs, 1>; + IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>; defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmax.u", int_arm_neon_vmaxu, 1>; -def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax.f32", v2f32, v2f32, - int_arm_neon_vmaxs, 1>; -def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax.f32", v4f32, v4f32, - int_arm_neon_vmaxs, 1>; + IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>; +def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32", + v2f32, v2f32, int_arm_neon_vmaxs, 1>; +def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32", + v4f32, v4f32, int_arm_neon_vmaxs, 1>; // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin.s", int_arm_neon_vmins, 1>; + IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>; defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, - IIC_VBINi4Q, "vmin.u", int_arm_neon_vminu, 1>; -def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin.f32", v2f32, v2f32, - int_arm_neon_vmins, 1>; -def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin.f32", v4f32, 
v4f32, - int_arm_neon_vmins, 1>; + IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>; +def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32", + v2f32, v2f32, int_arm_neon_vmins, 1>; +def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32", + v4f32, v4f32, int_arm_neon_vmins, 1>; // Vector Pairwise Operations. // VPADD : Vector Pairwise Add -def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd.i8", v8i8, v8i8, - int_arm_neon_vpadd, 0>; -def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd.i16", v4i16, v4i16, - int_arm_neon_vpadd, 0>; -def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd.i32", v2i32, v2i32, - int_arm_neon_vpadd, 0>; -def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd.f32", v2f32, v2f32, - int_arm_neon_vpadd, 0>; +def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8", + v8i8, v8i8, int_arm_neon_vpadd, 0>; +def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16", + v4i16, v4i16, int_arm_neon_vpadd, 0>; +def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32", + v2i32, v2i32, int_arm_neon_vpadd, 0>; +def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32", + v2f32, v2f32, int_arm_neon_vpadd, 0>; // VPADDL : Vector Pairwise Add Long -defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s", +defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s", int_arm_neon_vpaddls>; -defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u", +defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl", "u", int_arm_neon_vpaddlu>; // VPADAL : Vector Pairwise Add and Accumulate Long -defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s", +defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal", "s", int_arm_neon_vpadals>; -defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u", +defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", 
"u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum -def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.s8", v8i8, v8i8, - int_arm_neon_vpmaxs, 0>; -def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.s16", v4i16, v4i16, - int_arm_neon_vpmaxs, 0>; -def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.s32", v2i32, v2i32, - int_arm_neon_vpmaxs, 0>; -def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax.u8", v8i8, v8i8, - int_arm_neon_vpmaxu, 0>; -def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax.u16", v4i16, v4i16, - int_arm_neon_vpmaxu, 0>; -def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax.u32", v2i32, v2i32, - int_arm_neon_vpmaxu, 0>; -def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax.f32", v2f32, v2f32, - int_arm_neon_vpmaxs, 0>; +def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8", + v8i8, v8i8, int_arm_neon_vpmaxs, 0>; +def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16", + v4i16, v4i16, int_arm_neon_vpmaxs, 0>; +def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32", + v2i32, v2i32, int_arm_neon_vpmaxs, 0>; +def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8", + v8i8, v8i8, int_arm_neon_vpmaxu, 0>; +def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16", + v4i16, v4i16, int_arm_neon_vpmaxu, 0>; +def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32", + v2i32, v2i32, int_arm_neon_vpmaxu, 0>; +def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32", + v2f32, v2f32, int_arm_neon_vpmaxs, 0>; // VPMIN : Vector Pairwise Minimum -def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.s8", v8i8, v8i8, - int_arm_neon_vpmins, 0>; -def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.s16", v4i16, v4i16, - int_arm_neon_vpmins, 0>; -def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.s32", v2i32, 
v2i32, - int_arm_neon_vpmins, 0>; -def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin.u8", v8i8, v8i8, - int_arm_neon_vpminu, 0>; -def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin.u16", v4i16, v4i16, - int_arm_neon_vpminu, 0>; -def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin.u32", v2i32, v2i32, - int_arm_neon_vpminu, 0>; -def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin.f32", v2f32, v2f32, - int_arm_neon_vpmins, 0>; +def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8", + v8i8, v8i8, int_arm_neon_vpmins, 0>; +def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16", + v4i16, v4i16, int_arm_neon_vpmins, 0>; +def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32", + v2i32, v2i32, int_arm_neon_vpmins, 0>; +def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8", + v8i8, v8i8, int_arm_neon_vpminu, 0>; +def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16", + v4i16, v4i16, int_arm_neon_vpminu, 0>; +def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32", + v2i32, v2i32, int_arm_neon_vpminu, 0>; +def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32", + v2f32, v2f32, int_arm_neon_vpmins, 0>; // Vector Reciprocal and Reciprocal Square Root Estimate and Step. 
// VRECPE : Vector Reciprocal Estimate def VRECPEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAD, "vrecpe.u32", + IIC_VUNAD, "vrecpe", "u32", v2i32, v2i32, int_arm_neon_vrecpe>; def VRECPEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01000, 0, - IIC_VUNAQ, "vrecpe.u32", + IIC_VUNAQ, "vrecpe", "u32", v4i32, v4i32, int_arm_neon_vrecpe>; def VRECPEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAD, "vrecpe.f32", + IIC_VUNAD, "vrecpe", "f32", v2f32, v2f32, int_arm_neon_vrecpe>; def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0, - IIC_VUNAQ, "vrecpe.f32", + IIC_VUNAQ, "vrecpe", "f32", v4f32, v4f32, int_arm_neon_vrecpe>; // VRECPS : Vector Reciprocal Step -def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSD, "vrecps.f32", v2f32, v2f32, - int_arm_neon_vrecps, 1>; -def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, IIC_VRECSQ, "vrecps.f32", v4f32, v4f32, - int_arm_neon_vrecps, 1>; +def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, + IIC_VRECSD, "vrecps", "f32", + v2f32, v2f32, int_arm_neon_vrecps, 1>; +def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, + IIC_VRECSQ, "vrecps", "f32", + v4f32, v4f32, int_arm_neon_vrecps, 1>; // VRSQRTE : Vector Reciprocal Square Root Estimate def VRSQRTEd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAD, "vrsqrte.u32", + IIC_VUNAD, "vrsqrte", "u32", v2i32, v2i32, int_arm_neon_vrsqrte>; def VRSQRTEq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01001, 0, - IIC_VUNAQ, "vrsqrte.u32", + IIC_VUNAQ, "vrsqrte", "u32", v4i32, v4i32, int_arm_neon_vrsqrte>; def VRSQRTEfd : N2VDInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, - IIC_VUNAD, "vrsqrte.f32", + IIC_VUNAD, "vrsqrte", "f32", v2f32, v2f32, int_arm_neon_vrsqrte>; def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0, - IIC_VUNAQ, "vrsqrte.f32", + IIC_VUNAQ, "vrsqrte", "f32", v4f32, v4f32, int_arm_neon_vrsqrte>; // VRSQRTS : Vector Reciprocal Square Root Step -def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSD, "vrsqrts.f32", v2f32, v2f32, - int_arm_neon_vrsqrts, 1>; -def VRSQRTSfq 
: N3VQInt<0, 0, 0b10, 0b1111, 1, IIC_VRECSQ, "vrsqrts.f32", v4f32, v4f32, - int_arm_neon_vrsqrts, 1>; +def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, + IIC_VRECSD, "vrsqrts", "f32", + v2f32, v2f32, int_arm_neon_vrsqrts, 1>; +def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, + IIC_VRECSQ, "vrsqrts", "f32", + v4f32, v4f32, int_arm_neon_vrsqrts, 1>; // Vector Shifts. // VSHL : Vector Shift defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl.s", int_arm_neon_vshifts, 0>; + IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>; defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, - IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>; + IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; +defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>; +defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>; // VSHLL : Vector Shift Left Long -defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll.s", NEONvshlls>; -defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll.u", NEONvshllu>; +defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; +defm VSHLLu : N2VLSh_QHS<1, 1, 0b1010, 0, 0, 1, "vshll", "u", NEONvshllu>; // VSHLL : Vector Shift Left Long (with maximum shift count) class N2VLShMax op21_16, bits<4> op11_8, bit op7, - bit op6, bit op4, string OpcodeStr, ValueType ResTy, + bit op6, bit op4, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode> - : N2VLSh { + : N2VLSh { let Inst{21-16} = op21_16; } -def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 
0, "vshll.i8", +def VSHLLi8 : N2VLShMax<1, 1, 0b110010, 0b0011, 0, 0, 0, "vshll", "i8", v8i16, v8i8, NEONvshlli>; -def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll.i16", +def VSHLLi16 : N2VLShMax<1, 1, 0b110110, 0b0011, 0, 0, 0, "vshll", "i16", v4i32, v4i16, NEONvshlli>; -def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll.i32", +def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32", v2i64, v2i32, NEONvshlli>; // VSHRN : Vector Shift Right and Narrow -defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn.i", NEONvshrn>; +defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>; // VRSHL : Vector Rounding Shift defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl.s", int_arm_neon_vrshifts, 0>; + IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>; defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vrshl.u", int_arm_neon_vrshiftu, 0>; + IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.s", NEONvrshrs>; -defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr.u", NEONvrshru>; +defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>; +defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow -defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn.i", +defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", NEONvrshrn>; // VQSHL : Vector Saturating Shift defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl.s", int_arm_neon_vqshifts, 0>; + IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>; defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqshl.u", 
int_arm_neon_vqshiftu, 0>; + IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.s", NEONvqshls>; -defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl.u", NEONvqshlu>; +defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>; +defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>; // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu.s", NEONvqshlsu>; +defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow -defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.s", +defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", NEONvqshrns>; -defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn.u", +defm VQSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "u", NEONvqshrnu>; // VQSHRUN : Vector Saturating Shift Right and Narrow (Unsigned) -defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun.s", +defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s", NEONvqshrnsu>; // VQRSHL : Vector Saturating Rounding Shift defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl.s", int_arm_neon_vqrshifts, 0>; + IIC_VSHLi4Q, "vqrshl", "s", + int_arm_neon_vqrshifts, 0>; defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, - IIC_VSHLi4Q, "vqrshl.u", int_arm_neon_vqrshiftu, 0>; + IIC_VSHLi4Q, "vqrshl", "u", + int_arm_neon_vqrshiftu, 0>; // VQRSHRN : Vector Saturating Rounding Shift Right and Narrow -defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.s", +defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s", NEONvqrshrns>; 
-defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn.u", +defm VQRSHRNu : N2VNSh_HSD<1, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "u", NEONvqrshrnu>; // VQRSHRUN : Vector Saturating Rounding Shift Right and Narrow (Unsigned) -defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun.s", +defm VQRSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vqrshrun", "s", NEONvqrshrnsu>; // VSRA : Vector Shift Right and Accumulate -defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra.s", NEONvshrs>; -defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra.u", NEONvshru>; +defm VSRAs : N2VShAdd_QHSD<0, 1, 0b0001, 1, "vsra", "s", NEONvshrs>; +defm VSRAu : N2VShAdd_QHSD<1, 1, 0b0001, 1, "vsra", "u", NEONvshru>; // VRSRA : Vector Rounding Shift Right and Accumulate -defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra.s", NEONvrshrs>; -defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra.u", NEONvrshru>; +defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; +defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli.", NEONvsli>; +defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>; // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri.", NEONvsri>; +defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>; // Vector Absolute and Saturating Absolute. 
// VABS : Vector Absolute Value defm VABS : N2VInt_QHS<0b11, 0b11, 0b01, 0b00110, 0, - IIC_VUNAiD, IIC_VUNAiQ, "vabs.s", + IIC_VUNAiD, IIC_VUNAiQ, "vabs", "s", int_arm_neon_vabs>; def VABSfd : N2VDInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs.f32", + IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>; def VABSfq : N2VQInt<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAQ, "vabs.f32", + IIC_VUNAQ, "vabs", "f32", v4f32, v4f32, int_arm_neon_vabs>; // VQABS : Vector Saturating Absolute Value defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs.s", + IIC_VQUNAiD, IIC_VQUNAiQ, "vqabs", "s", int_arm_neon_vqabs>; // Vector Negate. @@ -2234,31 +2470,31 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0, def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>; def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>; -class VNEGD size, string OpcodeStr, ValueType Ty> +class VNEGD size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src), - IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (vneg DPR:$src)))]>; -class VNEGQ size, string OpcodeStr, ValueType Ty> +class VNEGQ size, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src), - IIC_VSHLiD, !strconcat(OpcodeStr, "\t$dst, $src"), "", + IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (vneg QPR:$src)))]>; // VNEG : Vector Negate -def VNEGs8d : VNEGD<0b00, "vneg.s8", v8i8>; -def VNEGs16d : VNEGD<0b01, "vneg.s16", v4i16>; -def VNEGs32d : VNEGD<0b10, "vneg.s32", v2i32>; -def VNEGs8q : VNEGQ<0b00, "vneg.s8", v16i8>; -def VNEGs16q : VNEGQ<0b01, "vneg.s16", v8i16>; -def VNEGs32q : VNEGQ<0b10, "vneg.s32", v4i32>; +def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>; +def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>; +def VNEGs32d : 
VNEGD<0b10, "vneg", "s32", v2i32>; +def VNEGs8q : VNEGQ<0b00, "vneg", "s8", v16i8>; +def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>; +def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>; // VNEG : Vector Negate (floating-point) def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VUNAD, - "vneg.f32\t$dst, $src", "", + "vneg", "f32", "$dst, $src", "", [(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>; def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VUNAQ, - "vneg.f32\t$dst, $src", "", + "vneg", "f32", "$dst, $src", "", [(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>; def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>; @@ -2270,35 +2506,35 @@ def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>; // VQNEG : Vector Saturating Negate defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0, - IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg.s", + IIC_VQUNAiD, IIC_VQUNAiQ, "vqneg", "s", int_arm_neon_vqneg>; // Vector Bit Counting Operations. // VCLS : Vector Count Leading Sign Bits defm VCLS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01000, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vcls.s", + IIC_VCNTiD, IIC_VCNTiQ, "vcls", "s", int_arm_neon_vcls>; // VCLZ : Vector Count Leading Zeros defm VCLZ : N2VInt_QHS<0b11, 0b11, 0b00, 0b01001, 0, - IIC_VCNTiD, IIC_VCNTiQ, "vclz.i", + IIC_VCNTiD, IIC_VCNTiQ, "vclz", "i", int_arm_neon_vclz>; // VCNT : Vector Count One Bits def VCNTd : N2VDInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiD, "vcnt.8", + IIC_VCNTiD, "vcnt", "8", v8i8, v8i8, int_arm_neon_vcnt>; def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, - IIC_VCNTiQ, "vcnt.8", + IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; // Vector Move Operations. 
// VMOV : Vector Move (Register) -def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), - IIC_VMOVD, "vmov\t$dst, $src", "", []>; -def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), - IIC_VMOVD, "vmov\t$dst, $src", "", []>; +def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), + IIC_VMOVD, "vmov", "$dst, $src", "", []>; +def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), + IIC_VMOVD, "vmov", "$dst, $src", "", []>; // VMOV : Vector Move (Immediate) @@ -2339,65 +2575,65 @@ def vmovImm64 : PatLeaf<(build_vector), [{ def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, - "vmov.i8\t$dst, $SIMM", "", + "vmov", "i8", "$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), (ins h8imm:$SIMM), IIC_VMOVImm, - "vmov.i8\t$dst, $SIMM", "", + "vmov", "i8", "$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; -def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm, - "vmov.i16\t$dst, $SIMM", "", + "vmov", "i16", "$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; -def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), +def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), (ins h16imm:$SIMM), IIC_VMOVImm, - "vmov.i16\t$dst, $SIMM", "", + "vmov", "i16", "$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; -def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), +def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, {?}, 1, (outs DPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm, - "vmov.i32\t$dst, $SIMM", "", + "vmov", "i32", "$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; -def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), 
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, {?}, 1, (outs QPR:$dst), (ins h32imm:$SIMM), IIC_VMOVImm, - "vmov.i32\t$dst, $SIMM", "", + "vmov", "i32", "$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, - "vmov.i64\t$dst, $SIMM", "", + "vmov", "i64", "$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), (ins h64imm:$SIMM), IIC_VMOVImm, - "vmov.i64\t$dst, $SIMM", "", + "vmov", "i64", "$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; // VMOV : Vector Get Lane (move scalar to ARM core register) -def VGETLNs8 : NVGetLane<0b11100101, 0b1011, 0b00, +def VGETLNs8 : NVGetLane<{1,1,1,0,0,1,?,1}, 0b1011, {?,?}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".s8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "s8", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v8i8 DPR:$src), imm:$lane))]>; -def VGETLNs16 : NVGetLane<0b11100001, 0b1011, 0b01, +def VGETLNs16 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, {?,1}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".s16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "s16", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlanes (v4i16 DPR:$src), imm:$lane))]>; -def VGETLNu8 : NVGetLane<0b11101101, 0b1011, 0b00, +def VGETLNu8 : NVGetLane<{1,1,1,0,1,1,?,1}, 0b1011, {?,?}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".u8\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "u8", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v8i8 DPR:$src), imm:$lane))]>; -def VGETLNu16 : NVGetLane<0b11101001, 0b1011, 0b01, +def VGETLNu16 : NVGetLane<{1,1,1,0,1,0,?,1}, 0b1011, {?,1}, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".u16\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "u16", "$dst, $src[$lane]", [(set GPR:$dst, (NEONvgetlaneu (v4i16 
DPR:$src), imm:$lane))]>; -def VGETLNi32 : NVGetLane<0b11100001, 0b1011, 0b00, +def VGETLNi32 : NVGetLane<{1,1,1,0,0,0,?,1}, 0b1011, 0b00, (outs GPR:$dst), (ins DPR:$src, nohash_imm:$lane), - IIC_VMOVSI, "vmov", ".32\t$dst, $src[$lane]", + IIC_VMOVSI, "vmov", "32", "$dst, $src[$lane]", [(set GPR:$dst, (extractelt (v2i32 DPR:$src), imm:$lane))]>; // def VGETLNf32: see FMRDH and FMRDL in ARMInstrVFP.td @@ -2436,19 +2672,19 @@ def : Pat<(extractelt (v2f64 QPR:$src1), imm:$src2), // VMOV : Vector Set Lane (move ARM core register to scalar) let Constraints = "$src1 = $dst" in { -def VSETLNi8 : NVSetLane<0b11100100, 0b1011, 0b00, (outs DPR:$dst), +def VSETLNi8 : NVSetLane<{1,1,1,0,0,1,?,0}, 0b1011, {?,?}, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".8\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "8", "$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v8i8 DPR:$src1), GPR:$src2, imm:$lane))]>; -def VSETLNi16 : NVSetLane<0b11100000, 0b1011, 0b01, (outs DPR:$dst), +def VSETLNi16 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, {?,1}, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".16\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "16", "$dst[$lane], $src2", [(set DPR:$dst, (vector_insert (v4i16 DPR:$src1), GPR:$src2, imm:$lane))]>; -def VSETLNi32 : NVSetLane<0b11100000, 0b1011, 0b00, (outs DPR:$dst), +def VSETLNi32 : NVSetLane<{1,1,1,0,0,0,?,0}, 0b1011, 0b00, (outs DPR:$dst), (ins DPR:$src1, GPR:$src2, nohash_imm:$lane), - IIC_VMOVISL, "vmov", ".32\t$dst[$lane], $src2", + IIC_VMOVISL, "vmov", "32", "$dst[$lane], $src2", [(set DPR:$dst, (insertelt (v2i32 DPR:$src1), GPR:$src2, imm:$lane))]>; } @@ -2512,55 +2748,57 @@ def : Pat<(v4i32 (scalar_to_vector GPR:$src)), // VDUP : Vector Duplicate (from ARM core register to all elements) -class VDUPD opcod1, bits<2> opcod3, string asmSize, ValueType Ty> +class VDUPD opcod1, bits<2> opcod3, string Dt, ValueType Ty> : NVDup; -class VDUPQ opcod1, bits<2> opcod3, 
string asmSize, ValueType Ty> +class VDUPQ opcod1, bits<2> opcod3, string Dt, ValueType Ty> : NVDup; -def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>; -def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>; -def VDUP32d : VDUPD<0b11101000, 0b00, ".32", v2i32>; -def VDUP8q : VDUPQ<0b11101110, 0b00, ".8", v16i8>; -def VDUP16q : VDUPQ<0b11101010, 0b01, ".16", v8i16>; -def VDUP32q : VDUPQ<0b11101010, 0b00, ".32", v4i32>; +def VDUP8d : VDUPD<0b11101100, 0b00, "8", v8i8>; +def VDUP16d : VDUPD<0b11101000, 0b01, "16", v4i16>; +def VDUP32d : VDUPD<0b11101000, 0b00, "32", v2i32>; +def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; +def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; +def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", "32", "$dst, $src", [(set DPR:$dst, (v2f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "vdup", ".32\t$dst, $src", + IIC_VMOVIS, "vdup", "32", "$dst, $src", [(set QPR:$dst, (v4f32 (NEONvdup (f32 (bitconvert GPR:$src)))))]>; // VDUP : Vector Duplicate Lane (from scalar to all elements) -class VDUPLND - : N2VDup<0b11, 0b11, 0b11000, 0, 0, +class VDUPLND op19_18, bits<2> op17_16, + string OpcodeStr, string Dt, ValueType Ty> + : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", + OpcodeStr, Dt, "$dst, $src[$lane]", "", [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>; -class VDUPLNQ - : N2VDup<0b11, 0b11, 0b11000, 1, 0, +class VDUPLNQ op19_18, bits<2> op17_16, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy> + : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src[$lane]"), "", + 
OpcodeStr, Dt, "$dst, $src[$lane]", "", [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>; // Inst{19-16} is partially specified depending on the element size. -def VDUPLN8d : VDUPLND<"vdup.8", v8i8> { let Inst{16} = 1; } -def VDUPLN16d : VDUPLND<"vdup.16", v4i16> { let Inst{17-16} = 0b10; } -def VDUPLN32d : VDUPLND<"vdup.32", v2i32> { let Inst{18-16} = 0b100; } -def VDUPLNfd : VDUPLND<"vdup.32", v2f32> { let Inst{18-16} = 0b100; } -def VDUPLN8q : VDUPLNQ<"vdup.8", v16i8, v8i8> { let Inst{16} = 1; } -def VDUPLN16q : VDUPLNQ<"vdup.16", v8i16, v4i16> { let Inst{17-16} = 0b10; } -def VDUPLN32q : VDUPLNQ<"vdup.32", v4i32, v2i32> { let Inst{18-16} = 0b100; } -def VDUPLNfq : VDUPLNQ<"vdup.32", v4f32, v2f32> { let Inst{18-16} = 0b100; } +def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>; +def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>; +def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>; +def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>; +def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>; +def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>; +def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>; +def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>; def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -2579,19 +2817,15 @@ def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; -def VDUPfdf : N2VDup<0b11, 0b11, 0b11000, 0, 0, - (outs DPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]> { - let Inst{18-16} = 0b100; -} +def VDUPfdf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 0, 0, + (outs DPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", + [(set DPR:$dst, (v2f32 (NEONvdup (f32 SPR:$src))))]>; -def VDUPfqf : N2VDup<0b11, 0b11, 0b11000, 1, 0, - 
(outs QPR:$dst), (ins SPR:$src), - IIC_VMOVD, "vdup.32\t$dst, ${src:lane}", "", - [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]> { - let Inst{18-16} = 0b100; -} +def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0, + (outs QPR:$dst), (ins SPR:$src), + IIC_VMOVD, "vdup", "32", "$dst, ${src:lane}", "", + [(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>; def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)), (INSERT_SUBREG QPR:$src, @@ -2603,176 +2837,178 @@ def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)), (DSubReg_f64_other_reg imm:$lane))>; // VMOVN : Vector Narrowing Move -defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, "vmovn.i", - int_arm_neon_vmovn>; +defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD, + "vmovn", "i", int_arm_neon_vmovn>; // VQMOVN : Vector Saturating Narrowing Move -defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, "vqmovn.s", - int_arm_neon_vqmovns>; -defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, "vqmovn.u", - int_arm_neon_vqmovnu>; -defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, "vqmovun.s", - int_arm_neon_vqmovnsu>; +defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD, + "vqmovn", "s", int_arm_neon_vqmovns>; +defm VQMOVNu : N2VNInt_HSD<0b11,0b11,0b10,0b00101,1,0, IIC_VQUNAiD, + "vqmovn", "u", int_arm_neon_vqmovnu>; +defm VQMOVNsu : N2VNInt_HSD<0b11,0b11,0b10,0b00100,1,0, IIC_VQUNAiD, + "vqmovun", "s", int_arm_neon_vqmovnsu>; // VMOVL : Vector Lengthening Move -defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl.s", int_arm_neon_vmovls>; -defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl.u", int_arm_neon_vmovlu>; +defm VMOVLs : N2VLInt_QHS<0b01,0b10100,0,1, "vmovl", "s", + int_arm_neon_vmovls>; +defm VMOVLu : N2VLInt_QHS<0b11,0b10100,0,1, "vmovl", "u", + int_arm_neon_vmovlu>; // Vector Conversions. 
// VCVT : Vector Convert Between Floating-Point and Integers -def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", +def VCVTf2sd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>; -def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", +def VCVTf2ud : N2VD<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>; -def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", +def VCVTs2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>; -def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", +def VCVTu2fd : N2VD<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v2f32, v2i32, uint_to_fp>; -def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", +def VCVTf2sq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", v4i32, v4f32, fp_to_sint>; -def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", +def VCVTf2uq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", v4i32, v4f32, fp_to_uint>; -def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", +def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", v4f32, v4i32, sint_to_fp>; -def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", +def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; // VCVT : Vector Convert Between Floating-Point and Fixed-Point. 
-def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xud : N2VCvtD<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", v2i32, v2f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v2f32, v2i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fd : N2VCvtD<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v2f32, v2i32, int_arm_neon_vcvtfxu2fp>; -def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt.s32.f32", +def VCVTf2xsq : N2VCvtQ<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxs>; -def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt.u32.f32", +def VCVTf2xuq : N2VCvtQ<1, 1, 0b1111, 0, 1, "vcvt", "u32.f32", v4i32, v4f32, int_arm_neon_vcvtfp2fxu>; -def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt.f32.s32", +def VCVTxs2fq : N2VCvtQ<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32", v4f32, v4i32, int_arm_neon_vcvtfxs2fp>; -def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt.f32.u32", +def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; // Vector Reverse. 
// VREV64 : Vector Reverse elements within 64-bit doublewords -class VREV64D op19_18, string OpcodeStr, ValueType Ty> +class VREV64D op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (NEONvrev64 (Ty DPR:$src))))]>; -class VREV64Q op19_18, string OpcodeStr, ValueType Ty> +class VREV64Q op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00000, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev64 (Ty QPR:$src))))]>; -def VREV64d8 : VREV64D<0b00, "vrev64.8", v8i8>; -def VREV64d16 : VREV64D<0b01, "vrev64.16", v4i16>; -def VREV64d32 : VREV64D<0b10, "vrev64.32", v2i32>; -def VREV64df : VREV64D<0b10, "vrev64.32", v2f32>; +def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; +def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; +def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; +def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>; -def VREV64q8 : VREV64Q<0b00, "vrev64.8", v16i8>; -def VREV64q16 : VREV64Q<0b01, "vrev64.16", v8i16>; -def VREV64q32 : VREV64Q<0b10, "vrev64.32", v4i32>; -def VREV64qf : VREV64Q<0b10, "vrev64.32", v4f32>; +def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; +def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; +def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; +def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>; // VREV32 : Vector Reverse elements within 32-bit words -class VREV32D op19_18, string OpcodeStr, ValueType Ty> +class VREV32D op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (NEONvrev32 (Ty 
DPR:$src))))]>; -class VREV32Q op19_18, string OpcodeStr, ValueType Ty> +class VREV32Q op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00001, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev32 (Ty QPR:$src))))]>; -def VREV32d8 : VREV32D<0b00, "vrev32.8", v8i8>; -def VREV32d16 : VREV32D<0b01, "vrev32.16", v4i16>; +def VREV32d8 : VREV32D<0b00, "vrev32", "8", v8i8>; +def VREV32d16 : VREV32D<0b01, "vrev32", "16", v4i16>; -def VREV32q8 : VREV32Q<0b00, "vrev32.8", v16i8>; -def VREV32q16 : VREV32Q<0b01, "vrev32.16", v8i16>; +def VREV32q8 : VREV32Q<0b00, "vrev32", "8", v16i8>; +def VREV32q16 : VREV32Q<0b01, "vrev32", "16", v8i16>; // VREV16 : Vector Reverse elements within 16-bit halfwords -class VREV16D op19_18, string OpcodeStr, ValueType Ty> +class VREV16D op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 0, 0, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set DPR:$dst, (Ty (NEONvrev16 (Ty DPR:$src))))]>; -class VREV16Q op19_18, string OpcodeStr, ValueType Ty> +class VREV16Q op19_18, string OpcodeStr, string Dt, ValueType Ty> : N2V<0b11, 0b11, op19_18, 0b00, 0b00010, 1, 0, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, - !strconcat(OpcodeStr, "\t$dst, $src"), "", + OpcodeStr, Dt, "$dst, $src", "", [(set QPR:$dst, (Ty (NEONvrev16 (Ty QPR:$src))))]>; -def VREV16d8 : VREV16D<0b00, "vrev16.8", v8i8>; -def VREV16q8 : VREV16Q<0b00, "vrev16.8", v16i8>; +def VREV16d8 : VREV16D<0b00, "vrev16", "8", v8i8>; +def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>; // Other Vector Shuffles. 
// VEXT : Vector Extract -class VEXTd - : N3VImm<0,1,0b11,0,0, (outs DPR:$dst), - (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), - (Ty DPR:$rhs), imm:$index)))]>; +class VEXTd + : N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst), + (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD, + OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + [(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs), + (Ty DPR:$rhs), imm:$index)))]>; -class VEXTq - : N3VImm<0,1,0b11,1,0, (outs QPR:$dst), - (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, - !strconcat(OpcodeStr, "\t$dst, $lhs, $rhs, $index"), "", - [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), - (Ty QPR:$rhs), imm:$index)))]>; +class VEXTq + : N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst), + (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ, + OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "", + [(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs), + (Ty QPR:$rhs), imm:$index)))]>; -def VEXTd8 : VEXTd<"vext.8", v8i8>; -def VEXTd16 : VEXTd<"vext.16", v4i16>; -def VEXTd32 : VEXTd<"vext.32", v2i32>; -def VEXTdf : VEXTd<"vext.32", v2f32>; +def VEXTd8 : VEXTd<"vext", "8", v8i8>; +def VEXTd16 : VEXTd<"vext", "16", v4i16>; +def VEXTd32 : VEXTd<"vext", "32", v2i32>; +def VEXTdf : VEXTd<"vext", "32", v2f32>; -def VEXTq8 : VEXTq<"vext.8", v16i8>; -def VEXTq16 : VEXTq<"vext.16", v8i16>; -def VEXTq32 : VEXTq<"vext.32", v4i32>; -def VEXTqf : VEXTq<"vext.32", v4f32>; +def VEXTq8 : VEXTq<"vext", "8", v16i8>; +def VEXTq16 : VEXTq<"vext", "16", v8i16>; +def VEXTq32 : VEXTq<"vext", "32", v4i32>; +def VEXTqf : VEXTq<"vext", "32", v4f32>; // VTRN : Vector Transpose -def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn.8">; -def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn.16">; -def VTRNd32 : N2VDShuffle<0b10, 0b00001, "vtrn.32">; +def VTRNd8 : N2VDShuffle<0b00, 0b00001, "vtrn", "8">; +def VTRNd16 : N2VDShuffle<0b01, 0b00001, "vtrn", "16">; +def VTRNd32 : N2VDShuffle<0b10, 
0b00001, "vtrn", "32">; -def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn.8">; -def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn.16">; -def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn.32">; +def VTRNq8 : N2VQShuffle<0b00, 0b00001, IIC_VPERMQ, "vtrn", "8">; +def VTRNq16 : N2VQShuffle<0b01, 0b00001, IIC_VPERMQ, "vtrn", "16">; +def VTRNq32 : N2VQShuffle<0b10, 0b00001, IIC_VPERMQ, "vtrn", "32">; // VUZP : Vector Unzip (Deinterleave) -def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp.8">; -def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp.16">; -def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp.32">; +def VUZPd8 : N2VDShuffle<0b00, 0b00010, "vuzp", "8">; +def VUZPd16 : N2VDShuffle<0b01, 0b00010, "vuzp", "16">; +def VUZPd32 : N2VDShuffle<0b10, 0b00010, "vuzp", "32">; -def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp.8">; -def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp.16">; -def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp.32">; +def VUZPq8 : N2VQShuffle<0b00, 0b00010, IIC_VPERMQ3, "vuzp", "8">; +def VUZPq16 : N2VQShuffle<0b01, 0b00010, IIC_VPERMQ3, "vuzp", "16">; +def VUZPq32 : N2VQShuffle<0b10, 0b00010, IIC_VPERMQ3, "vuzp", "32">; // VZIP : Vector Zip (Interleave) -def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip.8">; -def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip.16">; -def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip.32">; +def VZIPd8 : N2VDShuffle<0b00, 0b00011, "vzip", "8">; +def VZIPd16 : N2VDShuffle<0b01, 0b00011, "vzip", "16">; +def VZIPd32 : N2VDShuffle<0b10, 0b00011, "vzip", "32">; -def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip.8">; -def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip.16">; -def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">; +def VZIPq8 : N2VQShuffle<0b00, 0b00011, IIC_VPERMQ3, "vzip", "8">; +def VZIPq16 : N2VQShuffle<0b01, 0b00011, IIC_VPERMQ3, "vzip", "16">; +def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">; // Vector Table Lookup and 
Table Extension. @@ -2780,25 +3016,25 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip.32">; def VTBL1 : N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$src), IIC_VTB1, - "vtbl.8\t$dst, \\{$tbl1\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBL2 : N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2, - "vtbl.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2 DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBL3 : N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3, - "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBL4 : N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst), (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4, - "vtbl.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "", + "vtbl", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; } // hasExtraSrcRegAllocReq = 1 @@ -2807,26 +3043,26 @@ def VTBL4 def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1, - "vtbx.8\t$dst, \\{$tbl1\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1 DPR:$orig, DPR:$tbl1, DPR:$src)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2, - "vtbx.8\t$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, \\{$tbl1,$tbl2\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 
(int_arm_neon_vtbx2 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>; def VTBX3 : N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3, - "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>; def VTBX4 : N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4, - "vtbx.8\t$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst", + "vtbx", "8", "$dst, \\{$tbl1,$tbl2,$tbl3,$tbl4\\}, $src", "$orig = $dst", [(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>; } // hasExtraSrcRegAllocReq = 1 @@ -2840,17 +3076,17 @@ def VTBX4 // Vector Add Operations used for single-precision FP let neverHasSideEffects = 1 in -def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd.f32", v2f32, v2f32, fadd,1>; +def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>; def : N3VDsPat; // Vector Sub Operations used for single-precision FP let neverHasSideEffects = 1 in -def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub.f32", v2f32, v2f32, fsub,0>; +def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>; def : N3VDsPat; // Vector Multiply Operations used for single-precision FP let neverHasSideEffects = 1 in -def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul.f32", v2f32, v2f32, fmul,1>; +def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>; def : N3VDsPat; // Vector Multiply-Accumulate/Subtract used for single-precision FP @@ -2858,17 +3094,17 @@ def : N3VDsPat; // we want to avoid them for now. e.g., alternating vmla/vadd instructions. 
//let neverHasSideEffects = 1 in -//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla.f32", v2f32,fmul,fadd>; +//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>; //def : N3VDMulOpsPat; //let neverHasSideEffects = 1 in -//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls.f32", v2f32,fmul,fsub>; +//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>; //def : N3VDMulOpsPat; // Vector Absolute used for single-precision FP let neverHasSideEffects = 1 in def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0, - IIC_VUNAD, "vabs.f32", + IIC_VUNAD, "vabs", "f32", v2f32, v2f32, int_arm_neon_vabs>; def : N2VDIntsPat; @@ -2876,27 +3112,27 @@ def : N2VDIntsPat; let neverHasSideEffects = 1 in def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD, - "vneg.f32\t$dst, $src", "", []>; + "vneg", "f32", "$dst, $src", "", []>; def : N2VDIntsPat; // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in -def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt.s32.f32", +def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", v2i32, v2f32, fp_to_sint>; def : N2VDsPat; let neverHasSideEffects = 1 in -def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt.u32.f32", +def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32", v2i32, v2f32, fp_to_uint>; def : N2VDsPat; let neverHasSideEffects = 1 in -def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt.f32.s32", +def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", v2f32, v2i32, sint_to_fp>; def : N2VDsPat; let neverHasSideEffects = 1 in -def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt.f32.u32", +def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v2f32, v2i32, 
uint_to_fp>; def : N2VDsPat; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index d1831d1e4888..b5956a32c587 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -296,7 +296,7 @@ let isBranch = 1, isTerminator = 1 in { // Load Store Instructions. // -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr, "ldr", "\t$dst, $addr", [(set tGPR:$dst, (load t_addrmode_s4:$addr))]>; @@ -332,13 +332,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi, // Load tconstpool // FIXME: Use ldr.n to work around a Darwin assembler bug. -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", ".n\t$dst, $addr", [(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>; // Special LDR for loads from non-pc-relative constpools. 
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in +let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, + mayHaveSideEffects = 1 in def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi, "ldr", "\t$dst, $addr", []>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 1bb9bfd6f5e4..948981529921 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -49,8 +49,8 @@ def t2_so_imm_neg_XFORM : SDNodeXForm, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1; @@ -88,6 +88,21 @@ def t2_so_imm2part_2 : SDNodeXFormgetTargetConstant(V, MVT::i32); }]>; +def t2_so_neg_imm2part : Operand, PatLeaf<(imm), [{ + return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue()); + }]> { +} + +def t2_so_neg_imm2part_1 : SDNodeXFormgetZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + +def t2_so_neg_imm2part_2 : SDNodeXFormgetZExtValue()); + return CurDAG->getTargetConstant(V, MVT::i32); +}]>; + /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. def imm1_31 : PatLeaf<(i32 imm), [{ return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; @@ -252,9 +267,9 @@ multiclass T2I_bin_ii12rs { [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>; } -/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a -/// binary operation that produces a value and use and define the carry bit. -/// It's not predicable. +/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns +/// for a binary operation that produces a value and use and define the carry +/// bit. It's not predicable. 
let Uses = [CPSR] in { multiclass T2I_adde_sube_irs { // shifted imm @@ -471,7 +486,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), // // Load -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in defm t2LDR : T2I_ld<"ldr", UnOpFrag<(load node:$Src)>>; // Loads with zero extension @@ -615,7 +630,7 @@ def t2STR_POST : T2Iidxldst<(outs GPR:$base_wb), AddrModeT2_i8, IndexModePost, IIC_iStoreiu, "str", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, - (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; + (post_store GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; def t2STRH_PRE : T2Iidxldst<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base, t2am_imm8_offset:$offset), @@ -718,9 +733,9 @@ def : T2Pat<(and (srl GPR:$Src, (i32 8)), 0xFF00FF), (t2UXTB16r_rot GPR:$Src, 8)>; defm t2UXTAB : T2I_bin_rrot<"uxtab", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; + BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>; defm t2UXTAH : T2I_bin_rrot<"uxtah", - BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; + BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>; } //===----------------------------------------------------------------------===// @@ -1162,15 +1177,9 @@ def : T2Pat<(xor GPR:$LHS, t2_so_imm2part:$RHS), def : T2Pat<(add GPR:$LHS, t2_so_imm2part:$RHS), (t2ADDri (t2ADDri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), (t2_so_imm2part_2 imm:$RHS))>; -def : T2Pat<(sub GPR:$LHS, t2_so_imm2part:$RHS), - (t2SUBri (t2SUBri GPR:$LHS, (t2_so_imm2part_1 imm:$RHS)), - (t2_so_imm2part_2 imm:$RHS))>; - -// ConstantPool, GlobalAddress, and JumpTable -def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>; -def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; -def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), - (t2LEApcrelJT tjumptable:$dst, imm:$id)>; +def : T2Pat<(add GPR:$LHS, t2_so_neg_imm2part:$RHS), + (t2SUBri (t2SUBri 
GPR:$LHS, (t2_so_neg_imm2part_1 imm:$RHS)), + (t2_so_neg_imm2part_2 imm:$RHS))>; // 32-bit immediate using movw + movt. // This is a single pseudo instruction to make it re-materializable. Remove @@ -1180,10 +1189,20 @@ def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>; +// ConstantPool, GlobalAddress, and JumpTable +def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>, + Requires<[IsThumb2, DontUseMovt]>; +def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>; +def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>, + Requires<[IsThumb2, UseMovt]>; + +def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id), + (t2LEApcrelJT tjumptable:$dst, imm:$id)>; + // Pseudo instruction that combines ldr from constpool and add pc. This should // be expanded into two instructions late to allow if-conversion and // scheduling. -let isReMaterializable = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index ba341f487e58..5bfe89d25344 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -54,7 +54,7 @@ def vfp_f64imm : Operand, // Load / store Instructions. 
// -let canFoldAsLoad = 1 in { +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; @@ -437,7 +437,7 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", let isReMaterializable = 1 in { def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconstd", "\t$dst, $imm", + "vmov", ".f64\t$dst, $imm", [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; @@ -448,7 +448,7 @@ def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconsts", "\t$dst, $imm", + "vmov", ".f32\t$dst, $imm", [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 24990e67a381..aa50cfd3074d 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -139,7 +139,8 @@ ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) { void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, JITCodeEmitter &JCE) { - JCE.startGVStub(GV, 4, 4); + MachineCodeEmitter::BufferState BS; + JCE.startGVStub(BS, GV, 4, 4); intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); if (!sys::Memory::setRangeWritable((void*)Addr, 4)) { llvm_unreachable("ERROR: Unable to mark indirect symbol writable"); @@ -148,19 +149,27 @@ void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr, if (!sys::Memory::setRangeExecutable((void*)Addr, 4)) { llvm_unreachable("ERROR: Unable to mark indirect symbol executable"); } - void *PtrAddr = JCE.finishGVStub(GV); + void *PtrAddr = JCE.finishGVStub(BS); addIndirectSymAddr(Ptr, (intptr_t)PtrAddr); return PtrAddr; } 
+TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() { + // The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a + // 4-byte address. See emitFunctionStub for details. + StubLayout Result = {16, 4}; + return Result; +} + void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { + void *Addr; // If this is just a call to an external function, emit a branch instead of a // call. The code is the same except for one bit of the last instruction. if (Fn != (void*)(intptr_t)ARMCompilationCallback) { // Branch to the corresponding function addr. if (IsPIC) { - // The stub is 8-byte size and 4-aligned. + // The stub is 16-byte size and 4-aligned. intptr_t LazyPtr = getIndirectSymAddr(Fn); if (!LazyPtr) { // In PIC mode, the function stub is loading a lazy-ptr. @@ -172,30 +181,30 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, errs() << "JIT: Stub emitted at [" << LazyPtr << "] for external function at '" << Fn << "'\n"); } - JCE.startGVStub(F, 16, 4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable((void*)Addr, 16)) { + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub writable"); } - JCE.emitWordLE(0xe59fc004); // ldr pc, [pc, #+4] + JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4] JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip JCE.emitWordLE(0xe59cf000); // ldr pc, [ip] - JCE.emitWordLE(LazyPtr - (Addr+4+8)); // func - (L_func$scv+8) - sys::Memory::InvalidateInstructionCache((void*)Addr, 16); - if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) { + JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8) + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub executable"); } } else { // The stub is 8-byte size and 4-aligned. 
- JCE.startGVStub(F, 8, 4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable((void*)Addr, 8)) { + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 8)) { llvm_unreachable("ERROR: Unable to mark stub writable"); } JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] JCE.emitWordLE((intptr_t)Fn); // addr of function - sys::Memory::InvalidateInstructionCache((void*)Addr, 8); - if (!sys::Memory::setRangeExecutable((void*)Addr, 8)) { + sys::Memory::InvalidateInstructionCache(Addr, 8); + if (!sys::Memory::setRangeExecutable(Addr, 8)) { llvm_unreachable("ERROR: Unable to mark stub executable"); } } @@ -207,9 +216,9 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, // // Branch and link to the compilation callback. // The stub is 16-byte size and 4-byte aligned. - JCE.startGVStub(F, 16, 4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); - if (!sys::Memory::setRangeWritable((void*)Addr, 16)) { + JCE.emitAlignment(4); + Addr = (void*)JCE.getCurrentPCValue(); + if (!sys::Memory::setRangeWritable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub writable"); } // Save LR so the callback can determine which stub called it. @@ -222,13 +231,13 @@ void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4] // The address of the compilation callback. 
JCE.emitWordLE((intptr_t)ARMCompilationCallback); - sys::Memory::InvalidateInstructionCache((void*)Addr, 16); - if (!sys::Memory::setRangeExecutable((void*)Addr, 16)) { + sys::Memory::InvalidateInstructionCache(Addr, 16); + if (!sys::Memory::setRangeExecutable(Addr, 16)) { llvm_unreachable("ERROR: Unable to mark stub executable"); } } - return JCE.finishGVStub(F); + return Addr; } intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const { diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index 7dfeed8b7bf3..ff332b7ee15b 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -61,6 +61,10 @@ namespace llvm { virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE); + // getStubLayout - Returns the size and alignment of the largest call stub + // on ARM. + virtual StubLayout getStubLayout(); + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. 
diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index 427645c47471..bbbf41397566 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -180,7 +180,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP Unary InstrItinData, InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>]>, + InstrStage<4, [FU_NLSPipe]>], [4, 1]>, // // Single-precision FP Compare InstrItinData, @@ -189,17 +189,17 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP Compare InstrItinData, InstrStage<4, [FU_NPipe], 0>, - InstrStage<4, [FU_NLSPipe]>]>, + InstrStage<4, [FU_NLSPipe]>], [4, 1]>, // // Single to Double FP Convert InstrItinData, InstrStage<7, [FU_NPipe], 0>, - InstrStage<7, [FU_NLSPipe]>]>, + InstrStage<7, [FU_NLSPipe]>], [7, 1]>, // // Double to Single FP Convert InstrItinData, InstrStage<5, [FU_NPipe], 0>, - InstrStage<5, [FU_NLSPipe]>]>, + InstrStage<5, [FU_NLSPipe]>], [5, 1]>, // // Single-Precision FP to Integer Convert InstrItinData, @@ -208,7 +208,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-Precision FP to Integer Convert InstrItinData, InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>]>, + InstrStage<8, [FU_NLSPipe]>], [8, 1]>, // // Integer to Single-Precision FP Convert InstrItinData, @@ -217,7 +217,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Integer to Double-Precision FP Convert InstrItinData, InstrStage<8, [FU_NPipe], 0>, - InstrStage<8, [FU_NLSPipe]>]>, + InstrStage<8, [FU_NLSPipe]>], [8, 1]>, // // Single-precision FP ALU InstrItinData, @@ -226,7 +226,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP ALU InstrItinData, InstrStage<9, [FU_NPipe], 0>, - InstrStage<9, [FU_NLSPipe]>]>, + InstrStage<9, [FU_NLSPipe]>], [9, 1, 1]>, // // Single-precision FP Multiply InstrItinData, @@ -235,7 +235,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP Multiply InstrItinData, 
InstrStage<11, [FU_NPipe], 0>, - InstrStage<11, [FU_NLSPipe]>]>, + InstrStage<11, [FU_NLSPipe]>], [11, 1, 1]>, // // Single-precision FP MAC InstrItinData, @@ -244,27 +244,27 @@ def CortexA8Itineraries : ProcessorItineraries<[ // Double-precision FP MAC InstrItinData, InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>]>, + InstrStage<19, [FU_NLSPipe]>], [19, 2, 1, 1]>, // // Single-precision FP DIV InstrItinData, InstrStage<20, [FU_NPipe], 0>, - InstrStage<20, [FU_NLSPipe]>]>, + InstrStage<20, [FU_NLSPipe]>], [20, 1, 1]>, // // Double-precision FP DIV InstrItinData, InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>]>, + InstrStage<29, [FU_NLSPipe]>], [29, 1, 1]>, // // Single-precision FP SQRT InstrItinData, InstrStage<19, [FU_NPipe], 0>, - InstrStage<19, [FU_NLSPipe]>]>, + InstrStage<19, [FU_NLSPipe]>], [19, 1]>, // // Double-precision FP SQRT InstrItinData, InstrStage<29, [FU_NPipe], 0>, - InstrStage<29, [FU_NLSPipe]>]>, + InstrStage<29, [FU_NLSPipe]>], [29, 1]>, // // Single-precision FP Load // use FU_Issue to enforce the 1 load/store per cycle limit diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 432ed78c19a7..71f388354d4f 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -27,6 +27,10 @@ UseNEONFP("arm-use-neon-fp", cl::desc("Use NEON for single-precision FP"), cl::init(false), cl::Hidden); +static cl::opt +UseMOVT("arm-use-movt", + cl::init(true), cl::Hidden); + ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) : ARMArchVersion(V4T) @@ -36,6 +40,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ThumbMode(Thumb1) , PostRAScheduler(false) , IsR9Reserved(ReserveR9) + , UseMovt(UseMOVT) , stackAlignment(4) , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. 
@@ -109,8 +114,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; } - HasBranchTargetBuffer = (CPUString == "cortex-a8" || - CPUString == "cortex-a9"); } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 3d0e01e99b79..3f06b7b7f157 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -50,9 +50,6 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; - /// HasBranchTargetBuffer - True if processor can predict indirect branches. - bool HasBranchTargetBuffer; - /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -65,6 +62,10 @@ protected: /// IsR9Reserved - True if R9 is a not available as general purpose register. bool IsR9Reserved; + /// UseMovt - True if MOVT / MOVW pairs are used for materialization of 32-bit + /// imms (including global addresses). + bool UseMovt; + /// stackAlignment - The minimum alignment known to hold of the stack frame on /// entry to the function and which must be maintained by every function. unsigned stackAlignment; @@ -126,12 +127,12 @@ protected: bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); } bool hasThumb2() const { return ThumbMode >= Thumb2; } - bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; } - bool isR9Reserved() const { return IsR9Reserved; } + bool useMovt() const { return UseMovt && hasV6T2Ops(); } + const std::string & getCPUString() const { return CPUString; } - + /// enablePostRAScheduler - True at 'More' optimization. 
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtarget::AntiDepBreakMode& Mode, diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index dd4a240f6c0d..692bb1924261 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -330,6 +330,8 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const char *Modifier) { const MachineOperand &MO = MI->getOperand(OpNum); + unsigned TF = MO.getTargetFlags(); + switch (MO.getType()) { default: assert(0 && ""); @@ -356,12 +358,12 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_Immediate: { int64_t Imm = MO.getImm(); O << '#'; - if (Modifier) { - if (strcmp(Modifier, "lo16") == 0) - O << ":lower16:"; - else if (strcmp(Modifier, "hi16") == 0) - O << ":upper16:"; - } + if ((Modifier && strcmp(Modifier, "lo16") == 0) || + (TF & ARMII::MO_LO16)) + O << ":lower16:"; + else if ((Modifier && strcmp(Modifier, "hi16") == 0) || + (TF & ARMII::MO_HI16)) + O << ":upper16:"; O << Imm; break; } @@ -371,6 +373,13 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, case MachineOperand::MO_GlobalAddress: { bool isCallOp = Modifier && !strcmp(Modifier, "call"); GlobalValue *GV = MO.getGlobal(); + + if ((Modifier && strcmp(Modifier, "lo16") == 0) || + (TF & ARMII::MO_LO16)) + O << ":lower16:"; + else if ((Modifier && strcmp(Modifier, "hi16") == 0) || + (TF & ARMII::MO_HI16)) + O << ":upper16:"; O << Mang->getMangledName(GV); printOffset(MO.getOffset()); @@ -998,7 +1007,7 @@ void ARMAsmPrinter::printNoHashImmediate(const MachineInstr *MI, int OpNum) { void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) { const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); - O << '#' << ARM::getVFPf32Imm(FP->getValueAPF()); + O << '#' << FP->getValueAPF().convertToFloat(); 
if (VerboseAsm) { O.PadToColumn(MAI->getCommentColumn()); O << MAI->getCommentString() << ' '; @@ -1008,7 +1017,7 @@ void ARMAsmPrinter::printVFPf32ImmOperand(const MachineInstr *MI, int OpNum) { void ARMAsmPrinter::printVFPf64ImmOperand(const MachineInstr *MI, int OpNum) { const ConstantFP *FP = MI->getOperand(OpNum).getFPImm(); - O << '#' << ARM::getVFPf64Imm(FP->getValueAPF()); + O << '#' << FP->getValueAPF().convertToDouble(); if (VerboseAsm) { O.PadToColumn(MAI->getCommentColumn()); O << MAI->getCommentString() << ' '; diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 7d767ec52615..50abcf464e0d 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -81,8 +81,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { // afterwards // - The imp-defs / imp-uses are superregs only, we don't care about // them. - BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg); + AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(), + TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg)); MBB.erase(MI); MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index ad1739c69053..b2fd7b334d87 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -78,7 +78,7 @@ namespace { { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, - { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 }, // FIXME: Do we need the 16-bit 'S' variant? 
{ ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, @@ -413,6 +413,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, if (MI->getOperand(2).getImm() == 0) return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); break; + case ARM::t2MOVi16: + // Can convert only 'pure' immediate operands, not immediates obtained as + // globals' addresses. + if (MI->getOperand(1).isImm()) + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + break; } return false; } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 921752214c97..b5579f4a1289 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -127,10 +127,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) setOperationAction(ISD::BIT_CONVERT, MVT::f32, Promote); - // We don't have line number support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // Not implemented yet. diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp index d32813552f01..b3b711eea98d 100644 --- a/lib/Target/Alpha/AlphaJITInfo.cpp +++ b/lib/Target/Alpha/AlphaJITInfo.cpp @@ -190,17 +190,27 @@ extern "C" { #endif } +TargetJITInfo::StubLayout AlphaJITInfo::getStubLayout() { + // The stub contains 19 4-byte instructions, aligned at 4 bytes: + // R0 = R27 + // 8 x "R27 <<= 8; R27 |= 8-bits-of-Target" == 16 instructions + // JMP R27 + // Magic number so the compilation callback can recognize the stub. 
+ StubLayout Result = {19 * 4, 4}; + return Result; +} + void *AlphaJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { + MachineCodeEmitter::BufferState BS; //assert(Fn == AlphaCompilationCallback && "Where are you going?\n"); //Do things in a stupid slow way! - JCE.startGVStub(F, 19*4); void* Addr = (void*)(intptr_t)JCE.getCurrentPCValue(); for (int x = 0; x < 19; ++ x) JCE.emitWordLE(0); EmitBranchToAt(Addr, Fn); DEBUG(errs() << "Emitting Stub to " << Fn << " at [" << Addr << "]\n"); - return JCE.finishGVStub(F); + return Addr; } TargetJITInfo::LazyResolverFn diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h index ecb467fbc5ec..bd358a413128 100644 --- a/lib/Target/Alpha/AlphaJITInfo.h +++ b/lib/Target/Alpha/AlphaJITInfo.h @@ -31,6 +31,7 @@ namespace llvm { explicit AlphaJITInfo(TargetMachine &tm) : TM(tm) { useGOT = true; } + virtual StubLayout getStubLayout(); virtual void *emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE); virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index c5c96f8bfdae..ad2510a51aca 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -114,10 +114,6 @@ BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) // READCYCLECOUNTER needs special type legalization. setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); - // We don't have line number support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // Use the default implementation. 
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td index 642d10f5aa67..d396cc807e88 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td @@ -44,7 +44,7 @@ class Ra num, string n, list subs> : BlackfinReg { let Num = num; } -// Ywo halves of 32-bit register +// Two halves of 32-bit register multiclass Rss group, bits<3> num, string n> { def H : Rs; def L : Rs; diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 4dd82a6768db..23e192e62bd5 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -387,10 +387,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - // Support label based line numbers. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - // We want to legalize GlobalAddress and ConstantPool nodes into the // appropriate instructions to materialize the address. 
for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index d3b575a10d19..f24ffd2f8d4d 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -30,14 +30,6 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { [(callseq_end timm:$amt)]>; } -//===----------------------------------------------------------------------===// -// DWARF debugging Pseudo Instructions -//===----------------------------------------------------------------------===// - -def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), - ".loc $file, $line, $col", - [(dwarf_loc (i32 imm:$line), (i32 imm:$col), (i32 imm:$file))]>; - //===----------------------------------------------------------------------===// // Loads: // NB: The ordering is actually important, since the instruction selection diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index c0084be91795..beccb2ca5840 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -312,8 +312,8 @@ bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, else if (AM.JT != -1) Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/); else if (AM.BlockAddr) - Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/, - true /*AM.SymbolFlags*/); + Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32, + true, 0/*AM.SymbolFlags*/); else Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 5a925f5eb3e4..29cc370bef99 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -162,7 +162,7 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { /// getFunctionAlignment - Return the Log2 alignment of this 
function. unsigned MSP430TargetLowering::getFunctionAlignment(const Function *F) const { - return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; + return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 2; } //===----------------------------------------------------------------------===// @@ -594,9 +594,17 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC, default: llvm_unreachable("Invalid integer condition!"); case ISD::SETEQ: TCC = MSP430CC::COND_E; // aka COND_Z + // Minor optimization: if RHS is a constant, swap operands, then the + // constant can be folded into comparison. + if (RHS.getOpcode() == ISD::Constant) + std::swap(LHS, RHS); break; case ISD::SETNE: TCC = MSP430CC::COND_NE; // aka COND_NZ + // Minor optimization: if RHS is a constant, swap operands, then the + // constant can be folded into comparison. + if (RHS.getOpcode() == ISD::Constant) + std::swap(LHS, RHS); break; case ISD::SETULE: std::swap(LHS, RHS); // FALLTHROUGH diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index c3bbfe877d99..7a26f6cc42f8 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -823,37 +823,6 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2), "cmp.w\t{$src1, $src2}", [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>; -def CMP8mi0 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #0}", - [(MSP430cmp (load addr:$src1), (i8 0)), (implicit SRW)]>; -def CMP16mi0: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #0}", - [(MSP430cmp (load addr:$src1), (i16 0)), (implicit SRW)]>; -def CMP8mi1 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #1}", - [(MSP430cmp (load addr:$src1), (i8 1)), (implicit SRW)]>; -def CMP16mi1: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #1}", - [(MSP430cmp (load addr:$src1), (i16 1)), (implicit SRW)]>; -def CMP8mi2 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #2}", - [(MSP430cmp (load 
addr:$src1), (i8 2)), (implicit SRW)]>; -def CMP16mi2: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #2}", - [(MSP430cmp (load addr:$src1), (i16 2)), (implicit SRW)]>; -def CMP8mi4 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #4}", - [(MSP430cmp (load addr:$src1), (i8 4)), (implicit SRW)]>; -def CMP16mi4: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #4}", - [(MSP430cmp (load addr:$src1), (i16 4)), (implicit SRW)]>; -def CMP8mi8 : Pseudo<(outs), (ins memsrc:$src1), - "cmp.b\t{$src1, #8}", - [(MSP430cmp (load addr:$src1), (i8 8)), (implicit SRW)]>; -def CMP16mi8: Pseudo<(outs), (ins memsrc:$src1), - "cmp.w\t{$src1, #8}", - [(MSP430cmp (load addr:$src1), (i16 8)), (implicit SRW)]>; - } // Defs = [SRW] //===----------------------------------------------------------------------===// diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index 4e3a8d0575ff..516eacb53564 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -19,6 +19,7 @@ MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { WeakRefDirective ="\t.weak\t"; SetDirective = "\t.set\t"; PCSymbol="."; + CommentString = ";"; AlignmentIsInBytes = false; AllowNameToStartWithDigit = true; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 2990ba906478..ede111d5090b 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -144,6 +144,7 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) // on PIC code Load GA if (TM.getRelocationModel() == Reloc::PIC_) { if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || + (Addr.getOpcode() == ISD::TargetConstantPool) || (Addr.getOpcode() == ISD::TargetJumpTable)){ Base = CurDAG->getRegister(Mips::GP, MVT::i32); Offset = Addr; @@ -174,23 +175,21 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) } // When loading from constant pools, 
load the lower address part in - // the instruction itself. Instead of: + // the instruction itself. Example, instead of: // lui $2, %hi($CPI1_0) // addiu $2, $2, %lo($CPI1_0) // lwc1 $f0, 0($2) // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if (Addr.getOperand(0).getOpcode() == MipsISD::Hi && + if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || + Addr.getOperand(0).getOpcode() == ISD::LOAD) && Addr.getOperand(1).getOpcode() == MipsISD::Lo) { SDValue LoVal = Addr.getOperand(1); - if (ConstantPoolSDNode *CP = dyn_cast( - LoVal.getOperand(0))) { - if (!CP->getOffset()) { - Base = Addr.getOperand(0); - Offset = LoVal.getOperand(0); - return true; - } + if (dyn_cast(LoVal.getOperand(0))) { + Base = Addr.getOperand(0); + Offset = LoVal.getOperand(0); + return true; } } } @@ -235,6 +234,10 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { else return NULL; + // Choose the offsets depending on the endianess + if (TM.getTargetData()->isBigEndian()) + std::swap(Offset0, Offset1); + // Instead of: // ldc $f0, X($3) // Generate: @@ -296,6 +299,10 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { else return NULL; + // Choose the offsets depending on the endianess + if (TM.getTargetData()->isBigEndian()) + std::swap(Offset0, Offset1); + // Instead of: // sdc $f0, X($3) // Generate: diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c9a43b474f8f..ced8b939336e 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -132,10 +132,6 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FLOG10, MVT::f32, Expand); setOperationAction(ISD::FEXP, MVT::f32, Expand); - // We don't have line number support yet. 
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // Use the default for now @@ -567,8 +563,6 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) SDValue ResNode; ConstantPoolSDNode *N = cast(Op); Constant *C = N->getConstVal(); - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_ABS_HILO); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -581,11 +575,21 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) // SDValue GPRelNode = DAG.getNode(MipsISD::GPRel, MVT::i32, CP); // SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(MVT::i32); // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); - //} else { // %hi/%lo relocation + + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_HILO); SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - //} + } else { + SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_GOT); + SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), + CP, NULL, 0); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); + } return ResNode; } diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index af64c9f74112..6d8e160e7261 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -200,22 +200,33 @@ void MipsInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC) const { - 
unsigned Opc; - DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); if (RC == Mips::CPURegsRegisterClass) - Opc = Mips::SW; - else if (RC == Mips::FGR32RegisterClass) - Opc = Mips::SWC1; - else { - assert(RC == Mips::AFGR64RegisterClass); - Opc = Mips::SDC1; - } - - BuildMI(MBB, I, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill)) + BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); + else if (RC == Mips::FGR32RegisterClass) + BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); + else if (RC == Mips::AFGR64RegisterClass) { + if (!TM.getSubtarget().isMips1()) { + BuildMI(MBB, I, DL, get(Mips::SDC1)) + .addReg(SrcReg, getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); + } else { + const TargetRegisterInfo *TRI = + MBB.getParent()->getTarget().getRegisterInfo(); + const unsigned *SubSet = TRI->getSubRegisters(SrcReg); + BuildMI(MBB, I, DL, get(Mips::SWC1)) + .addReg(SubSet[0], getKillRegState(isKill)) + .addImm(0).addFrameIndex(FI); + BuildMI(MBB, I, DL, get(Mips::SWC1)) + .addReg(SubSet[1], getKillRegState(isKill)) + .addImm(4).addFrameIndex(FI); + } + } else + llvm_unreachable("Register class not handled!"); } void MipsInstrInfo:: @@ -223,19 +234,27 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC) const { - unsigned Opc; - if (RC == Mips::CPURegsRegisterClass) - Opc = Mips::LW; - else if (RC == Mips::FGR32RegisterClass) - Opc = Mips::LWC1; - else { - assert(RC == Mips::AFGR64RegisterClass); - Opc = Mips::LDC1; - } - DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); - BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0).addFrameIndex(FI); + + if (RC == Mips::CPURegsRegisterClass) + BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI); + else if (RC == Mips::FGR32RegisterClass) + 
BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI); + else if (RC == Mips::AFGR64RegisterClass) { + if (!TM.getSubtarget().isMips1()) { + BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI); + } else { + const TargetRegisterInfo *TRI = + MBB.getParent()->getTarget().getRegisterInfo(); + const unsigned *SubSet = TRI->getSubRegisters(DestReg); + BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0]) + .addImm(0).addFrameIndex(FI); + BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1]) + .addImm(4).addFrameIndex(FI); + } + } else + llvm_unreachable("Register class not handled!"); } MachineInstr *MipsInstrInfo:: @@ -278,11 +297,14 @@ foldMemoryOperandImpl(MachineFunction &MF, const TargetRegisterClass *RC = RI.getRegClass(MI->getOperand(0).getReg()); unsigned StoreOpc, LoadOpc; + bool IsMips1 = TM.getSubtarget().isMips1(); if (RC == Mips::FGR32RegisterClass) { LoadOpc = Mips::LWC1; StoreOpc = Mips::SWC1; } else { assert(RC == Mips::AFGR64RegisterClass); + // Mips1 doesn't have ldc/sdc instructions. 
+ if (IsMips1) break; LoadOpc = Mips::LDC1; StoreOpc = Mips::SDC1; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index ad326db2ac89..cae41814eed5 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -107,8 +107,7 @@ getCalleeSavedRegs(const MachineFunction *MF) const static const unsigned BitMode32CalleeSavedRegs[] = { Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, - Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, - Mips::D10, Mips::D11, Mips::D12, Mips::D13, Mips::D14, Mips::D15,0 + Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0 }; if (Subtarget.isSingleFloat()) @@ -136,9 +135,7 @@ MipsRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::CPURegsRegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, - &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, - &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, &Mips::AFGR64RegClass, 0 + &Mips::FGR32RegClass, &Mips::FGR32RegClass, &Mips::FGR32RegClass, 0 }; if (Subtarget.isSingleFloat()) diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index 0ed44d21fc40..6e0e3cefac82 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -306,10 +306,9 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy, int ElementAux[PIC16Dbg::AuxSize] = { 0 }; std::string TagName = ""; DIDerivedType DITy(Element.getNode()); - const char *ElementName = DITy.getName(); unsigned short ElementSize = DITy.getSizeInBits()/8; // Get mangleddd name for this structure/union element. 
- std::string MangMemName = ElementName + SuffixNo; + std::string MangMemName = DITy.getName().str() + SuffixNo; PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName); short Class = 0; if( CTy.getTag() == dwarf::DW_TAG_union_type) @@ -337,12 +336,11 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { continue; if (CTy.getTag() == dwarf::DW_TAG_union_type || CTy.getTag() == dwarf::DW_TAG_structure_type ) { - const char *Name = CTy.getName(); // Get the number after llvm.dbg.composite and make UniqueSuffix from // it. std::string DIVar = CTy.getNode()->getNameStr(); std::string UniqueSuffix = "." + DIVar.substr(18); - std::string MangledCTyName = Name + UniqueSuffix; + std::string MangledCTyName = CTy.getName().str() + UniqueSuffix; unsigned short size = CTy.getSizeInBits()/8; int Aux[PIC16Dbg::AuxSize] = {0}; // 7th and 8th byte represent size of structure/union. diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 65f113e6fb9a..73d30bf5bed1 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -42,11 +42,12 @@ public: /// frame pointer. static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { // For the Darwin ABI: - // Use the TOC save slot in the PowerPC linkage area for saving the frame - // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 - // is treated as a caller saved register.) + // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area + // for saving the frame pointer (if needed.) While the published ABI has + // not used this slot since at least MacOSX 10.2, there is older code + // around that does use it, and that needs to continue to work. if (isDarwinABI) - return isPPC64 ? 40 : 20; + return isPPC64 ? -8U : -4U; // SVR4 ABI: First slot in the general register save area. return -4U; @@ -90,6 +91,17 @@ public: // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const { + if (TM.getSubtarget().isDarwinABI()) { + NumEntries = 1; + if (TM.getSubtarget().isPPC64()) { + static const SpillSlot darwin64Offsets = {PPC::X31, -8}; + return &darwin64Offsets; + } else { + static const SpillSlot darwinOffsets = {PPC::R31, -4}; + return &darwinOffsets; + } + } + // Early exit if not using the SVR4 ABI. if (!TM.getSubtarget().isSVR4ABI()) { NumEntries = 0; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index fb9a2409e716..e7334b54d473 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -86,7 +86,7 @@ namespace { /// isRotateAndMask - Returns true if Mask and Shift can be folded into a /// rotate and mask opcode and mask operation. - static bool isRotateAndMask(SDNode *N, unsigned Mask, bool IsShiftMask, + static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME); /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC @@ -358,7 +358,7 @@ bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) { } bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, - bool IsShiftMask, unsigned &SH, + bool isShiftMask, unsigned &SH, unsigned &MB, unsigned &ME) { // Don't even go down this path for i64, since different logic will be // necessary for rldicl/rldicr/rldimi. 
@@ -374,12 +374,12 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask, if (Opcode == ISD::SHL) { // apply shift left to mask if it comes first - if (IsShiftMask) Mask = Mask << Shift; + if (isShiftMask) Mask = Mask << Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu << Shift); } else if (Opcode == ISD::SRL) { // apply shift right to mask if it comes first - if (IsShiftMask) Mask = Mask >> Shift; + if (isShiftMask) Mask = Mask >> Shift; // determine which bits are made indeterminant by shift Indeterminant = ~(0xFFFFFFFFu >> Shift); // adjust for the left rotate @@ -443,8 +443,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { unsigned MB, ME; if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) { - SDValue Tmp1, Tmp2, Tmp3; - bool DisjointMask = (TargetMask ^ InsertMask) == 0xFFFFFFFF; + SDValue Tmp1, Tmp2; if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) && isInt32Immediate(Op1.getOperand(1), Value)) { @@ -461,10 +460,9 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) { Op1 = Op1.getOperand(0); } } - - Tmp3 = (Op0Opc == ISD::AND && DisjointMask) ? Op0.getOperand(0) : Op0; + SH &= 31; - SDValue Ops[] = { Tmp3, Op1, getI32Imm(SH), getI32Imm(MB), + SDValue Ops[] = { Op0, Op1, getI32Imm(SH), getI32Imm(MB), getI32Imm(ME) }; return CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops, 5); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 099fcb5e8a92..30a7861a61b0 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -182,10 +182,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot sextinreg(i1). Expand to shifts. setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - // Support label based line numbers. 
- setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); @@ -1174,7 +1170,7 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { DebugLoc DL = Op.getDebugLoc(); BlockAddress *BA = cast(Op)->getBlockAddress(); - SDValue TgtBA = DAG.getBlockAddress(BA, DL, /*isTarget=*/true); + SDValue TgtBA = DAG.getBlockAddress(BA, PtrVT, /*isTarget=*/true); SDValue Zero = DAG.getConstant(0, PtrVT); SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, TgtBA, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, TgtBA, Zero); @@ -2177,10 +2173,10 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be /// adjusted to accomodate the arguments for the tailcall. -static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall, +static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, unsigned ParamSize) { - if (!IsTailCall) return 0; + if (!isTailCall) return 0; PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo(); unsigned CallerMinReservedArea = FI->getMinReservedArea(); @@ -3190,8 +3186,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Construct the stack pointer operand. - bool IsPPC64 = Subtarget.isPPC64(); - unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1; + bool isPPC64 = Subtarget.isPPC64(); + unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; SDValue StackPtr = DAG.getRegister(SP, PtrVT); // Get the operands for the STACKRESTORE. 
@@ -3213,7 +3209,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool IsPPC64 = PPCSubTarget.isPPC64(); + bool isPPC64 = PPCSubTarget.isPPC64(); bool isDarwinABI = PPCSubTarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -3225,9 +3221,9 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!RASI) { // Find out what the fix offset of the frame pointer save area. - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset, + RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true, false); // Save the result. FI->setReturnAddrSaveIndex(RASI); @@ -3238,7 +3234,7 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool IsPPC64 = PPCSubTarget.isPPC64(); + bool isPPC64 = PPCSubTarget.isPPC64(); bool isDarwinABI = PPCSubTarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -3250,11 +3246,11 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { // If the frame pointer save index hasn't been defined yet. if (!FPSI) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 
8 : 4, FPOffset, + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index f5c095a3c7cc..2b3f80da5a72 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -1357,15 +1357,6 @@ def RLWNM : MForm_2<23, } -//===----------------------------------------------------------------------===// -// DWARF Pseudo Instructions -// - -def DWARF_LOC : Pseudo<(outs), (ins i32imm:$line, i32imm:$col, i32imm:$file), - "${:comment} .loc $file, $line, $col", - [(dwarf_loc (i32 imm:$line), (i32 imm:$col), - (i32 imm:$file))]>; - //===----------------------------------------------------------------------===// // PowerPC Instruction Patterns // diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index ef25d92f719a..c679bcdf58bc 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -323,6 +323,15 @@ PPCJITInfo::getLazyResolverFunction(JITCompilerFn Fn) { return is64Bit ? PPC64CompilationCallback : PPC32CompilationCallback; } +TargetJITInfo::StubLayout PPCJITInfo::getStubLayout() { + // The stub contains up to 10 4-byte instructions, aligned at 4 bytes: 3 + // instructions to save the caller's address if this is a lazy-compilation + // stub, plus a 1-, 4-, or 7-instruction sequence to load an arbitrary address + // into a register and jump through it. 
+ StubLayout Result = {10*4, 4}; + return Result; +} + #if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \ defined(__APPLE__) extern "C" void sys_icache_invalidate(const void *Addr, size_t len); @@ -330,12 +339,12 @@ extern "C" void sys_icache_invalidate(const void *Addr, size_t len); void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE) { + MachineCodeEmitter::BufferState BS; // If this is just a call to an external function, emit a branch instead of a // call. The code is the same except for one bit of the last instruction. if (Fn != (void*)(intptr_t)PPC32CompilationCallback && Fn != (void*)(intptr_t)PPC64CompilationCallback) { - JCE.startGVStub(F, 7*4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + void *Addr = (void*)JCE.getCurrentPCValue(); JCE.emitWordBE(0); JCE.emitWordBE(0); JCE.emitWordBE(0); @@ -343,13 +352,12 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0); JCE.emitWordBE(0); JCE.emitWordBE(0); - EmitBranchToAt(Addr, (intptr_t)Fn, false, is64Bit); - sys::Memory::InvalidateInstructionCache((void*)Addr, 7*4); - return JCE.finishGVStub(F); + EmitBranchToAt((intptr_t)Addr, (intptr_t)Fn, false, is64Bit); + sys::Memory::InvalidateInstructionCache(Addr, 7*4); + return Addr; } - JCE.startGVStub(F, 10*4); - intptr_t Addr = (intptr_t)JCE.getCurrentPCValue(); + void *Addr = (void*)JCE.getCurrentPCValue(); if (is64Bit) { JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 @@ -372,8 +380,8 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0); JCE.emitWordBE(0); EmitBranchToAt(BranchAddr, (intptr_t)Fn, true, is64Bit); - sys::Memory::InvalidateInstructionCache((void*)Addr, 10*4); - return JCE.finishGVStub(F); + sys::Memory::InvalidateInstructionCache(Addr, 10*4); + return Addr; } diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h index 2e25b295f432..47ead59b587d 100644 --- 
a/lib/Target/PowerPC/PPCJITInfo.h +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -30,6 +30,7 @@ namespace llvm { is64Bit = tmIs64Bit; } + virtual StubLayout getStubLayout(); virtual void *emitFunctionStub(const Function* F, void *Fn, JITCodeEmitter &JCE); virtual LazyResolverFn getLazyResolverFunction(JITCompilerFn); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index e65e64412899..0c3c8eb6493f 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1032,18 +1032,17 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Save R31 if necessary int FPSI = FI->getFramePointerSaveIndex(); - bool IsPPC64 = Subtarget.isPPC64(); - bool IsSVR4ABI = Subtarget.isSVR4ABI(); + bool isPPC64 = Subtarget.isPPC64(); bool isDarwinABI = Subtarget.isDarwinABI(); MachineFrameInfo *MFI = MF.getFrameInfo(); // If the frame pointer save index hasn't been defined yet. - if (!FPSI && needsFP(MF) && IsSVR4ABI) { + if (!FPSI && needsFP(MF)) { // Find out what the fix offset of the frame pointer save area. - int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, + int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset, + FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); @@ -1067,7 +1066,7 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (needsFP(MF) || spillsCR(MF)) { const TargetRegisterClass *GPRC = &PPC::GPRCRegClass; const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; - const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC; + const TargetRegisterClass *RC = isPPC64 ? 
G8RC : GPRC; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false)); @@ -1297,7 +1296,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { int NegFrameSize = -FrameSize; // Get processor type. - bool IsPPC64 = Subtarget.isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // Get operating system bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) must be saved. @@ -1306,7 +1305,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Do we have a frame pointer for this function? bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; if (HasFP) { @@ -1316,11 +1315,11 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } - if (IsPPC64) { + if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0); @@ -1361,7 +1360,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now - if (!IsPPC64) { + if (!isPPC64) { // PPC32. if (ALIGN_STACK && MaxAlign > TargetAlign) { assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!"); @@ -1444,19 +1443,19 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); } else { - MachineLocation SP(IsPPC64 ? PPC::X31 : PPC::R31); + MachineLocation SP(isPPC64 ? 
PPC::X31 : PPC::R31); Moves.push_back(MachineMove(FrameLabelId, SP, SP)); } if (HasFP) { MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset); - MachineLocation FPSrc(IsPPC64 ? PPC::X31 : PPC::R31); + MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31); Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); } if (MustSaveLR) { MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); - MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR); + MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR); Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc)); } } @@ -1465,7 +1464,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // If there is a frame pointer, copy R1 into R31 if (HasFP) { - if (!IsPPC64) { + if (!isPPC64) { BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31) .addReg(PPC::R1) .addReg(PPC::R1); @@ -1481,8 +1480,8 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { // Mark effective beginning of when frame pointer is ready. BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId); - MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) : - (IsPPC64 ? PPC::X1 : PPC::R1)); + MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) : + (isPPC64 ? PPC::X1 : PPC::R1)); MachineLocation FPSrc(MachineLocation::VirtualFP); Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } @@ -1528,7 +1527,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, int FrameSize = MFI->getStackSize(); // Get processor type. - bool IsPPC64 = Subtarget.isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // Get operating system bool isDarwinABI = Subtarget.isDarwinABI(); // Check if the link register (LR) has been saved. @@ -1537,7 +1536,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, // Do we have a frame pointer for this function? 
bool HasFP = hasFP(MF) && FrameSize; - int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); + int LROffset = PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int FPOffset = 0; if (HasFP) { @@ -1547,7 +1546,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, assert(FPIndex && "No Frame Pointer Save Slot!"); FPOffset = FFI->getObjectOffset(FPIndex); } else { - FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); + FPOffset = PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); } } @@ -1575,7 +1574,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, if (FrameSize) { // The loaded (or persistent) stack pointer value is offset by the 'stwu' // on entry to the function. Add this offset back now. - if (!IsPPC64) { + if (!isPPC64) { // If this function contained a fastcc call and PerformTailCallOpt is // enabled (=> hasFastCall()==true) the fastcc call might contain a tail // call which invalidates the stack pointer value in SP(0). So we use the @@ -1629,7 +1628,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, } } - if (IsPPC64) { + if (isPPC64) { if (MustSaveLR) BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0) .addImm(LROffset/4).addReg(PPC::X1); @@ -1659,13 +1658,13 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF, MF.getFunction()->getCallingConv() == CallingConv::Fast) { PPCFunctionInfo *FI = MF.getInfo(); unsigned CallerAllocatedAmt = FI->getMinReservedArea(); - unsigned StackReg = IsPPC64 ? PPC::X1 : PPC::R1; - unsigned FPReg = IsPPC64 ? PPC::X31 : PPC::R31; - unsigned TmpReg = IsPPC64 ? PPC::X0 : PPC::R0; - unsigned ADDIInstr = IsPPC64 ? PPC::ADDI8 : PPC::ADDI; - unsigned ADDInstr = IsPPC64 ? PPC::ADD8 : PPC::ADD4; - unsigned LISInstr = IsPPC64 ? PPC::LIS8 : PPC::LIS; - unsigned ORIInstr = IsPPC64 ? PPC::ORI8 : PPC::ORI; + unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1; + unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; + unsigned TmpReg = isPPC64 ? 
PPC::X0 : PPC::R0; + unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI; + unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4; + unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS; + unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI; if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) { BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg) diff --git a/lib/Target/README.txt b/lib/Target/README.txt index aad621f440ac..2d8a687ebebb 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -2,6 +2,29 @@ Target Independent Opportunities: //===---------------------------------------------------------------------===// +Dead argument elimination should be enhanced to handle cases when an argument is +dead to an externally visible function. Though the argument can't be removed +from the externally visible function, the caller doesn't need to pass it in. +For example in this testcase: + + void foo(int X) __attribute__((noinline)); + void foo(int X) { sideeffect(); } + void bar(int A) { foo(A+1); } + +We compile bar to: + +define void @bar(i32 %A) nounwind ssp { + %0 = add nsw i32 %A, 1 ; [#uses=1] + tail call void @foo(i32 %0) nounwind noinline ssp + ret void +} + +The add is dead, we could pass in 'i32 undef' instead. This occurs for C++ +templates etc, which usually have linkonce_odr/weak_odr linkage, not internal +linkage. + +//===---------------------------------------------------------------------===// + With the recent changes to make the implicit def/use set explicit in machineinstrs, we should change the target descriptions for 'call' instructions so that the .td files don't list all the call-clobbered registers as implicit @@ -220,7 +243,7 @@ so cool to turn it into something like: ... which would only do one 32-bit XOR per loop iteration instead of two. It would also be nice to recognize the reg->size doesn't alias reg->node[i], but -alas. +this requires TBAA. 
//===---------------------------------------------------------------------===// @@ -280,6 +303,9 @@ unsigned int popcount(unsigned int input) { return count; } +This is a form of idiom recognition for loops, the same thing that could be +useful for recognizing memset/memcpy. + //===---------------------------------------------------------------------===// These should turn into single 16-bit (unaligned?) loads on little/big endian @@ -343,7 +369,7 @@ PHI Slicing could be extended to do this. //===---------------------------------------------------------------------===// -LSR should know what GPR types a target has. This code: +LSR should know what GPR types a target has from TargetData. This code: volatile short X, Y; // globals @@ -369,7 +395,6 @@ LBB1_2: LSR should reuse the "+" IV for the exit test. - //===---------------------------------------------------------------------===// Tail call elim should be more aggressive, checking to see if the call is @@ -441,25 +466,6 @@ entry: //===---------------------------------------------------------------------===// -"basicaa" should know how to look through "or" instructions that act like add -instructions. For example in this code, the x*4+1 is turned into x*4 | 1, and -basicaa can't analyze the array subscript, leading to duplicated loads in the -generated code: - -void test(int X, int Y, int a[]) { -int i; - for (i=2; i<1000; i+=4) { - a[i+0] = a[i-1+0]*a[i-2+0]; - a[i+1] = a[i-1+1]*a[i-2+1]; - a[i+2] = a[i-1+2]*a[i-2+2]; - a[i+3] = a[i-1+3]*a[i-2+3]; - } -} - -BasicAA also doesn't do this for add. It needs to know that &A[i+1] != &A[i]. - -//===---------------------------------------------------------------------===// - We should investigate an instruction sinking pass. Consider this silly example in pic mode: @@ -1110,6 +1116,8 @@ later. 
//===---------------------------------------------------------------------===// +[STORE SINKING] + Store sinking: This code: void f (int n, int *cond, int *res) { @@ -1165,6 +1173,8 @@ This is GCC PR38204. //===---------------------------------------------------------------------===// +[STORE SINKING] + GCC PR37810 is an interesting case where we should sink load/store reload into the if block and outside the loop, so we don't reload/store it on the non-call path. @@ -1192,7 +1202,7 @@ we don't sink the store. We need partially dead store sinking. //===---------------------------------------------------------------------===// -[PHI TRANSLATE GEPs] +[LOAD PRE CRIT EDGE SPLITTING] GCC PR37166: Sinking of loads prevents SROA'ing the "g" struct on the stack leading to excess stack traffic. This could be handled by GVN with some crazy @@ -1209,100 +1219,60 @@ bb3: ; preds = %bb1, %bb2, %bb %10 = getelementptr %struct.f* %c_addr.0, i32 0, i32 0 %11 = load i32* %10, align 4 -%11 is fully redundant, an in BB2 it should have the value %8. +%11 is partially redundant, and in BB2 it should have the value %8. + +GCC PR33344 and PR35287 are similar cases. -GCC PR33344 is a similar case. //===---------------------------------------------------------------------===// -[PHI TRANSLATE INDEXED GEPs] PR5313 - -Load redundancy elimination for simple loop.
This loop: - -void append_text(const char* text,unsigned char * const io) { - while(*text) - *io=*text++; -} - -Compiles to have a fully redundant load in the loop (%2): - -define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind { -entry: - %0 = load i8* %text, align 1 ; [#uses=1] - %1 = icmp eq i8 %0, 0 ; [#uses=1] - br i1 %1, label %return, label %bb - -bb: ; preds = %bb, %entry - %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; [#uses=2] - %text_addr.04 = getelementptr i8* %text, i32 %indvar ; [#uses=1] - %2 = load i8* %text_addr.04, align 1 ; [#uses=1] - store i8 %2, i8* %io, align 1 - %tmp = add i32 %indvar, 1 ; [#uses=2] - %scevgep = getelementptr i8* %text, i32 %tmp ; [#uses=1] - %3 = load i8* %scevgep, align 1 ; [#uses=1] - %4 = icmp eq i8 %3, 0 ; [#uses=1] - br i1 %4, label %return, label %bb - -return: ; preds = %bb, %entry - ret void -} - -//===---------------------------------------------------------------------===// +[LOAD PRE] There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the -GCC testsuite. There are many pre testcases as ssa-pre-*.c +GCC testsuite, ones we don't get yet are (checked through loadpre25): + +[CRIT EDGE BREAKING] +loadpre3.c predcom-4.c + +[PRE OF READONLY CALL] +loadpre5.c + +[TURN SELECT INTO BRANCH] +loadpre14.c loadpre15.c + +actually a conditional increment: loadpre18.c loadpre19.c + + +//===---------------------------------------------------------------------===// + +[SCALAR PRE] +There are many PRE testcases in testsuite/gcc.dg/tree-ssa/ssa-pre-*.c in the +GCC testsuite. //===---------------------------------------------------------------------===// There are some interesting cases in testsuite/gcc.dg/tree-ssa/pred-comm* in the -GCC testsuite. For example, predcom-1.c is: +GCC testsuite. 
For example, we get the first example in predcom-1.c, but +miss the second one: - for (i = 2; i < 1000; i++) - fib[i] = (fib[i-1] + fib[i - 2]) & 0xffff; +unsigned fib[1000]; +unsigned avg[1000]; -which compiles into: +__attribute__ ((noinline)) +void count_averages(int n) { + int i; + for (i = 1; i < n; i++) + avg[i] = (((unsigned long) fib[i - 1] + fib[i] + fib[i + 1]) / 3) & 0xffff; +} -bb1: ; preds = %bb1, %bb1.thread - %indvar = phi i32 [ 0, %bb1.thread ], [ %0, %bb1 ] - %i.0.reg2mem.0 = add i32 %indvar, 2 - %0 = add i32 %indvar, 1 ; [#uses=3] - %1 = getelementptr [1000 x i32]* @fib, i32 0, i32 %0 - %2 = load i32* %1, align 4 ; [#uses=1] - %3 = getelementptr [1000 x i32]* @fib, i32 0, i32 %indvar - %4 = load i32* %3, align 4 ; [#uses=1] - %5 = add i32 %4, %2 ; [#uses=1] - %6 = and i32 %5, 65535 ; [#uses=1] - %7 = getelementptr [1000 x i32]* @fib, i32 0, i32 %i.0.reg2mem.0 - store i32 %6, i32* %7, align 4 - %exitcond = icmp eq i32 %0, 998 ; [#uses=1] - br i1 %exitcond, label %return, label %bb1 +which compiles into two loads instead of one in the loop. -This is basically: - LOAD fib[i+1] - LOAD fib[i] - STORE fib[i+2] +predcom-2.c is the same as predcom-1.c -instead of handling this as a loop or other xform, all we'd need to do is teach -load PRE to phi translate the %0 add (i+1) into the predecessor as (i'+1+1) = -(i'+2) (where i' is the previous iteration of i). This would find the store -which feeds it. - -predcom-2.c is apparently the same as predcom-1.c predcom-3.c is very similar but needs loads feeding each other instead of store->load. -predcom-4.c seems the same as the rest. 
-//===---------------------------------------------------------------------===// - -Other simple load PRE cases: -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35287 [LPRE crit edge splitting] - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34677 (licm does this, LPRE crit edge) - llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -simplifycfg -gvn | llvm-dis - -http://gcc.gnu.org/bugzilla/show_bug.cgi?id=16799 [BITCAST PHI TRANS] - //===---------------------------------------------------------------------===// Type based alias analysis: @@ -1334,7 +1304,7 @@ Interesting missed case because of control flow flattening (should be 2 loads): http://gcc.gnu.org/bugzilla/show_bug.cgi?id=26629 With: llvm-gcc t2.c -S -o - -O0 -emit-llvm | llvm-as | opt -mem2reg -gvn -instcombine | llvm-dis -we miss it because we need 1) GEP PHI TRAN, 2) CRIT EDGE 3) MULTIPLE DIFFERENT +we miss it because we need 1) CRIT EDGE 2) MULTIPLE DIFFERENT VALS PRODUCED BY ONE BLOCK OVER DIFFERENT PATHS //===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 133f8283950a..1b3ca3ed1cd7 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -644,10 +644,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - // We don't have line number support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); // VASTART needs to be custom lowered to use the VarArgsFrameIndex. 
@@ -663,8 +659,6 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); // No debug info support yet. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); setStackPointerRegisterToSaveRestore(SP::O6); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 6fdbc92036de..f887523c5b71 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -783,8 +783,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } // Exception Handling. - LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0, - SectionKind::getReadOnlyWithRel()); + LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, + SectionKind::getDataRel()); EHFrameSection = getMachOSection("__TEXT", "__eh_frame", MCSectionMachO::S_COALESCED | diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index be9f4b265f68..38c0c284774f 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -43,7 +43,6 @@ MCSymbol *X86MCInstLower::GetPICBaseSymbol() const { Twine(AsmPrinter.getFunctionNumber())+"$pb"); } - /// LowerGlobalAddressOperand - Lower an MO_GlobalAddress operand to an /// MCOperand. MCSymbol *X86MCInstLower:: @@ -231,6 +230,19 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const { return Ctx.GetOrCreateSymbol(Name.str()); } +MCSymbol *X86MCInstLower:: +GetBlockAddressSymbol(const MachineOperand &MO) const { + const char *Suffix = ""; + switch (MO.getTargetFlags()) { + default: llvm_unreachable("Unknown target flag on BA operand"); + case X86II::MO_NO_FLAG: break; // No flag. + case X86II::MO_PIC_BASE_OFFSET: break; // Doesn't modify symbol name. 
+ case X86II::MO_GOTOFF: Suffix = "@GOTOFF"; break; + } + + return AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress(), Suffix); +} + MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { // FIXME: We would like an efficient form for this, so we don't have to do a @@ -331,8 +343,7 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = LowerSymbolOperand(MO, GetConstantPoolIndexSymbol(MO)); break; case MachineOperand::MO_BlockAddress: - MCOp = LowerSymbolOperand(MO, AsmPrinter.GetBlockAddressSymbol( - MO.getBlockAddress())); + MCOp = LowerSymbolOperand(MO, GetBlockAddressSymbol(MO)); break; } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.h b/lib/Target/X86/AsmPrinter/X86MCInstLower.h index fa25b906d543..94f8bfcc91a0 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.h +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.h @@ -43,6 +43,7 @@ public: MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const; MCSymbol *GetJumpTableSymbol(const MachineOperand &MO) const; MCSymbol *GetConstantPoolIndexSymbol(const MachineOperand &MO) const; + MCSymbol *GetBlockAddressSymbol(const MachineOperand &MO) const; MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const; private: diff --git a/lib/Target/X86/Disassembler/CMakeLists.txt b/lib/Target/X86/Disassembler/CMakeLists.txt new file mode 100644 index 000000000000..b329e897b980 --- /dev/null +++ b/lib/Target/X86/Disassembler/CMakeLists.txt @@ -0,0 +1,6 @@ +include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) + +add_llvm_library(LLVMX86Disassembler + X86Disassembler.cpp + ) +add_dependencies(LLVMX86Disassembler X86CodeGenTable_gen) diff --git a/lib/Target/X86/Disassembler/Makefile b/lib/Target/X86/Disassembler/Makefile new file mode 100644 index 000000000000..b2896477df48 --- /dev/null +++ b/lib/Target/X86/Disassembler/Makefile @@ -0,0 +1,16 @@ +##===- lib/Target/X86/Disassembler/Makefile ----------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMX86Disassembler + +# Hack: we need to include 'main' x86 target directory to grab private headers +CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. + +include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp new file mode 100644 index 000000000000..2ebbc9bdbdb6 --- /dev/null +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -0,0 +1,29 @@ +//===- X86Disassembler.cpp - Disassembler for x86 and x86_64 ----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCDisassembler.h" +#include "llvm/Target/TargetRegistry.h" +#include "X86.h" +using namespace llvm; + +static const MCDisassembler *createX86_32Disassembler(const Target &T) { + return 0; +} + +static const MCDisassembler *createX86_64Disassembler(const Target &T) { + return 0; +} + +extern "C" void LLVMInitializeX86Disassembler() { + // Register the disassembler. 
+ TargetRegistry::RegisterMCDisassembler(TheX86_32Target, + createX86_32Disassembler); + TargetRegistry::RegisterMCDisassembler(TheX86_64Target, + createX86_64Disassembler); +} diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile index 220831d88db3..b311a6ed86a9 100644 --- a/lib/Target/X86/Makefile +++ b/lib/Target/X86/Makefile @@ -18,6 +18,6 @@ BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \ X86GenFastISel.inc \ X86GenCallingConv.inc X86GenSubtarget.inc -DIRS = AsmPrinter AsmParser TargetInfo +DIRS = AsmPrinter AsmParser Disassembler TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 4497931e8651..4892e1746079 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -595,7 +595,6 @@ void Emitter::emitInstruction(const MachineInstr &MI, break; case TargetInstrInfo::IMPLICIT_DEF: case TargetInstrInfo::KILL: - case X86::DWARF_LOC: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 6a3577aaaf18..a9a78be3e31c 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -252,8 +252,8 @@ namespace { else if (AM.JT != -1) Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); else if (AM.BlockAddr) - Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/, - true /*AM.SymbolFlags*/); + Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32, + true, AM.SymbolFlags); else Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32); @@ -777,7 +777,7 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { AM.SymbolFlags = J->getTargetFlags(); } else { AM.BlockAddr = cast(N0)->getBlockAddress(); - //AM.SymbolFlags = cast(N0)->getTargetFlags(); + AM.SymbolFlags = cast(N0)->getTargetFlags(); } if (N.getOpcode() == X86ISD::WrapperRIP) @@ -808,7 +808,7 @@ bool 
X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { AM.SymbolFlags = J->getTargetFlags(); } else { AM.BlockAddr = cast(N0)->getBlockAddress(); - //AM.SymbolFlags = cast(N0)->getTargetFlags(); + AM.SymbolFlags = cast(N0)->getTargetFlags(); } return false; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 6018cf5ef7fc..d80b8ec66ecd 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -373,13 +373,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); } - // Use the default ISD::DBG_STOPPOINT. - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); // FIXME - use subtarget debug flags if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && !Subtarget->isTargetCygMing()) { - setOperationAction(ISD::DBG_LABEL, MVT::Other, Expand); setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } @@ -978,6 +975,19 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) computeRegisterProperties(); + // Divide and reminder operations have no vector equivalent and can + // trap. Do a custom widening for these operations in which we never + // generate more divides/remainder than the original vector width. + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + if (!isTypeLegal((MVT::SimpleValueType)VT)) { + setOperationAction(ISD::SDIV, (MVT::SimpleValueType) VT, Custom); + setOperationAction(ISD::UDIV, (MVT::SimpleValueType) VT, Custom); + setOperationAction(ISD::SREM, (MVT::SimpleValueType) VT, Custom); + setOperationAction(ISD::UREM, (MVT::SimpleValueType) VT, Custom); + } + } + // FIXME: These should be based on subtarget info. Plus, the values should // be smaller when we are in optimizing for size mode. 
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores @@ -4722,18 +4732,27 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { - unsigned WrapperKind = X86ISD::Wrapper; + // Create the TargetBlockAddressAddress node. + unsigned char OpFlags = + Subtarget->ClassifyBlockAddressReference(); CodeModel::Model M = getTargetMachine().getCodeModel(); + BlockAddress *BA = cast(Op)->getBlockAddress(); + DebugLoc dl = Op.getDebugLoc(); + SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), + /*isTarget=*/true, OpFlags); + if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) - WrapperKind = X86ISD::WrapperRIP; + Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result); + else + Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result); - DebugLoc DL = Op.getDebugLoc(); - - BlockAddress *BA = cast(Op)->getBlockAddress(); - SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true); - - Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); + // With PIC, the address is actually $g + Offset. 
+ if (isGlobalRelativeToPICBase(OpFlags)) { + Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), + DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()), + Result); + } return Result; } @@ -7164,6 +7183,14 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Results.push_back(edx.getValue(1)); return; } + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: { + EVT WidenVT = getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + Results.push_back(DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements())); + return; + } case ISD::ATOMIC_CMP_SWAP: { EVT T = N->getValueType(0); assert (T == MVT::i64 && "Only know how to expand i64 Cmp and Swap"); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index a01534b70d24..b5fa862712e5 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -1663,7 +1663,7 @@ def : Pat<(X86tcret GR64:$dst, imm:$off), (TCRETURNri64 GR64:$dst, imm:$off)>; def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off), - (TCRETURNdi64 texternalsym:$dst, imm:$off)>; + (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>; def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), (TCRETURNdi64 texternalsym:$dst, imm:$off)>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 1ddceb1abf2e..a37013d25bde 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3133,7 +3133,6 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI, break; case TargetInstrInfo::IMPLICIT_DEF: case TargetInstrInfo::KILL: - case X86::DWARF_LOC: case X86::FP_REG_KILL: break; case X86::MOVPC32r: { diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a79f262b4616..90ef1f4f1f05 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -718,7 +718,6 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o []>; let isCall = 1, isTerminator = 1, isReturn = 1, 
isBarrier = 1 in - def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL", []>; let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in @@ -3505,16 +3504,6 @@ def FS_MOV32rm : I<0x8B, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src), "movl\t%fs:$src, $dst", [(set GR32:$dst, (fsload addr:$src))]>, SegFS; -//===----------------------------------------------------------------------===// -// DWARF Pseudo Instructions -// - -def DWARF_LOC : I<0, Pseudo, (outs), - (ins i32imm:$line, i32imm:$col, i32imm:$file), - ".loc\t$file $line $col", - [(dwarf_loc (i32 imm:$line), (i32 imm:$col), - (i32 imm:$file))]>; - //===----------------------------------------------------------------------===// // EH Pseudo Instructions // diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ee63d56f3f17..dfdd4ce36c6d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2820,40 +2820,40 @@ defm PSIGND : SS3I_binop_rm_int_32<0x0A, "psignd", let Constraints = "$src1 = $dst" in { def PALIGNR64rr : SS3AI<0x0F, MRMSrcReg, (outs VR64:$dst), - (ins VR64:$src1, VR64:$src2, i16imm:$src3), + (ins VR64:$src1, VR64:$src2, i8imm:$src3), "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; def PALIGNR64rm : SS3AI<0x0F, MRMSrcMem, (outs VR64:$dst), - (ins VR64:$src1, i64mem:$src2, i16imm:$src3), + (ins VR64:$src1, i64mem:$src2, i8imm:$src3), "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; def PALIGNR128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, VR128:$src2, i32imm:$src3), + (ins VR128:$src1, VR128:$src2, i8imm:$src3), "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, OpSize; def PALIGNR128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), - (ins VR128:$src1, i128mem:$src2, i32imm:$src3), + (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "palignr\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>, OpSize; } // palignr patterns. 
-def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i16 imm:$src3)), +def : Pat<(int_x86_ssse3_palign_r VR64:$src1, VR64:$src2, (i8 imm:$src3)), (PALIGNR64rr VR64:$src1, VR64:$src2, (BYTE_imm imm:$src3))>, Requires<[HasSSSE3]>; def : Pat<(int_x86_ssse3_palign_r VR64:$src1, (memop64 addr:$src2), - (i16 imm:$src3)), + (i8 imm:$src3)), (PALIGNR64rm VR64:$src1, addr:$src2, (BYTE_imm imm:$src3))>, Requires<[HasSSSE3]>; -def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i32 imm:$src3)), +def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, VR128:$src2, (i8 imm:$src3)), (PALIGNR128rr VR128:$src1, VR128:$src2, (BYTE_imm imm:$src3))>, Requires<[HasSSSE3]>; def : Pat<(int_x86_ssse3_palign_r_128 VR128:$src1, (memopv2i64 addr:$src2), - (i32 imm:$src3)), + (i8 imm:$src3)), (PALIGNR128rm VR128:$src1, addr:$src2, (BYTE_imm imm:$src3))>, Requires<[HasSSSE3]>; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 0792bdd4dd2b..ce06f0fdebe7 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -426,83 +426,77 @@ X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) { void *X86JITInfo::emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE) { + MachineCodeEmitter::BufferState BS; #if defined (X86_64_JIT) - JCE.startGVStub(GV, 8, 8); + JCE.startGVStub(BS, GV, 8, 8); JCE.emitWordLE((unsigned)(intptr_t)ptr); JCE.emitWordLE((unsigned)(((intptr_t)ptr) >> 32)); #else - JCE.startGVStub(GV, 4, 4); + JCE.startGVStub(BS, GV, 4, 4); JCE.emitWordLE((intptr_t)ptr); #endif - return JCE.finishGVStub(GV); + return JCE.finishGVStub(BS); } -void *X86JITInfo::emitFunctionStub(const Function* F, void *Fn, +TargetJITInfo::StubLayout X86JITInfo::getStubLayout() { + // The 64-bit stub contains: + // movabs r10 <- 8-byte-target-address # 10 bytes + // call|jmp *r10 # 3 bytes + // The 32-bit stub contains a 5-byte call|jmp. 
+ // If the stub is a call to the compilation callback, an extra byte is added + // to mark it as a stub. + StubLayout Result = {14, 4}; + return Result; +} + +void *X86JITInfo::emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE) { + MachineCodeEmitter::BufferState BS; // Note, we cast to intptr_t here to silence a -pedantic warning that // complains about casting a function pointer to a normal pointer. #if defined (X86_32_JIT) && !defined (_MSC_VER) - bool NotCC = (Fn != (void*)(intptr_t)X86CompilationCallback && - Fn != (void*)(intptr_t)X86CompilationCallback_SSE); + bool NotCC = (Target != (void*)(intptr_t)X86CompilationCallback && + Target != (void*)(intptr_t)X86CompilationCallback_SSE); #else - bool NotCC = Fn != (void*)(intptr_t)X86CompilationCallback; + bool NotCC = Target != (void*)(intptr_t)X86CompilationCallback; #endif + JCE.emitAlignment(4); + void *Result = (void*)JCE.getCurrentPCValue(); if (NotCC) { #if defined (X86_64_JIT) - JCE.startGVStub(F, 13, 4); JCE.emitByte(0x49); // REX prefix JCE.emitByte(0xB8+2); // movabsq r10 - JCE.emitWordLE((unsigned)(intptr_t)Fn); - JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32)); + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); JCE.emitByte(0x41); // REX prefix JCE.emitByte(0xFF); // jmpq *r10 JCE.emitByte(2 | (4 << 3) | (3 << 6)); #else - JCE.startGVStub(F, 5, 4); JCE.emitByte(0xE9); - JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); #endif - return JCE.finishGVStub(F); + return Result; } #if defined (X86_64_JIT) - JCE.startGVStub(F, 14, 4); JCE.emitByte(0x49); // REX prefix JCE.emitByte(0xB8+2); // movabsq r10 - JCE.emitWordLE((unsigned)(intptr_t)Fn); - JCE.emitWordLE((unsigned)(((intptr_t)Fn) >> 32)); + JCE.emitWordLE((unsigned)(intptr_t)Target); + JCE.emitWordLE((unsigned)(((intptr_t)Target) >> 32)); JCE.emitByte(0x41); // REX prefix JCE.emitByte(0xFF); // callq *r10 
JCE.emitByte(2 | (2 << 3) | (3 << 6)); #else - JCE.startGVStub(F, 6, 4); JCE.emitByte(0xE8); // Call with 32 bit pc-rel destination... - JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); + JCE.emitWordLE((intptr_t)Target-JCE.getCurrentPCValue()-4); #endif // This used to use 0xCD, but that value is used by JITMemoryManager to // initialize the buffer with garbage, which means it may follow a // noreturn function call, confusing X86CompilationCallback2. PR 4929. JCE.emitByte(0xCE); // Interrupt - Just a marker identifying the stub! - return JCE.finishGVStub(F); -} - -void X86JITInfo::emitFunctionStubAtAddr(const Function* F, void *Fn, void *Stub, - JITCodeEmitter &JCE) { - // Note, we cast to intptr_t here to silence a -pedantic warning that - // complains about casting a function pointer to a normal pointer. - JCE.startGVStub(F, Stub, 5); - JCE.emitByte(0xE9); -#if defined (X86_64_JIT) && !defined (NDEBUG) - // Yes, we need both of these casts, or some broken versions of GCC (4.2.4) - // get the signed-ness of the expression wrong. Go figure. - intptr_t Displacement = (intptr_t)Fn - (intptr_t)JCE.getCurrentPCValue() - 5; - assert(((Displacement << 32) >> 32) == Displacement - && "PIC displacement does not fit in displacement field!"); -#endif - JCE.emitWordLE((intptr_t)Fn-JCE.getCurrentPCValue()-4); - JCE.finishGVStub(F); + return Result; } /// getPICJumpTableEntry - Returns the value of the jumptable entry for the diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index c381433bf357..238420c236b1 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -43,18 +43,16 @@ namespace llvm { virtual void *emitGlobalValueIndirectSym(const GlobalValue* GV, void *ptr, JITCodeEmitter &JCE); + // getStubLayout - Returns the size and alignment of the largest call stub + // on X86. 
+ virtual StubLayout getStubLayout(); + /// emitFunctionStub - Use the specified JITCodeEmitter object to emit a /// small native function that simply calls the function at the specified /// address. - virtual void *emitFunctionStub(const Function* F, void *Fn, + virtual void *emitFunctionStub(const Function* F, void *Target, JITCodeEmitter &JCE); - /// emitFunctionStubAtAddr - Use the specified JITCodeEmitter object to - /// emit a small native function that simply calls Fn. Emit the stub into - /// the supplied buffer. - virtual void emitFunctionStubAtAddr(const Function* F, void *Fn, - void *Buffer, JITCodeEmitter &JCE); - /// getPICJumpTableEntry - Returns the value of the jumptable entry for the /// specific basic block. virtual uintptr_t getPICJumpTableEntry(uintptr_t BB, uintptr_t JTBase); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index f577fcf13e05..33852bd238bd 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -1262,7 +1262,7 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF, else if (RetOpcode== X86::TCRETURNri64) BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg()); else - BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); + BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg()); // Delete the pseudo instruction TCRETURN. MBB.erase(MBBI); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index b901c1483b82..661f56046d09 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -28,6 +28,21 @@ using namespace llvm; #include #endif +/// ClassifyBlockAddressReference - Classify a blockaddress reference for the +/// current subtarget according to how we should reference it in a non-pcrel +/// context. +unsigned char X86Subtarget:: +ClassifyBlockAddressReference() const { + if (isPICStyleGOT()) // 32-bit ELF targets. 
+ return X86II::MO_GOTOFF; + + if (isPICStyleStubPIC()) // Darwin/32 in PIC mode. + return X86II::MO_PIC_BASE_OFFSET; + + // Direct static reference to label. + return X86II::MO_NO_FLAG; +} + /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 23f2841a8c2b..fb457ddd8805 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -199,6 +199,11 @@ public: unsigned char ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM)const; + /// ClassifyBlockAddressReference - Classify a blockaddress reference for the + /// current subtarget according to how we should reference it in a non-pcrel + /// context. + unsigned char ClassifyBlockAddressReference() const; + /// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls /// to immediate address. bool IsLegalToCallImmediateAddr(const TargetMachine &TM) const; diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 00dcce653ee5..f3104569879a 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -142,10 +142,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); - // Debug - setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); - setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); - maxStoresPerMemset = 4; maxStoresPerMemmove = maxStoresPerMemcpy = 2; @@ -295,7 +291,7 @@ LowerBlockAddress(SDValue Op, SelectionDAG &DAG) DebugLoc DL = Op.getDebugLoc(); BlockAddress *BA = cast(Op)->getBlockAddress(); - SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true); + SDValue Result = DAG.getBlockAddress(BA, getPointerTy(), /*isTarget=*/true); return 
DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index b3a832f12d0b..a16d335ef50f 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -212,7 +212,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector &SCC) { for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A) if (isa(A->getType()) && !A->hasNoCaptureAttr() && - !PointerMayBeCaptured(A, true)) { + !PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) { A->addAttr(Attribute::NoCapture); ++NumNoCapture; Changed = true; @@ -280,7 +280,7 @@ bool FunctionAttrs::IsFunctionMallocLike(Function *F, return false; // Did not come from an allocation. } - if (PointerMayBeCaptured(RetVal, false)) + if (PointerMayBeCaptured(RetVal, false, /*StoreCaptures=*/false)) return false; } diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 442f2fb65528..4635d0e61c39 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1898,6 +1898,15 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { // Global variables without names cannot be referenced outside this module. if (!GV->hasName() && !GV->isDeclaration()) GV->setLinkage(GlobalValue::InternalLinkage); + // Simplify the initializer. + if (GV->hasInitializer()) + if (ConstantExpr *CE = dyn_cast(GV->getInitializer())) { + TargetData *TD = getAnalysisIfAvailable(); + Constant *New = ConstantFoldConstantExpression(CE, TD); + if (New && New != CE) + GV->setInitializer(New); + } + // Do more involved optimizations if the global is internal. 
if (!GV->isConstant() && GV->hasLocalLinkage() && GV->hasInitializer()) Changed |= ProcessInternalGlobal(GV, GVI); diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index 023e642e648c..df2456f9f2b7 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -19,7 +19,6 @@ #include "llvm/Transforms/IPO.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" @@ -155,7 +154,7 @@ bool IPCP::PropagateConstantsIntoArguments(Function &F) { // callers will be updated to use the value they pass in directly instead of // using the return value. bool IPCP::PropagateConstantReturn(Function &F) { - if (F.getReturnType() == Type::getVoidTy(F.getContext())) + if (F.getReturnType()->isVoidTy()) return false; // No return value. // If this function could be overridden later in the link stage, we can't @@ -163,8 +162,6 @@ bool IPCP::PropagateConstantReturn(Function &F) { if (F.mayBeOverridden()) return false; - LLVMContext &Context = F.getContext(); - // Check to see if this function returns a constant. 
SmallVector RetVals; const StructType *STy = dyn_cast(F.getReturnType()); @@ -188,7 +185,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { if (!STy) V = RI->getOperand(i); else - V = FindInsertedValue(RI->getOperand(0), i, Context); + V = FindInsertedValue(RI->getOperand(0), i); if (V) { // Ignore undefs, we can change them into anything diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a8f39c1433cd..6f1c32c004e8 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1425,26 +1425,40 @@ bool GVN::processNonLocalLoad(LoadInst *LI, assert(UnavailablePred != 0 && "Fully available value should be eliminated above!"); - // If the loaded pointer is PHI node defined in this block, do PHI translation - // to get its value in the predecessor. - Value *LoadPtr = LI->getOperand(0)->DoPHITranslation(LoadBB, UnavailablePred); - - // Make sure the value is live in the predecessor. If it was defined by a - // non-PHI instruction in this block, we don't know how to recompute it above. - if (Instruction *LPInst = dyn_cast(LoadPtr)) - if (!DT->dominates(LPInst->getParent(), UnavailablePred)) { - DEBUG(errs() << "COULDN'T PRE LOAD BECAUSE PTR IS UNAVAILABLE IN PRED: " - << *LPInst << '\n' << *LI << "\n"); - return false; - } - // We don't currently handle critical edges :( if (UnavailablePred->getTerminator()->getNumSuccessors() != 1) { DEBUG(errs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '" << UnavailablePred->getName() << "': " << *LI << '\n'); return false; } - + + // Do PHI translation to get its value in the predecessor if necessary. The + // returned pointer (if non-null) is guaranteed to dominate UnavailablePred. + // + // FIXME: This may insert a computation, but we don't tell scalar GVN + // optimization stuff about it. How do we do this? 
+ SmallVector NewInsts; + Value *LoadPtr = 0; + + // If all preds have a single successor, then we know it is safe to insert the + // load on the pred (?!?), so we can insert code to materialize the pointer if + // it is not available. + if (allSingleSucc) { + LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB, + UnavailablePred, TD, *DT,NewInsts); + } else { + LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB, + UnavailablePred, TD, *DT); + } + + // If we couldn't find or insert a computation of this phi translated value, + // we fail PRE. + if (LoadPtr == 0) { + DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: " + << *LI->getOperand(0) << "\n"); + return false; + } + // Make sure it is valid to move this load here. We have to watch out for: // @1 = getelementptr (i8* p, ... // test p and branch if == 0 @@ -1455,14 +1469,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // we do not have this case. Otherwise, check that the load is safe to // put anywhere; this can be improved, but should be conservatively safe. if (!allSingleSucc && - !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) + // FIXME: REEVALUTE THIS. + !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator())) { + assert(NewInsts.empty() && "Should not have inserted instructions"); return false; + } // Okay, we can eliminate this load by inserting a reload in the predecessor // and using PHI construction to get the value in the other predecessors, do // it. 
DEBUG(errs() << "GVN REMOVING PRE LOAD: " << *LI << '\n'); - + DEBUG(if (!NewInsts.empty()) + errs() << "INSERTED " << NewInsts.size() << " INSTS: " + << *NewInsts.back() << '\n'); + Value *NewLoad = new LoadInst(LoadPtr, LI->getName()+".pre", false, LI->getAlignment(), UnavailablePred->getTerminator()); diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 1c48366e89fb..d12ad815f5ac 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -2163,8 +2163,8 @@ bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // Add has the property that adding any two 2's complement numbers can only // have one carry bit which can change a sign. As such, if LHS and RHS each - // have at least two sign bits, we know that the addition of the two values will - // sign extend fine. + // have at least two sign bits, we know that the addition of the two values + // will sign extend fine. if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; @@ -2184,15 +2184,12 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), TD)) + return ReplaceInstUsesWith(I, V); + + if (Constant *RHSC = dyn_cast(RHS)) { - // X + undef -> undef - if (isa(RHS)) - return ReplaceInstUsesWith(I, RHS); - - // X + 0 --> X - if (RHSC->isNullValue()) - return ReplaceInstUsesWith(I, LHS); - if (ConstantInt *CI = dyn_cast(RHSC)) { // X + (signbit) --> X ^ signbit const APInt& Val = CI->getValue(); @@ -4070,6 +4067,21 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS, /// FoldAndOfICmps - Fold (icmp)&(icmp) if possible. 
Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS) { + // (icmp eq A, null) & (icmp eq B, null) --> + // (icmp eq (ptrtoint(A)|ptrtoint(B)), 0) + if (TD && + LHS->getPredicate() == ICmpInst::ICMP_EQ && + RHS->getPredicate() == ICmpInst::ICMP_EQ && + isa(LHS->getOperand(1)) && + isa(RHS->getOperand(1))) { + const Type *IntPtrTy = TD->getIntPtrType(I.getContext()); + Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy); + Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy); + Value *NewOr = Builder->CreateOr(A, B); + return new ICmpInst(ICmpInst::ICMP_EQ, NewOr, + Constant::getNullValue(IntPtrTy)); + } + Value *Val, *Val2; ConstantInt *LHSCst, *RHSCst; ICmpInst::Predicate LHSCC, RHSCC; @@ -4081,12 +4093,20 @@ Instruction *InstCombiner::FoldAndOfICmps(Instruction &I, m_ConstantInt(RHSCst)))) return 0; - // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) - // where C is a power of 2 - if (LHSCst == RHSCst && LHSCC == RHSCC && LHSCC == ICmpInst::ICMP_ULT && - LHSCst->getValue().isPowerOf2()) { - Value *NewOr = Builder->CreateOr(Val, Val2); - return new ICmpInst(LHSCC, NewOr, LHSCst); + if (LHSCst == RHSCst && LHSCC == RHSCC) { + // (icmp ult A, C) & (icmp ult B, C) --> (icmp ult (A|B), C) + // where C is a power of 2 + if (LHSCC == ICmpInst::ICMP_ULT && + LHSCst->getValue().isPowerOf2()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } + + // (icmp eq A, 0) & (icmp eq B, 0) --> (icmp eq (A|B), 0) + if (LHSCC == ICmpInst::ICMP_EQ && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } } // From here on, we only handle: @@ -4322,7 +4342,6 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { if (Value *V = SimplifyAndInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - // See if we can simplify any instructions used by the instruction whose sole // purpose is to compute bits we 
don't care about. @@ -4743,16 +4762,37 @@ static Instruction *MatchSelectFromAndOr(Value *A, Value *B, /// FoldOrOfICmps - Fold (icmp)|(icmp) if possible. Instruction *InstCombiner::FoldOrOfICmps(Instruction &I, ICmpInst *LHS, ICmpInst *RHS) { + // (icmp ne A, null) | (icmp ne B, null) --> + // (icmp ne (ptrtoint(A)|ptrtoint(B)), 0) + if (TD && + LHS->getPredicate() == ICmpInst::ICMP_NE && + RHS->getPredicate() == ICmpInst::ICMP_NE && + isa(LHS->getOperand(1)) && + isa(RHS->getOperand(1))) { + const Type *IntPtrTy = TD->getIntPtrType(I.getContext()); + Value *A = Builder->CreatePtrToInt(LHS->getOperand(0), IntPtrTy); + Value *B = Builder->CreatePtrToInt(RHS->getOperand(0), IntPtrTy); + Value *NewOr = Builder->CreateOr(A, B); + return new ICmpInst(ICmpInst::ICMP_NE, NewOr, + Constant::getNullValue(IntPtrTy)); + } + Value *Val, *Val2; ConstantInt *LHSCst, *RHSCst; ICmpInst::Predicate LHSCC, RHSCC; // This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2). - if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), - m_ConstantInt(LHSCst))) || - !match(RHS, m_ICmp(RHSCC, m_Value(Val2), - m_ConstantInt(RHSCst)))) + if (!match(LHS, m_ICmp(LHSCC, m_Value(Val), m_ConstantInt(LHSCst))) || + !match(RHS, m_ICmp(RHSCC, m_Value(Val2), m_ConstantInt(RHSCst)))) return 0; + + + // (icmp ne A, 0) | (icmp ne B, 0) --> (icmp ne (A|B), 0) + if (LHSCst == RHSCst && LHSCC == RHSCC && + LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return new ICmpInst(LHSCC, NewOr, LHSCst); + } // From here on, we only handle: // (icmp1 A, C1) | (icmp2 A, C2) --> something simpler. @@ -8539,6 +8579,36 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI, } } + // icmp ne A, B is equal to xor A, B when A and B only really have one bit. + // It is also profitable to transform icmp eq into not(xor(A, B)) because that + // may lead to additional simplifications. 
+ if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) { + if (const IntegerType *ITy = dyn_cast(CI.getType())) { + uint32_t BitWidth = ITy->getBitWidth(); + if (BitWidth > 1) { + Value *LHS = ICI->getOperand(0); + Value *RHS = ICI->getOperand(1); + + APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0); + APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0); + APInt TypeMask(APInt::getHighBitsSet(BitWidth, BitWidth-1)); + ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS); + ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS); + + if (KnownZeroLHS.countLeadingOnes() == BitWidth-1 && + KnownZeroRHS.countLeadingOnes() == BitWidth-1) { + if (!DoXform) return ICI; + + Value *Xor = Builder->CreateXor(LHS, RHS); + if (ICI->getPredicate() == ICmpInst::ICMP_EQ) + Xor = Builder->CreateXor(Xor, ConstantInt::get(ITy, 1)); + Xor->takeName(ICI); + return ReplaceInstUsesWith(CI, Xor); + } + } + } + } + return 0; } @@ -9842,6 +9912,126 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { if (Operand->getIntrinsicID() == Intrinsic::bswap) return ReplaceInstUsesWith(CI, Operand->getOperand(1)); break; + case Intrinsic::uadd_with_overflow: { + Value *LHS = II->getOperand(1), *RHS = II->getOperand(2); + const IntegerType *IT = cast(II->getOperand(1)->getType()); + uint32_t BitWidth = IT->getBitWidth(); + APInt Mask = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + bool LHSKnownNegative = LHSKnownOne[BitWidth - 1]; + bool LHSKnownPositive = LHSKnownZero[BitWidth - 1]; + + if (LHSKnownNegative || LHSKnownPositive) { + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + bool RHSKnownNegative = RHSKnownOne[BitWidth - 1]; + bool RHSKnownPositive = RHSKnownZero[BitWidth - 1]; + if (LHSKnownNegative && RHSKnownNegative) { + // The sign bit is set in both 
cases: this MUST overflow. + // Create a simple add instruction, and insert it into the struct. + Instruction *Add = BinaryOperator::CreateAdd(LHS, RHS, "", &CI); + Worklist.Add(Add); + Constant *V[] = { + UndefValue::get(LHS->getType()), ConstantInt::getTrue(*Context) + }; + Constant *Struct = ConstantStruct::get(*Context, V, 2, false); + return InsertValueInst::Create(Struct, Add, 0); + } + + if (LHSKnownPositive && RHSKnownPositive) { + // The sign bit is clear in both cases: this CANNOT overflow. + // Create a simple add instruction, and insert it into the struct. + Instruction *Add = BinaryOperator::CreateNUWAdd(LHS, RHS, "", &CI); + Worklist.Add(Add); + Constant *V[] = { + UndefValue::get(LHS->getType()), ConstantInt::getFalse(*Context) + }; + Constant *Struct = ConstantStruct::get(*Context, V, 2, false); + return InsertValueInst::Create(Struct, Add, 0); + } + } + } + // FALL THROUGH uadd into sadd + case Intrinsic::sadd_with_overflow: + // Canonicalize constants into the RHS. + if (isa(II->getOperand(1)) && + !isa(II->getOperand(2))) { + Value *LHS = II->getOperand(1); + II->setOperand(1, II->getOperand(2)); + II->setOperand(2, LHS); + return II; + } + + // X + undef -> undef + if (isa(II->getOperand(2))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHS = dyn_cast(II->getOperand(2))) { + // X + 0 -> {X, false} + if (RHS->isZero()) { + Constant *V[] = { + UndefValue::get(II->getOperand(0)->getType()), + ConstantInt::getFalse(*Context) + }; + Constant *Struct = ConstantStruct::get(*Context, V, 2, false); + return InsertValueInst::Create(Struct, II->getOperand(1), 0); + } + } + break; + case Intrinsic::usub_with_overflow: + case Intrinsic::ssub_with_overflow: + // undef - X -> undef + // X - undef -> undef + if (isa(II->getOperand(1)) || + isa(II->getOperand(2))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHS = dyn_cast(II->getOperand(2))) { + // X - 0 -> {X, false} + if 
(RHS->isZero()) { + Constant *V[] = { + UndefValue::get(II->getOperand(1)->getType()), + ConstantInt::getFalse(*Context) + }; + Constant *Struct = ConstantStruct::get(*Context, V, 2, false); + return InsertValueInst::Create(Struct, II->getOperand(1), 0); + } + } + break; + case Intrinsic::umul_with_overflow: + case Intrinsic::smul_with_overflow: + // Canonicalize constants into the RHS. + if (isa(II->getOperand(1)) && + !isa(II->getOperand(2))) { + Value *LHS = II->getOperand(1); + II->setOperand(1, II->getOperand(2)); + II->setOperand(2, LHS); + return II; + } + + // X * undef -> undef + if (isa(II->getOperand(2))) + return ReplaceInstUsesWith(CI, UndefValue::get(II->getType())); + + if (ConstantInt *RHSI = dyn_cast(II->getOperand(2))) { + // X*0 -> {0, false} + if (RHSI->isZero()) + return ReplaceInstUsesWith(CI, Constant::getNullValue(II->getType())); + + // X * 1 -> {X, false} + if (RHSI->equalsInt(1)) { + Constant *V[] = { + UndefValue::get(II->getOperand(1)->getType()), + ConstantInt::getFalse(*Context) + }; + Constant *Struct = ConstantStruct::get(*Context, V, 2, false); + return InsertValueInst::Create(Struct, II->getOperand(1), 0); + } + } + break; case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: case Intrinsic::x86_sse_loadu_ps: @@ -11282,21 +11472,16 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { } Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { + SmallVector Ops(GEP.op_begin(), GEP.op_end()); + + if (Value *V = SimplifyGEPInst(&Ops[0], Ops.size(), TD)) + return ReplaceInstUsesWith(GEP, V); + Value *PtrOp = GEP.getOperand(0); - // Eliminate 'getelementptr %P, i32 0' and 'getelementptr %P', they are noops. 
- if (GEP.getNumOperands() == 1) - return ReplaceInstUsesWith(GEP, PtrOp); if (isa(GEP.getOperand(0))) return ReplaceInstUsesWith(GEP, UndefValue::get(GEP.getType())); - bool HasZeroPointerIndex = false; - if (Constant *C = dyn_cast(GEP.getOperand(1))) - HasZeroPointerIndex = C->isNullValue(); - - if (GEP.getNumOperands() == 2 && HasZeroPointerIndex) - return ReplaceInstUsesWith(GEP, PtrOp); - // Eliminate unneeded casts for indices. if (TD) { bool MadeChange = false; @@ -11401,6 +11586,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { return 0; } + bool HasZeroPointerIndex = false; + if (ConstantInt *C = dyn_cast(GEP.getOperand(1))) + HasZeroPointerIndex = C->isZero(); + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // @@ -11952,12 +12141,6 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); - if (isa(Ptr)) { // store X, undef -> noop (even if volatile) - EraseInstFromFunction(SI); - ++NumCombined; - return 0; - } - // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. 
// If the RHS is an alloca with a two uses, the other one being a @@ -12920,7 +13103,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { if (LHSMask.size() == Mask.size()) { std::vector NewMask; for (unsigned i = 0, e = Mask.size(); i != e; ++i) - if (Mask[i] >= 2*e) + if (Mask[i] >= e) NewMask.push_back(2*e); else NewMask.push_back(LHSMask[Mask[i]]); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 58641135ede8..1b93f3441e41 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -158,12 +158,18 @@ bool JumpThreading::runOnFunction(Function &F) { if (BBI->isTerminator()) { // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the // block, we have to make sure it isn't in the LoopHeaders set. We - // reinsert afterward in the rare case when the block isn't deleted. + // reinsert afterward if needed. bool ErasedFromLoopHeaders = LoopHeaders.erase(BB); + BasicBlock *Succ = BI->getSuccessor(0); - if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) + if (TryToSimplifyUncondBranchFromEmptyBlock(BB)) { Changed = true; - else if (ErasedFromLoopHeaders) + // If we deleted BB and BB was the header of a loop, then the + // successor is now the header of the loop. + BB = Succ; + } + + if (ErasedFromLoopHeaders) LoopHeaders.insert(BB); } } diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 104c8739c0e3..5511387c8da4 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -63,15 +63,6 @@ static cl::opt DisablePromotion("disable-licm-promotion", cl::Hidden, cl::desc("Disable memory promotion in LICM pass")); -// This feature is currently disabled by default because CodeGen is not yet -// capable of rematerializing these constants in PIC mode, so it can lead to -// degraded performance. Compile test/CodeGen/X86/remat-constant.ll with -// -relocation-model=pic to see an example of this. 
-static cl::opt -EnableLICMConstantMotion("enable-licm-constant-variables", cl::Hidden, - cl::desc("Enable hoisting/sinking of constant " - "global variables")); - namespace { struct LICM : public LoopPass { static char ID; // Pass identification, replacement for typeid @@ -383,8 +374,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // Loads from constant memory are always safe to move, even if they end up // in the same alias set as something that ends up being modified. - if (EnableLICMConstantMotion && - AA->pointsToConstantMemory(LI->getOperand(0))) + if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; // Don't hoist loads which have may-aliased stores in loop. @@ -603,7 +593,7 @@ void LICM::sink(Instruction &I) { if (AI) { std::vector Allocas; Allocas.push_back(AI); - PromoteMemToReg(Allocas, *DT, *DF, AI->getContext(), CurAST); + PromoteMemToReg(Allocas, *DT, *DF, CurAST); } } } @@ -779,7 +769,7 @@ void LICM::PromoteValuesInLoop() { PromotedAllocas.reserve(PromotedValues.size()); for (unsigned i = 0, e = PromotedValues.size(); i != e; ++i) PromotedAllocas.push_back(PromotedValues[i].first); - PromoteMemToReg(PromotedAllocas, *DT, *DF, Preheader->getContext(), CurAST); + PromoteMemToReg(PromotedAllocas, *DT, *DF, CurAST); } /// FindPromotableValuesInLoop - Check the current loop for stores to definite diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index c202a2c41de8..d8c59b1d7421 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1869,8 +1869,16 @@ bool IPSCCP::runOnModule(Module &M) { for (unsigned i = 0, e = BlocksToErase.size(); i != e; ++i) { // If there are any PHI nodes in this successor, drop entries for BB now. 
BasicBlock *DeadBB = BlocksToErase[i]; - while (!DeadBB->use_empty()) { - Instruction *I = cast(DeadBB->use_back()); + for (Value::use_iterator UI = DeadBB->use_begin(), UE = DeadBB->use_end(); + UI != UE; ) { + // Grab the user and then increment the iterator early, as the user + // will be deleted. Step past all adjacent uses from the same user. + Instruction *I = dyn_cast(*UI); + do { ++UI; } while (UI != UE && *UI == I); + + // Ignore blockaddress users; BasicBlock's dtor will handle them. + if (!I) continue; + bool Folded = ConstantFoldTerminator(I->getParent()); if (!Folded) { // The constant folder may not have been able to fold the terminator diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 2e3b6943bbfd..ae6ad74d54fd 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -192,7 +192,7 @@ bool SROA::performPromotion(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF, F.getContext()); + PromoteMemToReg(Allocas, DT, DF); NumPromoted += Allocas.size(); Changed = true; } @@ -469,15 +469,41 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI, case Instruction::GetElementPtr: { GetElementPtrInst *GEP = cast(User); bool AreAllZeroIndices = isFirstElt; - if (GEP->getNumOperands() > 1) { - if (!isa(GEP->getOperand(1)) || - !cast(GEP->getOperand(1))->isZero()) - // Using pointer arithmetic to navigate the array. - return MarkUnsafe(Info); - - if (AreAllZeroIndices) - AreAllZeroIndices = GEP->hasAllZeroIndices(); + if (GEP->getNumOperands() > 1 && + (!isa(GEP->getOperand(1)) || + !cast(GEP->getOperand(1))->isZero())) + // Using pointer arithmetic to navigate the array. + return MarkUnsafe(Info); + + // Verify that any array subscripts are in range. 
+ for (gep_type_iterator GEPIt = gep_type_begin(GEP), + E = gep_type_end(GEP); GEPIt != E; ++GEPIt) { + // Ignore struct elements, no extra checking needed for these. + if (isa(*GEPIt)) + continue; + + // This GEP indexes an array. Verify that this is an in-range + // constant integer. Specifically, consider A[0][i]. We cannot know that + // the user isn't doing invalid things like allowing i to index an + // out-of-range subscript that accesses A[1]. Because of this, we have + // to reject SROA of any accesses into structs where any of the + // components are variables. + ConstantInt *IdxVal = dyn_cast(GEPIt.getOperand()); + if (!IdxVal) return MarkUnsafe(Info); + + // Are all indices still zero? + AreAllZeroIndices &= IdxVal->isZero(); + + if (const ArrayType *AT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= AT->getNumElements()) + return MarkUnsafe(Info); + } else if (const VectorType *VT = dyn_cast(*GEPIt)) { + if (IdxVal->getZExtValue() >= VT->getNumElements()) + return MarkUnsafe(Info); + } } + + isSafeElementUse(GEP, AreAllZeroIndices, AI, Info); if (Info.isUnsafe) return; break; diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index 6a8148040d94..e905952c5db7 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -26,7 +26,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Attributes.h" #include "llvm/Support/CFG.h" @@ -57,7 +56,7 @@ FunctionPass *llvm::createCFGSimplificationPass() { /// ChangeToUnreachable - Insert an unreachable instruction before the specified /// instruction, making it and the rest of the code in the block dead. 
-static void ChangeToUnreachable(Instruction *I, LLVMContext &Context) { +static void ChangeToUnreachable(Instruction *I) { BasicBlock *BB = I->getParent(); // Loop over all of the successors, removing BB's entry from any PHI // nodes. @@ -95,8 +94,7 @@ static void ChangeToCall(InvokeInst *II) { } static bool MarkAliveBlocks(BasicBlock *BB, - SmallPtrSet &Reachable, - LLVMContext &Context) { + SmallPtrSet &Reachable) { SmallVector Worklist; Worklist.push_back(BB); @@ -119,7 +117,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, // though. ++BBI; if (!isa(BBI)) { - ChangeToUnreachable(BBI, Context); + ChangeToUnreachable(BBI); Changed = true; } break; @@ -135,7 +133,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, if (isa(Ptr) || (isa(Ptr) && SI->getPointerAddressSpace() == 0)) { - ChangeToUnreachable(SI, Context); + ChangeToUnreachable(SI); Changed = true; break; } @@ -161,7 +159,7 @@ static bool MarkAliveBlocks(BasicBlock *BB, /// otherwise. static bool RemoveUnreachableBlocksFromFn(Function &F) { SmallPtrSet Reachable; - bool Changed = MarkAliveBlocks(F.begin(), Reachable, F.getContext()); + bool Changed = MarkAliveBlocks(F.begin(), Reachable); // If there are unreachable blocks in the CFG... if (Reachable.size() == F.size()) diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 611505ef363a..f9b929c7e838 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -81,6 +81,11 @@ public: Value *EmitMemCpy(Value *Dst, Value *Src, Value *Len, unsigned Align, IRBuilder<> &B); + /// EmitMemMove - Emit a call to the memmove function to the builder. This + /// always expects that the size has type 'intptr_t' and Dst/Src are pointers. + Value *EmitMemMove(Value *Dst, Value *Src, Value *Len, + unsigned Align, IRBuilder<> &B); + /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. 
Value *EmitMemChr(Value *Ptr, Value *Val, Value *Len, IRBuilder<> &B); @@ -160,6 +165,21 @@ Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len, ConstantInt::get(Type::getInt32Ty(*Context), Align)); } +/// EmitMemMOve - Emit a call to the memmove function to the builder. This +/// always expects that the size has type 'intptr_t' and Dst/Src are pointers. +Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len, + unsigned Align, IRBuilder<> &B) { + Module *M = Caller->getParent(); + Intrinsic::ID IID = Intrinsic::memmove; + const Type *Tys[1]; + Tys[0] = TD->getIntPtrType(*Context); + Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1); + Value *D = CastToCStr(Dst, B); + Value *S = CastToCStr(Src, B); + Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align); + return B.CreateCall4(MemMove, D, S, Len, A); +} + /// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is /// a pointer, Val is an i32 value, and Len is an 'intptr_t' value. Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val, @@ -511,27 +531,6 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { return true; } -//===----------------------------------------------------------------------===// -// Miscellaneous LibCall/Intrinsic Optimizations -//===----------------------------------------------------------------------===// - -namespace { -struct SizeOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // TODO: We can do more with this, but delaying to here should be no change - // in behavior. 
- ConstantInt *Const = dyn_cast(CI->getOperand(2)); - - if (!Const) return 0; - - if (Const->getZExtValue() < 2) - return Constant::getAllOnesValue(Const->getType()); - else - return ConstantInt::get(Const->getType(), 0); - } -}; -} - //===----------------------------------------------------------------------===// // String and Memory LibCall Optimizations //===----------------------------------------------------------------------===// @@ -1010,16 +1009,7 @@ struct MemMoveOpt : public LibCallOptimization { return 0; // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) - Module *M = Caller->getParent(); - Intrinsic::ID IID = Intrinsic::memmove; - const Type *Tys[1]; - Tys[0] = TD->getIntPtrType(*Context); - Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1); - Value *Dst = CastToCStr(CI->getOperand(1), B); - Value *Src = CastToCStr(CI->getOperand(2), B); - Value *Size = CI->getOperand(3); - Value *Align = ConstantInt::get(Type::getInt32Ty(*Context), 1); - B.CreateCall4(MemMove, Dst, Src, Size, Align); + EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); return CI->getOperand(1); } }; @@ -1047,6 +1037,118 @@ struct MemSetOpt : public LibCallOptimization { } }; +//===----------------------------------------------------------------------===// +// Object Size Checking Optimizations +//===----------------------------------------------------------------------===// + +//===---------------------------------------===// +// 'object size' +namespace { +struct SizeOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // TODO: We can do more with this, but delaying to here should be no change + // in behavior. 
+ ConstantInt *Const = dyn_cast(CI->getOperand(2)); + + if (!Const) return 0; + + const Type *Ty = Callee->getFunctionType()->getReturnType(); + + if (Const->getZExtValue() < 2) + return Constant::getAllOnesValue(Ty); + else + return ConstantInt::get(Ty, 0); + } +}; +} + +//===---------------------------------------===// +// 'memcpy_chk' Optimizations + +struct MemCpyChkOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !isa(FT->getParamType(0)) || + !isa(FT->getParamType(1)) || + !isa(FT->getParamType(3)) || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + ConstantInt *SizeCI = dyn_cast(CI->getOperand(4)); + if (!SizeCI) + return 0; + if (SizeCI->isAllOnesValue()) { + EmitMemCpy(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), 1, B); + return CI->getOperand(1); + } + + return 0; + } +}; + +//===---------------------------------------===// +// 'memset_chk' Optimizations + +struct MemSetChkOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. 
+ if (!TD) return 0; + + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !isa(FT->getParamType(0)) || + !isa(FT->getParamType(1)) || + !isa(FT->getParamType(3)) || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + ConstantInt *SizeCI = dyn_cast(CI->getOperand(4)); + if (!SizeCI) + return 0; + if (SizeCI->isAllOnesValue()) { + Value *Val = B.CreateIntCast(CI->getOperand(2), Type::getInt8Ty(*Context), + false); + EmitMemSet(CI->getOperand(1), Val, CI->getOperand(3), B); + return CI->getOperand(1); + } + + return 0; + } +}; + +//===---------------------------------------===// +// 'memmove_chk' Optimizations + +struct MemMoveChkOpt : public LibCallOptimization { + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // These optimizations require TargetData. + if (!TD) return 0; + + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) || + !isa(FT->getParamType(0)) || + !isa(FT->getParamType(1)) || + !isa(FT->getParamType(3)) || + FT->getParamType(2) != TD->getIntPtrType(*Context)) + return 0; + + ConstantInt *SizeCI = dyn_cast(CI->getOperand(4)); + if (!SizeCI) + return 0; + if (SizeCI->isAllOnesValue()) { + EmitMemMove(CI->getOperand(1), CI->getOperand(2), CI->getOperand(3), + 1, B); + return CI->getOperand(1); + } + + return 0; + } +}; + //===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -1356,7 +1458,7 @@ struct PrintFOpt : public LibCallOptimization { if (FormatStr == "%c" && CI->getNumOperands() > 2 && isa(CI->getOperand(2)->getType())) { Value *Res = EmitPutChar(CI->getOperand(2), B); - + if (CI->use_empty()) return CI; return B.CreateIntCast(Res, CI->getType(), true); } @@ -1586,7 +1688,10 @@ namespace { // 
Formatting and IO Optimizations SPrintFOpt SPrintF; PrintFOpt PrintF; FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; + + // Object Size Checking SizeOpt ObjectSize; + MemCpyChkOpt MemCpyChk; MemSetChkOpt MemSetChk; MemMoveChkOpt MemMoveChk; bool Modified; // This is only used by doInitialization. public: @@ -1692,9 +1797,13 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["fwrite"] = &FWrite; Optimizations["fputs"] = &FPuts; Optimizations["fprintf"] = &FPrintF; - - // Miscellaneous - Optimizations["llvm.objectsize"] = &ObjectSize; + + // Object Size Checking + Optimizations["llvm.objectsize.i32"] = &ObjectSize; + Optimizations["llvm.objectsize.i64"] = &ObjectSize; + Optimizations["__memcpy_chk"] = &MemCpyChk; + Optimizations["__memset_chk"] = &MemSetChk; + Optimizations["__memmove_chk"] = &MemMoveChk; } diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 44a2c1f85181..690972dc558b 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -477,8 +477,13 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) { SmallVector OuterLoopPreds; for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (PN->getIncomingValue(i) != PN || - !L->contains(PN->getIncomingBlock(i))) + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirectbr edges. 
+ if (isa(PN->getIncomingBlock(i)->getTerminator())) + return 0; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } BasicBlock *Header = L->getHeader(); BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0], diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 941660436b46..99203b662120 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -73,7 +73,7 @@ bool PromotePass::runOnFunction(Function &F) { if (Allocas.empty()) break; - PromoteMemToReg(Allocas, DT, DF, F.getContext()); + PromoteMemToReg(Allocas, DT, DF); NumPromoted += Allocas.size(); Changed = true; } diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index de6ad1dde580..e25f9e2a999a 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -23,7 +23,6 @@ #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/ADT/DenseMap.h" @@ -180,8 +179,6 @@ namespace { /// AliasSetTracker *AST; - LLVMContext &Context; - /// AllocaLookup - Reverse mapping of Allocas. /// std::map AllocaLookup; @@ -212,9 +209,8 @@ namespace { DenseMap BBNumPreds; public: PromoteMem2Reg(const std::vector &A, DominatorTree &dt, - DominanceFrontier &df, AliasSetTracker *ast, - LLVMContext &C) - : Allocas(A), DT(dt), DF(df), AST(ast), Context(C) {} + DominanceFrontier &df, AliasSetTracker *ast) + : Allocas(A), DT(dt), DF(df), AST(ast) {} void run(); @@ -1003,9 +999,9 @@ NextIteration: /// void llvm::PromoteMemToReg(const std::vector &Allocas, DominatorTree &DT, DominanceFrontier &DF, - LLVMContext &Context, AliasSetTracker *AST) { + AliasSetTracker *AST) { // If there is nothing to do, bail out... 
if (Allocas.empty()) return; - PromoteMem2Reg(Allocas, DT, DF, AST, Context).run(); + PromoteMem2Reg(Allocas, DT, DF, AST).run(); } diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 78cd4dcafc6a..449e9671ab7f 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -1860,9 +1860,9 @@ LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val, } LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val, - LLVMTypeRef DestTy, int isSigned, - const char *Name) { - return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), isSigned, Name)); + LLVMTypeRef DestTy, const char *Name) { + return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), + /*isSigned*/true, Name)); } LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val, diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 24e715b427e7..b80b6bfebc43 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -33,10 +33,8 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) { StringMapEntry &Entry = pImpl->MDStringCache.GetOrCreateValue(Str); MDString *&S = Entry.getValue(); - if (S) return S; - - return S = - new MDString(Context, Entry.getKey()); + if (!S) S = new MDString(Context, Entry.getKey()); + return S; } MDString *MDString::get(LLVMContext &Context, const char *Str) { @@ -44,10 +42,8 @@ MDString *MDString::get(LLVMContext &Context, const char *Str) { StringMapEntry &Entry = pImpl->MDStringCache.GetOrCreateValue(Str ? 
StringRef(Str) : StringRef()); MDString *&S = Entry.getValue(); - if (S) return S; - - return S = - new MDString(Context, Entry.getKey()); + if (!S) S = new MDString(Context, Entry.getKey()); + return S; } //===----------------------------------------------------------------------===// @@ -74,28 +70,19 @@ MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { ID.AddPointer(Vals[i]); void *InsertPoint; - MDNode *N; - { - N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); - } - if (N) return N; - - N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); + MDNode *N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint); if (!N) { // InsertPoint will have been set by the FindNodeOrInsertPos call. N = new MDNode(Context, Vals, NumVals); pImpl->MDNodeSet.InsertNode(N, InsertPoint); } - return N; } /// ~MDNode - Destroy MDNode. MDNode::~MDNode() { - { - LLVMContextImpl *pImpl = getType()->getContext().pImpl; - pImpl->MDNodeSet.RemoveNode(this); - } + LLVMContextImpl *pImpl = getType()->getContext().pImpl; + pImpl->MDNodeSet.RemoveNode(this); delete [] Node; Node = NULL; } @@ -231,7 +218,7 @@ public: /// addMD - Attach the metadata of given kind to an Instruction. void addMD(unsigned Kind, MDNode *Node, Instruction *Inst); - /// removeMD - Remove metadata of given kind attached with an instuction. + /// removeMD - Remove metadata of given kind attached with an instruction. void removeMD(unsigned Kind, Instruction *Inst); /// removeAllMetadata - Remove all metadata attached with an instruction. @@ -241,7 +228,7 @@ public: /// the same metadata to In2. void copyMD(Instruction *In1, Instruction *In2); - /// getHandlerNames - Populate client supplied smallvector using custome + /// getHandlerNames - Populate client-supplied smallvector using custom /// metadata name and ID. 
void getHandlerNames(SmallVectorImpl >&) const; @@ -302,7 +289,7 @@ void MetadataContextImpl::addMD(unsigned MDKind, MDNode *Node, Info.push_back(std::make_pair(MDKind, Node)); } -/// removeMD - Remove metadata of given kind attached with an instuction. +/// removeMD - Remove metadata of given kind attached with an instruction. void MetadataContextImpl::removeMD(unsigned Kind, Instruction *Inst) { MDStoreTy::iterator I = MetadataStore.find(Inst); if (I == MetadataStore.end()) @@ -317,7 +304,7 @@ void MetadataContextImpl::removeMD(unsigned Kind, Instruction *Inst) { } } } - + /// removeAllMetadata - Remove all metadata attached with an instruction. void MetadataContextImpl::removeAllMetadata(Instruction *Inst) { MetadataStore.erase(Inst); @@ -454,12 +441,12 @@ getMDs(const Instruction *Inst, void MetadataContext::addMD(unsigned Kind, MDNode *Node, Instruction *Inst) { pImpl->addMD(Kind, Node, Inst); } - -/// removeMD - Remove metadata of given kind attached with an instuction. + +/// removeMD - Remove metadata of given kind attached with an instruction. void MetadataContext::removeMD(unsigned Kind, Instruction *Inst) { pImpl->removeMD(Kind, Inst); } - + /// removeAllMetadata - Remove all metadata attached with an instruction. 
void MetadataContext::removeAllMetadata(Instruction *Inst) { pImpl->removeAllMetadata(Inst); diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index d3d61f5a5eb5..ae418a0b128f 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -1231,6 +1231,9 @@ bool FunctionPassManager::doFinalization() { bool FunctionPassManagerImpl::doInitialization(Module &M) { bool Changed = false; + dumpArguments(); + dumpPasses(); + for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) Changed |= getContainedManager(Index)->doInitialization(M); @@ -1274,9 +1277,6 @@ bool FunctionPassManagerImpl::run(Function &F) { bool Changed = false; TimingInfo::createTheTimeInfo(); - dumpArguments(); - dumpPasses(); - initializeAllAnalysisInfo(); for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) Changed |= getContainedManager(Index)->runOnFunction(F); diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 7ab7b1516844..7aa86b776c76 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -1475,6 +1475,9 @@ void Verifier::visitInstruction(Instruction &I) { void Verifier::VerifyType(const Type *Ty) { if (!Types.insert(Ty)) return; + Assert1(&Mod->getContext() == &Ty->getContext(), + "Type context does not match Module context!", Ty); + switch (Ty->getTypeID()) { case Type::FunctionTyID: { const FunctionType *FTy = cast(Ty); diff --git a/test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll b/test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll deleted file mode 100644 index aaf9061953e7..000000000000 --- a/test/Analysis/BasicAA/2008-12-09-GEP-IndicesAlias.ll +++ /dev/null @@ -1,16 +0,0 @@ -; RUN: opt < %s -aa-eval -print-all-alias-modref-info -disable-output |& grep {MustAlias:.*%R,.*%r} -; Make sure that basicaa thinks R and r are must aliases. 
- -define i32 @test(i8 * %P) { -entry: - %Q = bitcast i8* %P to {i32, i32}* - %R = getelementptr {i32, i32}* %Q, i32 0, i32 1 - %S = load i32* %R - - %q = bitcast i8* %P to {i32, i32}* - %r = getelementptr {i32, i32}* %q, i32 0, i32 1 - %s = load i32* %r - - %t = sub i32 %S, %s - ret i32 %t -} diff --git a/test/Analysis/BasicAA/gep-alias.ll b/test/Analysis/BasicAA/gep-alias.ll new file mode 100644 index 000000000000..1ed031224713 --- /dev/null +++ b/test/Analysis/BasicAA/gep-alias.ll @@ -0,0 +1,171 @@ +; RUN: opt < %s -gvn -instcombine -S |& FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" + +; Make sure that basicaa thinks R and r are must aliases. +define i32 @test1(i8 * %P) { +entry: + %Q = bitcast i8* %P to {i32, i32}* + %R = getelementptr {i32, i32}* %Q, i32 0, i32 1 + %S = load i32* %R + + %q = bitcast i8* %P to {i32, i32}* + %r = getelementptr {i32, i32}* %q, i32 0, i32 1 + %s = load i32* %r + + %t = sub i32 %S, %s + ret i32 %t +; CHECK: @test1 +; CHECK: ret i32 0 +} + +define i32 @test2(i8 * %P) { +entry: + %Q = bitcast i8* %P to {i32, i32, i32}* + %R = getelementptr {i32, i32, i32}* %Q, i32 0, i32 1 + %S = load i32* %R + + %r = getelementptr {i32, i32, i32}* %Q, i32 0, i32 2 + store i32 42, i32* %r + + %s = load i32* %R + + %t = sub i32 %S, %s + ret i32 %t +; CHECK: @test2 +; CHECK: ret i32 0 +} + + +; This was a miscompilation. +define i32 @test3({float, {i32, i32, i32}}* %P) { +entry: + %P2 = getelementptr {float, {i32, i32, i32}}* %P, i32 0, i32 1 + %R = getelementptr {i32, i32, i32}* %P2, i32 0, i32 1 + %S = load i32* %R + + %r = getelementptr {i32, i32, i32}* %P2, i32 0, i32 2 + store i32 42, i32* %r + + %s = load i32* %R + + %t = sub i32 %S, %s + ret i32 %t +; CHECK: @test3 +; CHECK: ret i32 0 +} + + +;; This is reduced from the SmallPtrSet constructor. 
+%SmallPtrSetImpl = type { i8**, i32, i32, i32, [1 x i8*] } +%SmallPtrSet64 = type { %SmallPtrSetImpl, [64 x i8*] } + +define i32 @test4(%SmallPtrSet64* %P) { +entry: + %tmp2 = getelementptr inbounds %SmallPtrSet64* %P, i64 0, i32 0, i32 1 + store i32 64, i32* %tmp2, align 8 + %tmp3 = getelementptr inbounds %SmallPtrSet64* %P, i64 0, i32 0, i32 4, i64 64 + store i8* null, i8** %tmp3, align 8 + %tmp4 = load i32* %tmp2, align 8 + ret i32 %tmp4 +; CHECK: @test4 +; CHECK: ret i32 64 +} + +; P[i] != p[i+1] +define i32 @test5(i32* %p, i64 %i) { + %pi = getelementptr i32* %p, i64 %i + %i.next = add i64 %i, 1 + %pi.next = getelementptr i32* %p, i64 %i.next + %x = load i32* %pi + store i32 42, i32* %pi.next + %y = load i32* %pi + %z = sub i32 %x, %y + ret i32 %z +; CHECK: @test5 +; CHECK: ret i32 0 +} + +; P[i] != p[(i*4)|1] +define i32 @test6(i32* %p, i64 %i1) { + %i = shl i64 %i1, 2 + %pi = getelementptr i32* %p, i64 %i + %i.next = or i64 %i, 1 + %pi.next = getelementptr i32* %p, i64 %i.next + %x = load i32* %pi + store i32 42, i32* %pi.next + %y = load i32* %pi + %z = sub i32 %x, %y + ret i32 %z +; CHECK: @test6 +; CHECK: ret i32 0 +} + +; P[1] != P[i*4] +define i32 @test7(i32* %p, i64 %i) { + %pi = getelementptr i32* %p, i64 1 + %i.next = shl i64 %i, 2 + %pi.next = getelementptr i32* %p, i64 %i.next + %x = load i32* %pi + store i32 42, i32* %pi.next + %y = load i32* %pi + %z = sub i32 %x, %y + ret i32 %z +; CHECK: @test7 +; CHECK: ret i32 0 +} + +; P[zext(i)] != p[zext(i+1)] +; PR1143 +define i32 @test8(i32* %p, i32 %i) { + %i1 = zext i32 %i to i64 + %pi = getelementptr i32* %p, i64 %i1 + %i.next = add i32 %i, 1 + %i.next2 = zext i32 %i.next to i64 + %pi.next = getelementptr i32* %p, i64 %i.next2 + %x = load i32* %pi + store i32 42, i32* %pi.next + %y = load i32* %pi + %z = sub i32 %x, %y + ret i32 %z +; CHECK: @test8 +; CHECK: ret i32 0 +} + +define i8 @test9([4 x i8] *%P, i32 %i, i32 %j) { + %i2 = shl i32 %i, 2 + %i3 = add i32 %i2, 1 + ; P2 = P + 1 + 4*i + %P2 = 
getelementptr [4 x i8] *%P, i32 0, i32 %i3 + + %j2 = shl i32 %j, 2 + + ; P4 = P + 4*j + %P4 = getelementptr [4 x i8]* %P, i32 0, i32 %j2 + + %x = load i8* %P2 + store i8 42, i8* %P4 + %y = load i8* %P2 + %z = sub i8 %x, %y + ret i8 %z +; CHECK: @test9 +; CHECK: ret i8 0 +} + +define i8 @test10([4 x i8] *%P, i32 %i) { + %i2 = shl i32 %i, 2 + %i3 = add i32 %i2, 4 + ; P2 = P + 4 + 4*i + %P2 = getelementptr [4 x i8] *%P, i32 0, i32 %i3 + + ; P4 = P + 4*i + %P4 = getelementptr [4 x i8]* %P, i32 0, i32 %i2 + + %x = load i8* %P2 + store i8 42, i8* %P4 + %y = load i8* %P2 + %z = sub i8 %x, %y + ret i8 %z +; CHECK: @test10 +; CHECK: ret i8 0 +} diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll index 02db861c609f..3f642cff195c 100644 --- a/test/Analysis/BasicAA/modref.ll +++ b/test/Analysis/BasicAA/modref.ll @@ -4,6 +4,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1 declare void @llvm.memset.i32(i8*, i8, i32, i32) declare void @llvm.memset.i8(i8*, i8, i8, i32) declare void @llvm.memcpy.i8(i8*, i8*, i8, i32) +declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) declare void @llvm.lifetime.end(i64, i8* nocapture) declare void @external(i32*) @@ -90,3 +91,35 @@ define void @test3a(i8* %P, i8 %X) { ret void ; CHECK: ret void } + +@G1 = external global i32 +@G2 = external global [4000 x i32] + +define i32 @test4(i8* %P) { + %tmp = load i32* @G1 + call void @llvm.memset.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1) + %tmp2 = load i32* @G1 + %sub = sub i32 %tmp2, %tmp + ret i32 %sub +; CHECK: @test4 +; CHECK: load i32* @G +; CHECK: memset.i32 +; CHECK-NOT: load +; CHECK: sub i32 %tmp, %tmp +} + +; Verify that basicaa is handling variable length memcpy, knowing it doesn't +; write to G1. 
+define i32 @test5(i8* %P, i32 %Len) { + %tmp = load i32* @G1 + call void @llvm.memcpy.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1) + %tmp2 = load i32* @G1 + %sub = sub i32 %tmp2, %tmp + ret i32 %sub +; CHECK: @test5 +; CHECK: load i32* @G +; CHECK: memcpy.i32 +; CHECK-NOT: load +; CHECK: sub i32 %tmp, %tmp +} + diff --git a/test/Analysis/ScalarEvolution/scev-aa.ll b/test/Analysis/ScalarEvolution/scev-aa.ll index 0dcf52977a0a..e07aca2e3cb5 100644 --- a/test/Analysis/ScalarEvolution/scev-aa.ll +++ b/test/Analysis/ScalarEvolution/scev-aa.ll @@ -1,8 +1,8 @@ ; RUN: opt < %s -scev-aa -aa-eval -print-all-alias-modref-info \ ; RUN: |& FileCheck %s -; At the time of this writing, all of these CHECK lines are cases that -; plain -basicaa misses. +; At the time of this writing, -basicaa only misses the example of the form +; A[i+(j+1)] != A[i+j], which can arise from multi-dimensional array references. target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64" diff --git a/test/Assembler/msasm.ll b/test/Assembler/msasm.ll deleted file mode 100644 index 5e32963abd8e..000000000000 --- a/test/Assembler/msasm.ll +++ /dev/null @@ -1,36 +0,0 @@ -; RUN: llvm-as < %s | llvm-dis | FileCheck %s -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin10.0" - -define void @test1() nounwind { -; CHECK: test1 -; CHECK: sideeffect -; CHECK-NOT: msasm - tail call void asm sideeffect "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind - ret void -; CHECK: ret -} -define void @test2() nounwind { -; CHECK: test2 -; CHECK: sideeffect -; CHECK: msasm - tail call void asm sideeffect msasm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind - ret void -; CHECK: ret -} -define void @test3() nounwind { -; CHECK: test3 -; CHECK-NOT: sideeffect -; CHECK: msasm - tail call void asm msasm "mov", 
"~{dirflag},~{fpsr},~{flags}"() nounwind - ret void -; CHECK: ret -} -define void @test4() nounwind { -; CHECK: test4 -; CHECK-NOT: sideeffect -; CHECK-NOT: msasm - tail call void asm "mov", "~{dirflag},~{fpsr},~{flags}"() nounwind - ret void -; CHECK: ret -} diff --git a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll index dd2845fe6aa5..7aae3acd76e6 100644 --- a/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll +++ b/test/CodeGen/ARM/2009-11-07-SubRegAsmPrinting.ll @@ -13,7 +13,7 @@ entry: %4 = fadd float 0.000000e+00, %3 ; [#uses=1] %5 = fsub float 1.000000e+00, %4 ; [#uses=1] ; CHECK: foo: -; CHECK: fconsts s{{[0-9]+}}, #112 +; CHECK: vmov.f32 s{{[0-9]+}}, #1.000000e+00 %6 = fsub float 1.000000e+00, undef ; [#uses=2] %7 = fsub float %2, undef ; [#uses=1] %8 = fsub float 0.000000e+00, undef ; [#uses=3] diff --git a/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll b/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll new file mode 100644 index 000000000000..efe74cfd1387 --- /dev/null +++ b/test/CodeGen/ARM/2009-11-30-LiveVariablesBug.ll @@ -0,0 +1,41 @@ +; RUN: llc -mtriple=armv7-eabi -mcpu=cortex-a8 < %s +; PR5614 + +%"als" = type { i32 (...)** } +%"av" = type { %"als" } +%"c" = type { %"lsm", %"Vec3", %"av"*, float, i8, float, %"lsm", i8, %"Vec3", %"Vec3", %"Vec3", float, float, float, %"Vec3", %"Vec3" } +%"lsm" = type { %"als", %"Vec3", %"Vec3", %"Vec3", %"Vec3" } +%"Vec3" = type { float, float, float } + +define arm_aapcs_vfpcc void @foo(%"c"* %this, %"Vec3"* nocapture %adjustment) { +entry: + switch i32 undef, label %return [ + i32 1, label %bb + i32 2, label %bb72 + i32 3, label %bb31 + i32 4, label %bb79 + i32 5, label %bb104 + ] + +bb: ; preds = %entry + ret void + +bb31: ; preds = %entry + %0 = call arm_aapcs_vfpcc %"Vec3" undef(%"lsm"* undef) ; <%"Vec3"> [#uses=1] + %mrv_gr69 = extractvalue %"Vec3" %0, 1 ; [#uses=1] + %1 = fsub float %mrv_gr69, undef ; [#uses=1] + store float %1, float* undef, align 4 + ret 
void + +bb72: ; preds = %entry + ret void + +bb79: ; preds = %entry + ret void + +bb104: ; preds = %entry + ret void + +return: ; preds = %entry + ret void +} diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll index b16dcc6755b1..1dfd6278287d 100644 --- a/test/CodeGen/ARM/bic.ll +++ b/test/CodeGen/ARM/bic.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2 +; RUN: llc < %s -march=arm | FileCheck %s define i32 @f1(i32 %a, i32 %b) { %tmp = xor i32 %b, 4294967295 @@ -6,8 +6,12 @@ define i32 @f1(i32 %a, i32 %b) { ret i32 %tmp1 } +; CHECK: bic r0, r0, r1 + define i32 @f2(i32 %a, i32 %b) { %tmp = xor i32 %b, 4294967295 %tmp1 = and i32 %tmp, %a ret i32 %tmp1 } + +; CHECK: bic r0, r0, r1 diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index 46f136ba1fef..e5b5791b3cda 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vabs.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vabs.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: @@ -13,3 +13,16 @@ entry: } declare 
float @fabsf(float) + +; VFP2: test: +; VFP2: vabs.f32 s1, s1 + +; NFP1: test: +; NFP1: vabs.f32 d1, d1 +; NFP0: test: +; NFP0: vabs.f32 s1, s1 + +; CORTEXA8: test: +; CORTEXA8: vabs.f32 d1, d1 +; CORTEXA9: test: +; CORTEXA9: vabs.f32 s1, s1 diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 1426a2dc883a..db18a86eccd8 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vadd.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vadd.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: @@ -10,3 +10,15 @@ entry: ret float %0 } +; VFP2: test: +; VFP2: vadd.f32 s0, s1, s0 + +; NFP1: test: +; NFP1: vadd.f32 d0, d1, d0 +; NFP0: test: +; NFP0: vadd.f32 s0, s1, s0 + +; CORTEXA8: test: +; CORTEXA8: vadd.f32 d0, d1, d0 +; CORTEXA9: test: +; CORTEXA9: vadd.f32 s0, s1, s0 diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 45803f6d3c32..a5c86bf26343 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s 
-march=arm -mattr=+vfp2 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vdiv.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: @@ -10,3 +10,15 @@ entry: ret float %0 } +; VFP2: test: +; VFP2: vdiv.f32 s0, s1, s0 + +; NFP1: test: +; NFP1: vdiv.f32 s0, s1, s0 +; NFP0: test: +; NFP0: vdiv.f32 s0, s1, s0 + +; CORTEXA8: test: +; CORTEXA8: vdiv.f32 s0, s1, s0 +; CORTEXA9: test: +; CORTEXA9: vdiv.f32 s0, s1, s0 diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll index 57efa8264041..904a58739370 100644 --- a/test/CodeGen/ARM/fmacs.ll +++ b/test/CodeGen/ARM/fmacs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E 
{vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmla.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %acc, float %a, float %b) { entry: @@ -11,3 +11,15 @@ entry: ret float %1 } +; VFP2: test: +; VFP2: vmla.f32 s2, s1, s0 + +; NFP1: test: +; NFP1: vmul.f32 d0, d1, d0 +; NFP0: test: +; NFP0: vmla.f32 s2, s1, s0 + +; CORTEXA8: test: +; CORTEXA8: vmul.f32 d0, d1, d0 +; CORTEXA9: test: +; CORTEXA9: vmla.f32 s2, s1, s0 diff --git a/test/CodeGen/ARM/fmscs.ll b/test/CodeGen/ARM/fmscs.ll index 31b5c52d38d8..7b9e029b676e 100644 --- a/test/CodeGen/ARM/fmscs.ll +++ b/test/CodeGen/ARM/fmscs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vnmls.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s 
-check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %acc, float %a, float %b) { entry: @@ -11,3 +11,15 @@ entry: ret float %1 } +; VFP2: test: +; VFP2: vnmls.f32 s2, s1, s0 + +; NFP1: test: +; NFP1: vnmls.f32 s2, s1, s0 +; NFP0: test: +; NFP0: vnmls.f32 s2, s1, s0 + +; CORTEXA8: test: +; CORTEXA8: vnmls.f32 s2, s1, s0 +; CORTEXA9: test: +; CORTEXA9: vnmls.f32 s2, s1, s0 diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index 735263c9a310..d3c9c82e9745 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vmul.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vmul.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test(float %a, float %b) { entry: @@ -10,3 +10,15 @@ entry: ret float %0 } +; VFP2: test: +; VFP2: vmul.f32 s0, s1, s0 + +; NFP1: test: +; NFP1: vmul.f32 d0, d1, d0 +; NFP0: test: +; NFP0: vmul.f32 s0, s1, s0 + +; CORTEXA8: test: +; 
CORTEXA8: vmul.f32 d0, d1, d0 +; CORTEXA9: test: +; CORTEXA9: vmul.f32 s0, s1, s0 diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll index bc3d42de75ca..d6c22f14a4ca 100644 --- a/test/CodeGen/ARM/fnegs.ll +++ b/test/CodeGen/ARM/fnegs.ll @@ -1,8 +1,8 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 -; RUN: llc < %s -march=arm -mcpu=cortex-a8 | grep -E {vneg.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 2 -; RUN: llc < %s -march=arm -mcpu=cortex-a9 | grep -E {vneg.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 2 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=CORTEXA8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=CORTEXA9 define float @test1(float* %a) { entry: @@ -13,6 +13,20 @@ entry: %retval = select i1 %3, float %1, float %0 ; [#uses=1] ret float %retval } +; VFP2: test1: +; VFP2: vneg.f32 s1, s0 + +; NFP1: test1: +; NFP1: vneg.f32 d1, d0 + +; NFP0: test1: +; NFP0: vneg.f32 s1, s0 + +; CORTEXA8: test1: +; CORTEXA8: vneg.f32 d1, d0 + +; CORTEXA9: test1: +; CORTEXA9: vneg.f32 s1, s0 define float @test2(float* %a) { entry: @@ -23,3 +37,18 @@ entry: %retval = select i1 %3, float %1, float %0 ; [#uses=1] ret float %retval } +; VFP2: test2: +; VFP2: vneg.f32 s1, s0 + +; NFP1: test2: +; NFP1: vneg.f32 d1, d0 + +; NFP0: test2: +; NFP0: vneg.f32 s1, s0 + +; CORTEXA8: test2: +; CORTEXA8: vneg.f32 d1, d0 + +; CORTEXA9: test2: +; CORTEXA9: vneg.f32 s1, s0 + diff --git 
a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll index 4de18bc3b456..710994d8d736 100644 --- a/test/CodeGen/ARM/fpconsts.ll +++ b/test/CodeGen/ARM/fpconsts.ll @@ -3,7 +3,7 @@ define arm_apcscc float @t1(float %x) nounwind readnone optsize { entry: ; CHECK: t1: -; CHECK: fconsts s1, #16 +; CHECK: vmov.f32 s1, #4.000000e+00 %0 = fadd float %x, 4.000000e+00 ret float %0 } @@ -11,7 +11,7 @@ entry: define arm_apcscc double @t2(double %x) nounwind readnone optsize { entry: ; CHECK: t2: -; CHECK: fconstd d1, #8 +; CHECK: vmov.f64 d1, #3.000000e+00 %0 = fadd double %x, 3.000000e+00 ret double %0 } @@ -19,7 +19,7 @@ entry: define arm_apcscc double @t3(double %x) nounwind readnone optsize { entry: ; CHECK: t3: -; CHECK: fconstd d1, #170 +; CHECK: vmov.f64 d1, #-1.300000e+01 %0 = fmul double %x, -1.300000e+01 ret double %0 } @@ -27,7 +27,7 @@ entry: define arm_apcscc float @t4(float %x) nounwind readnone optsize { entry: ; CHECK: t4: -; CHECK: fconsts s1, #184 +; CHECK: vmov.f32 s1, #-2.400000e+01 %0 = fmul float %x, -2.400000e+01 ret float %0 } diff --git a/test/CodeGen/ARM/fptoint.ll b/test/CodeGen/ARM/fptoint.ll index 4cacc5de7eec..299cb8f81503 100644 --- a/test/CodeGen/ARM/fptoint.ll +++ b/test/CodeGen/ARM/fptoint.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | grep -E {vmov\\W*r\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | not grep fmrrd +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 | FileCheck %s @i = weak global i32 0 ; [#uses=2] @u = weak global i32 0 ; [#uses=2] @@ -45,3 +44,6 @@ define void @foo9(double %x) { store i16 %tmp, i16* null ret void } +; CHECK: foo9: +; CHECK: vmov r0, s0 + diff --git a/test/CodeGen/ARM/fsubs.ll b/test/CodeGen/ARM/fsubs.ll index f84ccdd480b2..ae98be307892 100644 --- a/test/CodeGen/ARM/fsubs.ll +++ b/test/CodeGen/ARM/fsubs.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 -; RUN: llc < %s 
-march=arm -mattr=+neon -arm-use-neon-fp=1 | grep -E {vsub.f32\\W*d\[0-9\]+,\\W*d\[0-9\]+,\\W*d\[0-9\]+} | count 1 -; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | grep -E {vsub.f32\\W*s\[0-9\]+,\\W*s\[0-9\]+,\\W*s\[0-9\]+} | count 1 +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=1 | FileCheck %s -check-prefix=NFP1 +; RUN: llc < %s -march=arm -mattr=+neon -arm-use-neon-fp=0 | FileCheck %s -check-prefix=NFP0 define float @test(float %a, float %b) { entry: @@ -8,3 +8,6 @@ entry: ret float %0 } +; VFP2: vsub.f32 s0, s1, s0 +; NFP1: vsub.f32 d0, d1, d0 +; NFP0: vsub.f32 s0, s1, s0 diff --git a/test/CodeGen/ARM/load-global.ll b/test/CodeGen/ARM/load-global.ll deleted file mode 100644 index 56a4a477f510..000000000000 --- a/test/CodeGen/ARM/load-global.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static | \ -; RUN: not grep {L_G\$non_lazy_ptr} -; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | \ -; RUN: grep {L_G\$non_lazy_ptr} | count 2 -; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | \ -; RUN: grep {ldr.*pc} | count 1 -; RUN: llc < %s -mtriple=arm-linux-gnueabi -relocation-model=pic | \ -; RUN: grep {GOT} | count 1 - -@G = external global i32 - -define i32 @test1() { - %tmp = load i32* @G - ret i32 %tmp -} diff --git a/test/CodeGen/ARM/mls.ll b/test/CodeGen/ARM/mls.ll index 85407fa254b0..a6cdba445451 100644 --- a/test/CodeGen/ARM/mls.ll +++ b/test/CodeGen/ARM/mls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+v6t2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1 +; RUN: llc < %s -march=arm -mattr=+v6t2 | FileCheck %s define i32 @f1(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b @@ -12,3 +12,5 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) { %tmp2 = sub i32 %tmp1, %c ret i32 %tmp2 } + +; CHECK: mls r0, r0, r1, r2 diff --git 
a/test/CodeGen/ARM/movt-movw-global.ll b/test/CodeGen/ARM/movt-movw-global.ll new file mode 100644 index 000000000000..886ff3fea7a8 --- /dev/null +++ b/test/CodeGen/ARM/movt-movw-global.ll @@ -0,0 +1,20 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" +target triple = "armv7-eabi" + +@foo = common global i32 0 ; [#uses=1] + +define arm_aapcs_vfpcc i32* @bar1() nounwind readnone { +entry: +; CHECK: movw r0, :lower16:foo +; CHECK-NEXT: movt r0, :upper16:foo + ret i32* @foo +} + +define arm_aapcs_vfpcc void @bar2(i32 %baz) nounwind { +entry: +; CHECK: movw r1, :lower16:foo +; CHECK-NEXT: movt r1, :upper16:foo + store i32 %baz, i32* @foo, align 4 + ret void +} diff --git a/test/CodeGen/ARM/remat-2.ll b/test/CodeGen/ARM/remat-2.ll new file mode 100644 index 000000000000..1a871d258e30 --- /dev/null +++ b/test/CodeGen/ARM/remat-2.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -march=arm -mattr=+v6,+vfp2 -stats -info-output-file - | grep "Number of re-materialization" + +define arm_apcscc i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: + br i1 undef, label %smvp.exit, label %bb.i3 + +bb.i3: ; preds = %bb.i3, %bb134 + br i1 undef, label %smvp.exit, label %bb.i3 + +smvp.exit: ; preds = %bb.i3 + %0 = fmul double undef, 2.400000e-03 ; [#uses=2] + br i1 undef, label %bb138.preheader, label %bb159 + +bb138.preheader: ; preds = %smvp.exit + br label %bb138 + +bb138: ; preds = %bb138, %bb138.preheader + br i1 undef, label %bb138, label %bb145.loopexit + +bb142: ; preds = %bb.nph218.bb.nph218.split_crit_edge, %phi0.exit + %1 = fmul double undef, -1.200000e-03 ; [#uses=1] + %2 = fadd double undef, %1 ; [#uses=1] + %3 = fmul double %2, undef ; [#uses=1] + %4 = fsub double 0.000000e+00, %3 ; [#uses=1] + br i1 %14, label %phi1.exit, label %bb.i35 + +bb.i35: ; preds = %bb142 + %5 = call arm_apcscc double @sin(double %15) nounwind readonly ; [#uses=1] + %6 = fmul 
double %5, 0x4031740AFA84AD8A ; [#uses=1] + %7 = fsub double 1.000000e+00, undef ; [#uses=1] + %8 = fdiv double %7, 6.000000e-01 ; [#uses=1] + br label %phi1.exit + +phi1.exit: ; preds = %bb.i35, %bb142 + %.pn = phi double [ %6, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; [#uses=0] + %9 = phi double [ %8, %bb.i35 ], [ 0.000000e+00, %bb142 ] ; [#uses=1] + %10 = fmul double undef, %9 ; [#uses=0] + br i1 %14, label %phi0.exit, label %bb.i + +bb.i: ; preds = %phi1.exit + unreachable + +phi0.exit: ; preds = %phi1.exit + %11 = fsub double %4, undef ; [#uses=1] + %12 = fadd double 0.000000e+00, %11 ; [#uses=1] + store double %12, double* undef, align 4 + br label %bb142 + +bb145.loopexit: ; preds = %bb138 + br i1 undef, label %bb.nph218.bb.nph218.split_crit_edge, label %bb159 + +bb.nph218.bb.nph218.split_crit_edge: ; preds = %bb145.loopexit + %13 = fmul double %0, 0x401921FB54442D18 ; [#uses=1] + %14 = fcmp ugt double %0, 6.000000e-01 ; [#uses=2] + %15 = fdiv double %13, 6.000000e-01 ; [#uses=1] + br label %bb142 + +bb159: ; preds = %bb145.loopexit, %smvp.exit, %bb134 + unreachable + +bb166: ; preds = %bb127 + unreachable +} + +declare arm_apcscc double @sin(double) nounwind readonly diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index 50da997ed468..9565c8bca6b0 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin -; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5 +; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 3 %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } %struct.LOCBOX = type { i32, i32, i32, i32 } diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll new file mode 100644 index 000000000000..07edc91519df --- /dev/null +++ b/test/CodeGen/ARM/select-imm.ll @@ -0,0 +1,48 @@ +; RUN: llc < %s -march=arm | FileCheck %s 
--check-prefix=ARM +; RUN: llc < %s -march=arm -mattr=+thumb2 | FileCheck %s --check-prefix=T2 + +define arm_apcscc i32 @t1(i32 %c) nounwind readnone { +entry: +; ARM: t1: +; ARM: mov r1, #101 +; ARM: orr r1, r1, #1, 24 +; ARM: movgt r0, #123 + +; T2: t1: +; T2: movw r0, #357 +; T2: movgt r0, #123 + + %0 = icmp sgt i32 %c, 1 + %1 = select i1 %0, i32 123, i32 357 + ret i32 %1 +} + +define arm_apcscc i32 @t2(i32 %c) nounwind readnone { +entry: +; ARM: t2: +; ARM: mov r1, #101 +; ARM: orr r1, r1, #1, 24 +; ARM: movle r0, #123 + +; T2: t2: +; T2: movw r0, #357 +; T2: movle r0, #123 + + %0 = icmp sgt i32 %c, 1 + %1 = select i1 %0, i32 357, i32 123 + ret i32 %1 +} + +define arm_apcscc i32 @t3(i32 %a) nounwind readnone { +entry: +; ARM: t3: +; ARM: mov r0, #0 +; ARM: moveq r0, #1 + +; T2: t3: +; T2: mov r0, #0 +; T2: moveq r0, #1 + %0 = icmp eq i32 %a, 160 + %1 = zext i1 %0 to i32 + ret i32 %1 +} diff --git a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll index 33a36452b2e5..1519fe665cae 100644 --- a/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll +++ b/test/CodeGen/Generic/2007-06-06-CriticalEdgeLandingPad.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -march=x86 -enable-eh -asm-verbose -o - | \ -; RUN: grep -A 3 {Llabel138.*Region start} | grep {3.*Action} +; RUN: llc < %s -march=x86 -enable-eh -asm-verbose -o - | FileCheck %s ; PR1422 ; PR1508 @@ -2864,3 +2863,8 @@ declare void @system__img_enum__image_enumeration_8(%struct.string___XUP* sret , declare i32 @memcmp(i8*, i8*, i32, ...) 
declare void @report__result() + +; CHECK: {{Llabel138.*Region start}} +; CHECK-NEXT: Region length +; CHECK-NEXT: Landing pad +; CHECK-NEXT: {{3.*Action}} diff --git a/test/CodeGen/MSP430/2009-11-20-NewNode.ll b/test/CodeGen/MSP430/2009-11-20-NewNode.ll new file mode 100644 index 000000000000..887c7d6fa24e --- /dev/null +++ b/test/CodeGen/MSP430/2009-11-20-NewNode.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=msp430 < %s +; PR5558 + +define i64 @_strtoll_r(i16 %base) nounwind { +entry: + br i1 undef, label %if.then, label %if.end27 + +if.then: ; preds = %do.end + br label %if.end27 + +if.end27: ; preds = %if.then, %do.end + %cond66 = select i1 undef, i64 -9223372036854775808, i64 9223372036854775807 ; [#uses=3] + %conv69 = sext i16 %base to i64 ; [#uses=1] + %div = udiv i64 %cond66, %conv69 ; [#uses=1] + br label %for.cond + +for.cond: ; preds = %if.end116, %if.end27 + br i1 undef, label %if.then152, label %if.then93 + +if.then93: ; preds = %for.cond + br i1 undef, label %if.end116, label %if.then152 + +if.end116: ; preds = %if.then93 + %cmp123 = icmp ugt i64 undef, %div ; [#uses=1] + %or.cond = or i1 undef, %cmp123 ; [#uses=0] + br label %for.cond + +if.then152: ; preds = %if.then93, %for.cond + br i1 undef, label %if.end182, label %if.then172 + +if.then172: ; preds = %if.then152 + ret i64 %cond66 + +if.end182: ; preds = %if.then152 + ret i64 %cond66 +} diff --git a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll index f59639f66adf..636b318014a5 100644 --- a/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll +++ b/test/CodeGen/Mips/2009-11-16-CstPoolLoad.ll @@ -4,7 +4,7 @@ target triple = "mips-unknown-linux" define float @h() nounwind readnone { entry: -; CHECK: lui $2, %hi($CPI1_0) +; CHECK: lw $2, %got($CPI1_0)($gp) ; CHECK: lwc1 $f0, %lo($CPI1_0)($2) ret float 0x400B333340000000 } diff --git a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll index d7072dd9b5d7..b508026c21f6 100644 --- 
a/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll +++ b/test/CodeGen/PIC16/2009-07-17-PR4566-pic16.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=pic16 | grep {movf \\+@i + 0, \\+W} +; RUN: llc < %s -march=pic16 | FileCheck %s target datalayout = "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-f32:32:32" target triple = "pic16-" @@ -27,3 +27,5 @@ entry: store i8 %conv8, i8* %tmp9 ret void } + +; CHECK: movf @i + 0, W diff --git a/test/CodeGen/PIC16/2009-11-20-NewNode.ll b/test/CodeGen/PIC16/2009-11-20-NewNode.ll new file mode 100644 index 000000000000..d68f0f41c4a5 --- /dev/null +++ b/test/CodeGen/PIC16/2009-11-20-NewNode.ll @@ -0,0 +1,36 @@ +; RUN: llc -march=pic16 < %s +; PR5558 + +define i64 @_strtoll_r(i16 %base) nounwind { +entry: + br i1 undef, label %if.then, label %if.end27 + +if.then: ; preds = %do.end + br label %if.end27 + +if.end27: ; preds = %if.then, %do.end + %cond66 = select i1 undef, i64 -9223372036854775808, i64 9223372036854775807 ; [#uses=3] + %conv69 = sext i16 %base to i64 ; [#uses=1] + %div = udiv i64 %cond66, %conv69 ; [#uses=1] + br label %for.cond + +for.cond: ; preds = %if.end116, %if.end27 + br i1 undef, label %if.then152, label %if.then93 + +if.then93: ; preds = %for.cond + br i1 undef, label %if.end116, label %if.then152 + +if.end116: ; preds = %if.then93 + %cmp123 = icmp ugt i64 undef, %div ; [#uses=1] + %or.cond = or i1 undef, %cmp123 ; [#uses=0] + br label %for.cond + +if.then152: ; preds = %if.then93, %for.cond + br i1 undef, label %if.end182, label %if.then172 + +if.then172: ; preds = %if.then152 + ret i64 %cond66 + +if.end182: ; preds = %if.then152 + ret i64 %cond66 +} diff --git a/test/CodeGen/PowerPC/2009-11-25-ImpDefBug.ll b/test/CodeGen/PowerPC/2009-11-25-ImpDefBug.ll new file mode 100644 index 000000000000..9a22a6f76c24 --- /dev/null +++ b/test/CodeGen/PowerPC/2009-11-25-ImpDefBug.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -mtriple=powerpc-apple-darwin9.5 -mcpu=g5 +; rdar://7422268 + +%struct..0EdgeT = type { i32, i32, float, float, i32, i32, i32, 
float, i32, i32 } + +define void @smooth_color_z_triangle(i32 %v0, i32 %v1, i32 %v2, i32 %pv) nounwind { +entry: + br i1 undef, label %return, label %bb14 + +bb14: ; preds = %entry + br i1 undef, label %bb15, label %return + +bb15: ; preds = %bb14 + br i1 undef, label %bb16, label %bb17 + +bb16: ; preds = %bb15 + br label %bb17 + +bb17: ; preds = %bb16, %bb15 + %0 = fcmp olt float undef, 0.000000e+00 ; [#uses=2] + %eTop.eMaj = select i1 %0, %struct..0EdgeT* undef, %struct..0EdgeT* null ; <%struct..0EdgeT*> [#uses=1] + br label %bb69 + +bb24: ; preds = %bb69 + br i1 undef, label %bb25, label %bb28 + +bb25: ; preds = %bb24 + br label %bb33 + +bb28: ; preds = %bb24 + br i1 undef, label %return, label %bb32 + +bb32: ; preds = %bb28 + br i1 %0, label %bb38, label %bb33 + +bb33: ; preds = %bb32, %bb25 + br i1 undef, label %bb34, label %bb38 + +bb34: ; preds = %bb33 + br label %bb38 + +bb38: ; preds = %bb34, %bb33, %bb32 + %eRight.08 = phi %struct..0EdgeT* [ %eTop.eMaj, %bb32 ], [ undef, %bb34 ], [ undef, %bb33 ] ; <%struct..0EdgeT*> [#uses=0] + %fdgOuter.0 = phi i32 [ %fdgOuter.1, %bb32 ], [ undef, %bb34 ], [ %fdgOuter.1, %bb33 ] ; [#uses=1] + %fz.3 = phi i32 [ %fz.2, %bb32 ], [ 2147483647, %bb34 ], [ %fz.2, %bb33 ] ; [#uses=1] + %1 = add i32 undef, 1 ; [#uses=0] + br label %bb69 + +bb69: ; preds = %bb38, %bb17 + %fdgOuter.1 = phi i32 [ undef, %bb17 ], [ %fdgOuter.0, %bb38 ] ; [#uses=2] + %fz.2 = phi i32 [ undef, %bb17 ], [ %fz.3, %bb38 ] ; [#uses=2] + br i1 undef, label %bb24, label %return + +return: ; preds = %bb69, %bb28, %bb14, %entry + ret void +} diff --git a/test/CodeGen/PowerPC/Frames-alloca.ll b/test/CodeGen/PowerPC/Frames-alloca.ll index 25fc626550d2..aed4fdbb2dcc 100644 --- a/test/CodeGen/PowerPC/Frames-alloca.ll +++ b/test/CodeGen/PowerPC/Frames-alloca.ll @@ -6,23 +6,23 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 
-disable-fp-elim -enable-ppc32-regscavenger | FileCheck %s -check-prefix=PPC32-RS-NOFP -; CHECK-PPC32: stw r31, 20(r1) +; CHECK-PPC32: stw r31, -4(r1) ; CHECK-PPC32: lwz r1, 0(r1) -; CHECK-PPC32: lwz r31, 20(r1) -; CHECK-PPC32-NOFP: stw r31, 20(r1) +; CHECK-PPC32: lwz r31, -4(r1) +; CHECK-PPC32-NOFP: stw r31, -4(r1) ; CHECK-PPC32-NOFP: lwz r1, 0(r1) -; CHECK-PPC32-NOFP: lwz r31, 20(r1) +; CHECK-PPC32-NOFP: lwz r31, -4(r1) ; CHECK-PPC32-RS: stwu r1, -80(r1) ; CHECK-PPC32-RS-NOFP: stwu r1, -80(r1) -; CHECK-PPC64: std r31, 40(r1) -; CHECK-PPC64: stdu r1, -112(r1) +; CHECK-PPC64: std r31, -8(r1) +; CHECK-PPC64: stdu r1, -128(r1) ; CHECK-PPC64: ld r1, 0(r1) -; CHECK-PPC64: ld r31, 40(r1) -; CHECK-PPC64-NOFP: std r31, 40(r1) -; CHECK-PPC64-NOFP: stdu r1, -112(r1) +; CHECK-PPC64: ld r31, -8(r1) +; CHECK-PPC64-NOFP: std r31, -8(r1) +; CHECK-PPC64-NOFP: stdu r1, -128(r1) ; CHECK-PPC64-NOFP: ld r1, 0(r1) -; CHECK-PPC64-NOFP: ld r31, 40(r1) +; CHECK-PPC64-NOFP: ld r31, -8(r1) define i32* @f1(i32 %n) { %tmp = alloca i32, i32 %n ; [#uses=1] diff --git a/test/CodeGen/PowerPC/Frames-large.ll b/test/CodeGen/PowerPC/Frames-large.ll index fda2e4ff9ce9..302d3df28436 100644 --- a/test/CodeGen/PowerPC/Frames-large.ll +++ b/test/CodeGen/PowerPC/Frames-large.ll @@ -22,13 +22,13 @@ define i32* @f1() nounwind { ; PPC32-NOFP: blr ; PPC32-FP: _f1: -; PPC32-FP: stw r31, 20(r1) +; PPC32-FP: stw r31, -4(r1) ; PPC32-FP: lis r0, -1 ; PPC32-FP: ori r0, r0, 32704 ; PPC32-FP: stwux r1, r1, r0 ; ... ; PPC32-FP: lwz r1, 0(r1) -; PPC32-FP: lwz r31, 20(r1) +; PPC32-FP: lwz r31, -4(r1) ; PPC32-FP: blr @@ -42,11 +42,11 @@ define i32* @f1() nounwind { ; PPC64-FP: _f1: -; PPC64-FP: std r31, 40(r1) +; PPC64-FP: std r31, -8(r1) ; PPC64-FP: lis r0, -1 -; PPC64-FP: ori r0, r0, 32656 +; PPC64-FP: ori r0, r0, 32640 ; PPC64-FP: stdux r1, r1, r0 ; ... 
; PPC64-FP: ld r1, 0(r1) -; PPC64-FP: ld r31, 40(r1) +; PPC64-FP: ld r31, -8(r1) ; PPC64-FP: blr diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll index 6875704cf30d..404fdd01966c 100644 --- a/test/CodeGen/PowerPC/Frames-small.ll +++ b/test/CodeGen/PowerPC/Frames-small.ll @@ -1,26 +1,26 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -; RUN not grep {stw r31, 20(r1)} %t1 +; RUN not grep {stw r31, -4(r1)} %t1 ; RUN: grep {stwu r1, -16448(r1)} %t1 ; RUN: grep {addi r1, r1, 16448} %t1 ; RUN: llc < %s -march=ppc32 | \ -; RUN: not grep {lwz r31, 20(r1)} +; RUN: not grep {lwz r31, -4(r1)} ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ ; RUN: -o %t2 -; RUN: grep {stw r31, 20(r1)} %t2 +; RUN: grep {stw r31, -4(r1)} %t2 ; RUN: grep {stwu r1, -16448(r1)} %t2 ; RUN: grep {addi r1, r1, 16448} %t2 -; RUN: grep {lwz r31, 20(r1)} %t2 +; RUN: grep {lwz r31, -4(r1)} %t2 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -o %t3 -; RUN: not grep {std r31, 40(r1)} %t3 +; RUN: not grep {std r31, -8(r1)} %t3 ; RUN: grep {stdu r1, -16496(r1)} %t3 ; RUN: grep {addi r1, r1, 16496} %t3 -; RUN: not grep {ld r31, 40(r1)} %t3 +; RUN: not grep {ld r31, -8(r1)} %t3 ; RUN: llc < %s -march=ppc64 -mtriple=powerpc-apple-darwin8 -disable-fp-elim \ ; RUN: -o %t4 -; RUN: grep {std r31, 40(r1)} %t4 -; RUN: grep {stdu r1, -16496(r1)} %t4 -; RUN: grep {addi r1, r1, 16496} %t4 -; RUN: grep {ld r31, 40(r1)} %t4 +; RUN: grep {std r31, -8(r1)} %t4 +; RUN: grep {stdu r1, -16512(r1)} %t4 +; RUN: grep {addi r1, r1, 16512} %t4 +; RUN: grep {ld r31, -8(r1)} %t4 define i32* @f1() { %tmp = alloca i32, i32 4095 ; [#uses=1] diff --git a/test/CodeGen/PowerPC/bswap-load-store.ll b/test/CodeGen/PowerPC/bswap-load-store.ll index 7eb3bbb8d308..4f6bfc729913 100644 --- a/test/CodeGen/PowerPC/bswap-load-store.ll +++ b/test/CodeGen/PowerPC/bswap-load-store.ll @@ -1,11 +1,6 @@ -; RUN: llc < %s -march=ppc32 | \ -; RUN: grep 
{stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4 -; RUN: llc < %s -march=ppc32 | not grep rlwinm -; RUN: llc < %s -march=ppc32 | not grep rlwimi -; RUN: llc < %s -march=ppc64 | \ -; RUN: grep {stwbrx\\|lwbrx\\|sthbrx\\|lhbrx} | count 4 -; RUN: llc < %s -march=ppc64 | not grep rlwinm -; RUN: llc < %s -march=ppc64 | not grep rlwimi +; RUN: llc < %s -march=ppc32 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=ppc64 | FileCheck %s -check-prefix=X64 + define void @STWBRX(i32 %i, i8* %ptr, i32 %off) { %tmp1 = getelementptr i8* %ptr, i32 %off ; [#uses=1] @@ -43,3 +38,14 @@ declare i32 @llvm.bswap.i32(i32) declare i16 @llvm.bswap.i16(i16) + +; X32: stwbrx +; X32: lwbrx +; X32: sthbrx +; X32: lhbrx + +; X64: stwbrx +; X64: lwbrx +; X64: sthbrx +; X64: lhbrx + diff --git a/test/CodeGen/PowerPC/ppc-prologue.ll b/test/CodeGen/PowerPC/ppc-prologue.ll index 581d010a901e..e49dcb82c60d 100644 --- a/test/CodeGen/PowerPC/ppc-prologue.ll +++ b/test/CodeGen/PowerPC/ppc-prologue.ll @@ -2,7 +2,7 @@ define i32 @_Z4funci(i32 %a) ssp { ; CHECK: mflr r0 -; CHECK-NEXT: stw r31, 20(r1) +; CHECK-NEXT: stw r31, -4(r1) ; CHECK-NEXT: stw r0, 8(r1) ; CHECK-NEXT: stwu r1, -80(r1) ; CHECK-NEXT: Llabel1: diff --git a/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll b/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll new file mode 100644 index 000000000000..7bce01c00afa --- /dev/null +++ b/test/CodeGen/PowerPC/rlwimi-keep-rsh.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin | FileCheck %s +; Formerly dropped the RHS of %tmp6 when constructing rlwimi. 
+; 7346117 + +@foo = external global i32 + +define void @xxx(i32 %a, i32 %b, i32 %c, i32 %d) nounwind optsize { +; CHECK: _xxx: +; CHECK: or +; CHECK: and +; CHECK: rlwimi +entry: + %tmp0 = ashr i32 %d, 31 + %tmp1 = and i32 %tmp0, 255 + %tmp2 = xor i32 %tmp1, 255 + %tmp3 = ashr i32 %b, 31 + %tmp4 = ashr i32 %a, 4 + %tmp5 = or i32 %tmp3, %tmp4 + %tmp6 = and i32 %tmp2, %tmp5 + %tmp7 = shl i32 %c, 8 + %tmp8 = or i32 %tmp6, %tmp7 + store i32 %tmp8, i32* @foo, align 4 + br label %return + +return: + ret void +; CHECK: blr +} \ No newline at end of file diff --git a/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll new file mode 100644 index 000000000000..015c08605fea --- /dev/null +++ b/test/CodeGen/PowerPC/vec_buildvector_loadstore.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mattr=+altivec | FileCheck %s +; Formerly this did byte loads and word stores. +@a = external global <16 x i8> +@b = external global <16 x i8> +@c = external global <16 x i8> + +define void @foo() nounwind ssp { +; CHECK: _foo: +; CHECK-NOT: stw +entry: + %tmp0 = load <16 x i8>* @a, align 16 + %tmp180.i = extractelement <16 x i8> %tmp0, i32 0 ; [#uses=1] + %tmp181.i = insertelement <16 x i8> , i8 %tmp180.i, i32 2 ; <<16 x i8>> [#uses=1] + %tmp182.i = extractelement <16 x i8> %tmp0, i32 1 ; [#uses=1] + %tmp183.i = insertelement <16 x i8> %tmp181.i, i8 %tmp182.i, i32 3 ; <<16 x i8>> [#uses=1] + %tmp184.i = insertelement <16 x i8> %tmp183.i, i8 0, i32 4 ; <<16 x i8>> [#uses=1] + %tmp185.i = insertelement <16 x i8> %tmp184.i, i8 0, i32 5 ; <<16 x i8>> [#uses=1] + %tmp186.i = extractelement <16 x i8> %tmp0, i32 4 ; [#uses=1] + %tmp187.i = insertelement <16 x i8> %tmp185.i, i8 %tmp186.i, i32 6 ; <<16 x i8>> [#uses=1] + %tmp188.i = extractelement <16 x i8> %tmp0, i32 5 ; [#uses=1] + %tmp189.i = insertelement <16 x i8> %tmp187.i, i8 %tmp188.i, i32 7 ; <<16 x i8>> [#uses=1] + %tmp190.i = insertelement <16 x i8> 
%tmp189.i, i8 0, i32 8 ; <<16 x i8>> [#uses=1] + %tmp191.i = insertelement <16 x i8> %tmp190.i, i8 0, i32 9 ; <<16 x i8>> [#uses=1] + %tmp192.i = extractelement <16 x i8> %tmp0, i32 8 ; [#uses=1] + %tmp193.i = insertelement <16 x i8> %tmp191.i, i8 %tmp192.i, i32 10 ; <<16 x i8>> [#uses=1] + %tmp194.i = extractelement <16 x i8> %tmp0, i32 9 ; [#uses=1] + %tmp195.i = insertelement <16 x i8> %tmp193.i, i8 %tmp194.i, i32 11 ; <<16 x i8>> [#uses=1] + %tmp196.i = insertelement <16 x i8> %tmp195.i, i8 0, i32 12 ; <<16 x i8>> [#uses=1] + %tmp197.i = insertelement <16 x i8> %tmp196.i, i8 0, i32 13 ; <<16 x i8>> [#uses=1] +%tmp201 = shufflevector <16 x i8> %tmp197.i, <16 x i8> %tmp0, <16 x i32> ; ModuleID = 'try.c' + store <16 x i8> %tmp201, <16 x i8>* @c, align 16 + br label %return + +return: ; preds = %bb2 + ret void +; CHECK: blr +} diff --git a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll index eefbae53e720..8f6449e8ffd5 100644 --- a/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll +++ b/test/CodeGen/Thumb2/cross-rc-coalescing-2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 4 +; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 7 define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind { entry: diff --git a/test/CodeGen/Thumb2/ifcvt-neon.ll b/test/CodeGen/Thumb2/ifcvt-neon.ll new file mode 100644 index 000000000000..c667909e3c11 --- /dev/null +++ b/test/CodeGen/Thumb2/ifcvt-neon.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=thumb -mcpu=cortex-a8 | FileCheck %s +; rdar://7368193 + +@a = common global float 0.000000e+00 ; [#uses=2] +@b = common global float 0.000000e+00 ; [#uses=1] + +define arm_apcscc float @t(i32 %c) nounwind { +entry: + %0 = icmp sgt i32 %c, 1 ; [#uses=1] + %1 = load float* @a, align 4 ; [#uses=2] + %2 = load float* @b, align 4 ; [#uses=2] + br i1 %0, label %bb, label %bb1 + +bb: ; preds = %entry +; 
CHECK: ite lt +; CHECK: vsublt.f32 +; CHECK-NEXT: vaddge.f32 + %3 = fadd float %1, %2 ; [#uses=1] + br label %bb2 + +bb1: ; preds = %entry + %4 = fsub float %1, %2 ; [#uses=1] + br label %bb2 + +bb2: ; preds = %bb1, %bb + %storemerge = phi float [ %4, %bb1 ], [ %3, %bb ] ; [#uses=2] + store float %storemerge, float* @a + ret float %storemerge +} diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll index 7cbe26097b54..47d85b1aa0e5 100644 --- a/test/CodeGen/Thumb2/ldr-str-imm12.ll +++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll @@ -22,8 +22,7 @@ define arm_apcscc %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind { entry: -; CHECK: ldr.w r9, [r7, #+32] -; CHECK-NEXT : str.w r9, [sp, #+28] +; CHECK: ldr.w r9, [r7, #+28] %xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] %ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0] br i1 false, label %bb, label %bb20 @@ -53,7 +52,6 @@ bb420: ; preds = %bb20, %bb20 ; CHECK: str r{{[0-7]}}, [sp] ; CHECK: str r{{[0-7]}}, [sp, #+4] ; CHECK: str r{{[0-7]}}, [sp, #+8] -; CHECK: ldr r{{[0-7]}}, [sp, #+28] ; CHECK: str r{{[0-7]}}, [sp, #+24] store %union.rec* null, %union.rec** @zz_hold, align 4 store %union.rec* null, %union.rec** @zz_res, align 4 diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll index 912939bf24eb..9ab19e9d5a61 100644 --- a/test/CodeGen/Thumb2/machine-licm.ll +++ b/test/CodeGen/Thumb2/machine-licm.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -disable-fp-elim | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s --check-prefix=PIC ; 
rdar://7353541 ; rdar://7354376 @@ -17,12 +18,24 @@ entry: bb.nph: ; preds = %entry ; CHECK: BB#1 ; CHECK: ldr.n r2, LCPI1_0 -; CHECK: add r2, pc -; CHECK: ldr r{{[0-9]+}}, [r2] +; CHECK: ldr r3, [r2] +; CHECK: ldr r3, [r3] +; CHECK: ldr r2, [r2] ; CHECK: LBB1_2 ; CHECK: LCPI1_0: ; CHECK-NOT: LCPI1_1: ; CHECK: .section + +; PIC: BB#1 +; PIC: ldr.n r2, LCPI1_0 +; PIC: add r2, pc +; PIC: ldr r3, [r2] +; PIC: ldr r3, [r3] +; PIC: ldr r2, [r2] +; PIC: LBB1_2 +; PIC: LCPI1_0: +; PIC-NOT: LCPI1_1: +; PIC: .section %.pre = load i32* @GV, align 4 ; [#uses=1] br label %bb diff --git a/test/CodeGen/Thumb2/thumb2-add3.ll b/test/CodeGen/Thumb2/thumb2-add3.ll index 8d472cb110b8..58fc33372cf6 100644 --- a/test/CodeGen/Thumb2/thumb2-add3.ll +++ b/test/CodeGen/Thumb2/thumb2-add3.ll @@ -1,6 +1,9 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @f1(i32 %a) { %tmp = add i32 %a, 4095 ret i32 %tmp } + +; CHECK: f1: +; CHECK: addw r0, r0, #4095 diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll index 1e2666f40368..76c56d00473d 100644 --- a/test/CodeGen/Thumb2/thumb2-and2.ll +++ b/test/CodeGen/Thumb2/thumb2-and2.ll @@ -1,31 +1,41 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s ; 171 = 0x000000ab define i32 @f1(i32 %a) { %tmp = and i32 %a, 171 ret i32 %tmp } +; CHECK: f1: +; CHECK: and r0, r0, #171 ; 1179666 = 0x00120012 define i32 @f2(i32 %a) { %tmp = and i32 %a, 1179666 ret i32 %tmp } +; CHECK: f2: +; CHECK: and r0, r0, #1179666 ; 872428544 = 0x34003400 define i32 @f3(i32 %a) { %tmp = and i32 %a, 872428544 ret i32 %tmp } +; CHECK: f3: +; CHECK: and r0, r0, #872428544 ; 1448498774 = 0x56565656 define i32 @f4(i32 %a) { %tmp = 
and i32 %a, 1448498774 ret i32 %tmp } +; CHECK: f4: +; CHECK: and r0, r0, #1448498774 ; 66846720 = 0x03fc0000 define i32 @f5(i32 %a) { %tmp = and i32 %a, 66846720 ret i32 %tmp } +; CHECK: f5: +; CHECK: and r0, r0, #66846720 diff --git a/test/CodeGen/Thumb2/thumb2-cmn.ll b/test/CodeGen/Thumb2/thumb2-cmn.ll index 401c56a72139..eeaaa7fbdf91 100644 --- a/test/CodeGen/Thumb2/thumb2-cmn.ll +++ b/test/CodeGen/Thumb2/thumb2-cmn.ll @@ -1,32 +1,36 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\]$} | count 4 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {cmn\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i1 @f1(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp ne i32 %a, %nb ret i1 %tmp } +; CHECK: f1: +; CHECK: cmn.w r0, r1 define i1 @f2(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp ne i32 %nb, %a ret i1 %tmp } +; CHECK: f2: +; CHECK: cmn.w r0, r1 define i1 @f3(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp eq i32 %a, %nb ret i1 %tmp } +; CHECK: f3: +; CHECK: cmn.w r0, r1 define i1 @f4(i32 %a, i32 %b) { %nb = sub i32 0, %b %tmp = icmp eq i32 %nb, %a ret i1 %tmp } +; CHECK: f4: +; CHECK: cmn.w r0, r1 define i1 @f5(i32 %a, i32 %b) { %tmp = shl i32 %b, 5 @@ -34,6 +38,8 @@ define i1 @f5(i32 %a, i32 %b) { %tmp1 = icmp eq i32 %nb, %a ret i1 %tmp1 } +; CHECK: f5: +; CHECK: cmn.w r0, r1, lsl #5 define i1 @f6(i32 %a, i32 %b) { %tmp = lshr i32 %b, 6 @@ -41,6 +47,8 @@ define i1 @f6(i32 %a, i32 %b) { %tmp1 = icmp ne i32 %nb, %a ret i1 %tmp1 } +; CHECK: f6: +; CHECK: cmn.w r0, r1, lsr #6 define i1 @f7(i32 %a, i32 %b) { %tmp = ashr 
i32 %b, 7 @@ -48,6 +56,8 @@ define i1 @f7(i32 %a, i32 %b) { %tmp1 = icmp eq i32 %a, %nb ret i1 %tmp1 } +; CHECK: f7: +; CHECK: cmn.w r0, r1, asr #7 define i1 @f8(i32 %a, i32 %b) { %l8 = shl i32 %a, 24 @@ -57,3 +67,6 @@ define i1 @f8(i32 %a, i32 %b) { %tmp1 = icmp ne i32 %a, %nb ret i1 %tmp1 } +; CHECK: f8: +; CHECK: cmn.w r0, r0, ror #8 + diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll index be66425d7e66..c4cc749ea5c7 100644 --- a/test/CodeGen/Thumb2/thumb2-mla.ll +++ b/test/CodeGen/Thumb2/thumb2-mla.ll @@ -1,13 +1,17 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @f1(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b %tmp2 = add i32 %c, %tmp1 ret i32 %tmp2 } +; CHECK: f1: +; CHECK: mla r0, r0, r1, r2 define i32 @f2(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b %tmp2 = add i32 %tmp1, %c ret i32 %tmp2 } +; CHECK: f2: +; CHECK: mla r0, r0, r1, r2 diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll index 782def966615..fc9e6bab48cb 100644 --- a/test/CodeGen/Thumb2/thumb2-mls.ll +++ b/test/CodeGen/Thumb2/thumb2-mls.ll @@ -1,10 +1,12 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @f1(i32 %a, i32 %b, i32 %c) { %tmp1 = mul i32 %a, %b %tmp2 = sub i32 %c, %tmp1 ret i32 %tmp2 } +; CHECK: f1: +; CHECK: mls r0, r0, r1, r2 ; sub doesn't commute, so no mls for this one define i32 @f2(i32 %a, i32 %b, i32 %c) { @@ -12,3 +14,6 @@ define i32 @f2(i32 %a, i32 %b, i32 %c) { %tmp2 = sub i32 %tmp1, %c ret i32 %tmp2 } +; CHECK: f2: +; CHECK: muls r0, r1 + diff --git a/test/CodeGen/Thumb2/thumb2-mov2.ll b/test/CodeGen/Thumb2/thumb2-mov2.ll deleted file mode 100644 index 64e2ddcf3fe3..000000000000 --- a/test/CodeGen/Thumb2/thumb2-mov2.ll 
+++ /dev/null @@ -1,73 +0,0 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s - -define i32 @t2MOVTi16_ok_1(i32 %a) { -; CHECK: t2MOVTi16_ok_1: -; CHECK: movt r0, #1234 - %1 = and i32 %a, 65535 - %2 = shl i32 1234, 16 - %3 = or i32 %1, %2 - - ret i32 %3 -} - -define i32 @t2MOVTi16_test_1(i32 %a) { -; CHECK: t2MOVTi16_test_1: -; CHECK: movt r0, #1234 - %1 = shl i32 255, 8 - %2 = shl i32 1234, 8 - %3 = or i32 %1, 255 ; This give us 0xFFFF in %3 - %4 = shl i32 %2, 8 ; This gives us (1234 << 16) in %4 - %5 = and i32 %a, %3 - %6 = or i32 %4, %5 - - ret i32 %6 -} - -define i32 @t2MOVTi16_test_2(i32 %a) { -; CHECK: t2MOVTi16_test_2: -; CHECK: movt r0, #1234 - %1 = shl i32 255, 8 - %2 = shl i32 1234, 8 - %3 = or i32 %1, 255 ; This give us 0xFFFF in %3 - %4 = shl i32 %2, 6 - %5 = and i32 %a, %3 - %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6 - %7 = or i32 %5, %6 - - ret i32 %7 -} - -define i32 @t2MOVTi16_test_3(i32 %a) { -; CHECK: t2MOVTi16_test_3: -; CHECK: movt r0, #1234 - %1 = shl i32 255, 8 - %2 = shl i32 1234, 8 - %3 = or i32 %1, 255 ; This give us 0xFFFF in %3 - %4 = shl i32 %2, 6 - %5 = and i32 %a, %3 - %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6 - %7 = lshr i32 %6, 6 - %8 = shl i32 %7, 6 - %9 = or i32 %5, %8 - - ret i32 %9 -} - -define i32 @t2MOVTi16_test_nomatch_1(i32 %a) { -; CHECK: t2MOVTi16_test_nomatch_1: -; CHECK: #8388608 -; CHECK: movw r1, #65535 -; CHECK-NEXT: movt r1, #154 -; CHECK: #1720320 - %1 = shl i32 255, 8 - %2 = shl i32 1234, 8 - %3 = or i32 %1, 255 ; This give us 0xFFFF in %3 - %4 = shl i32 %2, 6 - %5 = and i32 %a, %3 - %6 = shl i32 %4, 2 ; This gives us (1234 << 16) in %6 - %7 = lshr i32 %6, 3 - %8 = or i32 %5, %7 - ret i32 %8 -} - - diff --git a/test/CodeGen/Thumb2/thumb2-mov3.ll b/test/CodeGen/Thumb2/thumb2-mov3.ll deleted file mode 100644 index 46af6fb16c49..000000000000 --- a/test/CodeGen/Thumb2/thumb2-mov3.ll +++ /dev/null @@ -1,41 +0,0 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s - -; 171 = 
0x000000ab -define i32 @f1(i32 %a) { -; CHECK: f1: -; CHECK: movs r0, #171 - %tmp = add i32 0, 171 - ret i32 %tmp -} - -; 1179666 = 0x00120012 -define i32 @f2(i32 %a) { -; CHECK: f2: -; CHECK: mov.w r0, #1179666 - %tmp = add i32 0, 1179666 - ret i32 %tmp -} - -; 872428544 = 0x34003400 -define i32 @f3(i32 %a) { -; CHECK: f3: -; CHECK: mov.w r0, #872428544 - %tmp = add i32 0, 872428544 - ret i32 %tmp -} - -; 1448498774 = 0x56565656 -define i32 @f4(i32 %a) { -; CHECK: f4: -; CHECK: mov.w r0, #1448498774 - %tmp = add i32 0, 1448498774 - ret i32 %tmp -} - -; 66846720 = 0x03fc0000 -define i32 @f5(i32 %a) { -; CHECK: f5: -; CHECK: mov.w r0, #66846720 - %tmp = add i32 0, 66846720 - ret i32 %tmp -} diff --git a/test/CodeGen/Thumb2/thumb2-mov4.ll b/test/CodeGen/Thumb2/thumb2-mov4.ll deleted file mode 100644 index 06fa238263ab..000000000000 --- a/test/CodeGen/Thumb2/thumb2-mov4.ll +++ /dev/null @@ -1,6 +0,0 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1 - -define i32 @f6(i32 %a) { - %tmp = add i32 0, 65535 - ret i32 %tmp -} diff --git a/test/CodeGen/Thumb2/thumb2-orn.ll b/test/CodeGen/Thumb2/thumb2-orn.ll index d4222c2b2dac..97a3fd75f068 100644 --- a/test/CodeGen/Thumb2/thumb2-orn.ll +++ b/test/CodeGen/Thumb2/thumb2-orn.ll @@ -1,32 +1,37 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*$} | count 4 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + define i32 @f1(i32 %a, i32 
%b) { %tmp = xor i32 %b, 4294967295 %tmp1 = or i32 %a, %tmp ret i32 %tmp1 } +; CHECK: f1: +; CHECK: orn r0, r0, r1 define i32 @f2(i32 %a, i32 %b) { %tmp = xor i32 %b, 4294967295 %tmp1 = or i32 %tmp, %a ret i32 %tmp1 } +; CHECK: f2: +; CHECK: orn r0, r0, r1 define i32 @f3(i32 %a, i32 %b) { %tmp = xor i32 4294967295, %b %tmp1 = or i32 %a, %tmp ret i32 %tmp1 } +; CHECK: f3: +; CHECK: orn r0, r0, r1 define i32 @f4(i32 %a, i32 %b) { %tmp = xor i32 4294967295, %b %tmp1 = or i32 %tmp, %a ret i32 %tmp1 } +; CHECK: f4: +; CHECK: orn r0, r0, r1 define i32 @f5(i32 %a, i32 %b) { %tmp = shl i32 %b, 5 @@ -34,6 +39,8 @@ define i32 @f5(i32 %a, i32 %b) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f5: +; CHECK: orn r0, r0, r1, lsl #5 define i32 @f6(i32 %a, i32 %b) { %tmp = lshr i32 %b, 6 @@ -41,6 +48,8 @@ define i32 @f6(i32 %a, i32 %b) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f6: +; CHECK: orn r0, r0, r1, lsr #6 define i32 @f7(i32 %a, i32 %b) { %tmp = ashr i32 %b, 7 @@ -48,6 +57,8 @@ define i32 @f7(i32 %a, i32 %b) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f7: +; CHECK: orn r0, r0, r1, asr #7 define i32 @f8(i32 %a, i32 %b) { %l8 = shl i32 %a, 24 @@ -57,3 +68,5 @@ define i32 @f8(i32 %a, i32 %b) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f8: +; CHECK: orn r0, r0, r0, ror #8 diff --git a/test/CodeGen/Thumb2/thumb2-orn2.ll b/test/CodeGen/Thumb2/thumb2-orn2.ll index 7b018826a621..34ab3a56663c 100644 --- a/test/CodeGen/Thumb2/thumb2-orn2.ll +++ b/test/CodeGen/Thumb2/thumb2-orn2.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} |\ -; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112} | count 4 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + ; 0x000000bb = 187 define i32 @f1(i32 %a) { @@ -7,6 +7,8 @@ define i32 @f1(i32 %a) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f1: +; CHECK: orn r0, r0, #187 ; 0x00aa00aa = 11141290 define i32 @f2(i32 %a) { @@ -14,6 +16,8 
@@ define i32 @f2(i32 %a) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f2: +; CHECK: orn r0, r0, #11141290 ; 0xcc00cc00 = 3422604288 define i32 @f3(i32 %a) { @@ -21,6 +25,8 @@ define i32 @f3(i32 %a) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f3: +; CHECK: orn r0, r0, #-872363008 ; 0x00110000 = 1114112 define i32 @f5(i32 %a) { @@ -28,3 +34,5 @@ define i32 @f5(i32 %a) { %tmp2 = or i32 %a, %tmp1 ret i32 %tmp2 } +; CHECK: f5: +; CHECK: orn r0, r0, #1114112 diff --git a/test/CodeGen/Thumb2/thumb2-orr2.ll b/test/CodeGen/Thumb2/thumb2-orr2.ll index 759a5b8dd894..8f7a3c2a61a9 100644 --- a/test/CodeGen/Thumb2/thumb2-orr2.ll +++ b/test/CodeGen/Thumb2/thumb2-orr2.ll @@ -1,31 +1,42 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#-872363008\\|#1145324612\\|#1114112} | count 5 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + ; 0x000000bb = 187 define i32 @f1(i32 %a) { %tmp2 = or i32 %a, 187 ret i32 %tmp2 } +; CHECK: f1: +; CHECK: orr r0, r0, #187 ; 0x00aa00aa = 11141290 define i32 @f2(i32 %a) { %tmp2 = or i32 %a, 11141290 ret i32 %tmp2 } +; CHECK: f2: +; CHECK: orr r0, r0, #11141290 ; 0xcc00cc00 = 3422604288 define i32 @f3(i32 %a) { %tmp2 = or i32 %a, 3422604288 ret i32 %tmp2 } +; CHECK: f3: +; CHECK: orr r0, r0, #-872363008 ; 0x44444444 = 1145324612 define i32 @f4(i32 %a) { %tmp2 = or i32 %a, 1145324612 ret i32 %tmp2 } +; CHECK: f4: +; CHECK: orr r0, r0, #1145324612 ; 0x00110000 = 1114112 define i32 @f5(i32 %a) { %tmp2 = or i32 %a, 1114112 ret i32 %tmp2 } +; CHECK: f5: +; CHECK: orr r0, r0, #1114112 diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll index 01adb528087b..0200116fc31a 100644 --- a/test/CodeGen/Thumb2/thumb2-ror.ll +++ b/test/CodeGen/Thumb2/thumb2-ror.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {ror\\.w\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1 +; RUN: llc < %s -march=thumb 
-mattr=+thumb2 | FileCheck %s + define i32 @f1(i32 %a) { %l8 = shl i32 %a, 10 @@ -6,3 +7,5 @@ define i32 @f1(i32 %a) { %tmp = or i32 %l8, %r8 ret i32 %tmp } +; CHECK: f1: +; CHECK: ror.w r0, r0, #22 diff --git a/test/CodeGen/Thumb2/thumb2-rsb.ll b/test/CodeGen/Thumb2/thumb2-rsb.ll index 4611e9435034..15185be94621 100644 --- a/test/CodeGen/Thumb2/thumb2-rsb.ll +++ b/test/CodeGen/Thumb2/thumb2-rsb.ll @@ -1,30 +1,35 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsl\\W*#5$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*lsr\\W*#6$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*asr\\W*#7$} | count 1 -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*ror\\W*#8$} | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s -define i32 @f2(i32 %a, i32 %b) { +define i32 @f1(i32 %a, i32 %b) { %tmp = shl i32 %b, 5 %tmp1 = sub i32 %tmp, %a ret i32 %tmp1 } +; CHECK: f1: +; CHECK: rsb r0, r0, r1, lsl #5 -define i32 @f3(i32 %a, i32 %b) { +define i32 @f2(i32 %a, i32 %b) { %tmp = lshr i32 %b, 6 %tmp1 = sub i32 %tmp, %a ret i32 %tmp1 } +; CHECK: f2: +; CHECK: rsb r0, r0, r1, lsr #6 -define i32 @f4(i32 %a, i32 %b) { +define i32 @f3(i32 %a, i32 %b) { %tmp = ashr i32 %b, 7 %tmp1 = sub i32 %tmp, %a ret i32 %tmp1 } +; CHECK: f3: +; CHECK: rsb r0, r0, r1, asr #7 -define i32 @f5(i32 %a, i32 %b) { +define i32 @f4(i32 %a, i32 %b) { %l8 = shl i32 %a, 24 %r8 = lshr i32 %a, 8 %tmp = or i32 %l8, %r8 %tmp1 = sub i32 %tmp, %a ret i32 %tmp1 } +; CHECK: f4: +; CHECK: rsb r0, r0, r0, ror #8 diff --git a/test/CodeGen/Thumb2/thumb2-rsb2.ll b/test/CodeGen/Thumb2/thumb2-rsb2.ll index 84a379677ad4..61fb619c40e7 100644 --- a/test/CodeGen/Thumb2/thumb2-rsb2.ll +++ b/test/CodeGen/Thumb2/thumb2-rsb2.ll @@ -1,31 +1,41 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep 
{rsb\\.w\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s ; 171 = 0x000000ab define i32 @f1(i32 %a) { %tmp = sub i32 171, %a ret i32 %tmp } +; CHECK: f1: +; CHECK: rsb.w r0, r0, #171 ; 1179666 = 0x00120012 define i32 @f2(i32 %a) { %tmp = sub i32 1179666, %a ret i32 %tmp } +; CHECK: f2: +; CHECK: rsb.w r0, r0, #1179666 ; 872428544 = 0x34003400 define i32 @f3(i32 %a) { %tmp = sub i32 872428544, %a ret i32 %tmp } +; CHECK: f3: +; CHECK: rsb.w r0, r0, #872428544 ; 1448498774 = 0x56565656 define i32 @f4(i32 %a) { %tmp = sub i32 1448498774, %a ret i32 %tmp } +; CHECK: f4: +; CHECK: rsb.w r0, r0, #1448498774 ; 66846720 = 0x03fc0000 define i32 @f5(i32 %a) { %tmp = sub i32 66846720, %a ret i32 %tmp } +; CHECK: f5: +; CHECK: rsb.w r0, r0, #66846720 diff --git a/test/CodeGen/Thumb2/thumb2-select_xform.ll b/test/CodeGen/Thumb2/thumb2-select_xform.ll index 44fa2458c1e4..7fc2e2a49bd8 100644 --- a/test/CodeGen/Thumb2/thumb2-select_xform.ll +++ b/test/CodeGen/Thumb2/thumb2-select_xform.ll @@ -2,9 +2,9 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; CHECK: t1 -; CHECK: mvn r0, #-2147483648 +; CHECK: sub.w r0, r1, #-2147483648 ; CHECK: cmp r2, #10 -; CHECK: add.w r0, r1, r0 +; CHECK: sub.w r0, r0, #1 ; CHECK: it gt ; CHECK: movgt r0, r1 %tmp1 = icmp sgt i32 %c, 10 diff --git a/test/CodeGen/Thumb2/thumb2-sub2.ll b/test/CodeGen/Thumb2/thumb2-sub2.ll index 6813f76d8932..bb99cbd67fcf 100644 --- a/test/CodeGen/Thumb2/thumb2-sub2.ll +++ b/test/CodeGen/Thumb2/thumb2-sub2.ll @@ -1,6 +1,8 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {subw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s define i32 @f1(i32 %a) { %tmp = sub i32 %a, 4095 ret i32 %tmp } +; CHECK: f1: +; CHECK: subw r0, r0, #4095 diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll 
index 634d318c85c4..69f03837f4bf 100644 --- a/test/CodeGen/Thumb2/thumb2-teq.ll +++ b/test/CodeGen/Thumb2/thumb2-teq.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {teq\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \ -; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + ; 0x000000bb = 187 define i1 @f1(i32 %a) { @@ -7,6 +7,8 @@ define i1 @f1(i32 %a) { %tmp1 = icmp ne i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f1: +; CHECK: teq.w r0, #187 ; 0x000000bb = 187 define i1 @f2(i32 %a) { @@ -14,6 +16,8 @@ define i1 @f2(i32 %a) { %tmp1 = icmp eq i32 0, %tmp ret i1 %tmp1 } +; CHECK: f2: +; CHECK: teq.w r0, #187 ; 0x00aa00aa = 11141290 define i1 @f3(i32 %a) { @@ -21,6 +25,8 @@ define i1 @f3(i32 %a) { %tmp1 = icmp eq i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f3: +; CHECK: teq.w r0, #11141290 ; 0x00aa00aa = 11141290 define i1 @f4(i32 %a) { @@ -28,6 +34,8 @@ define i1 @f4(i32 %a) { %tmp1 = icmp ne i32 0, %tmp ret i1 %tmp1 } +; CHECK: f4: +; CHECK: teq.w r0, #11141290 ; 0xcc00cc00 = 3422604288 define i1 @f5(i32 %a) { @@ -35,6 +43,8 @@ define i1 @f5(i32 %a) { %tmp1 = icmp ne i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f5: +; CHECK: teq.w r0, #-872363008 ; 0xcc00cc00 = 3422604288 define i1 @f6(i32 %a) { @@ -42,6 +52,8 @@ define i1 @f6(i32 %a) { %tmp1 = icmp eq i32 0, %tmp ret i1 %tmp1 } +; CHECK: f6: +; CHECK: teq.w r0, #-872363008 ; 0xdddddddd = 3722304989 define i1 @f7(i32 %a) { @@ -49,6 +61,8 @@ define i1 @f7(i32 %a) { %tmp1 = icmp eq i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f7: +; CHECK: teq.w r0, #-572662307 ; 0xdddddddd = 3722304989 define i1 @f8(i32 %a) { @@ -56,6 +70,8 @@ define i1 @f8(i32 %a) { %tmp1 = icmp ne i32 0, %tmp ret i1 %tmp1 } +; CHECK: f8: +; CHECK: teq.w r0, #-572662307 ; 0x00110000 = 1114112 define i1 @f9(i32 %a) { @@ -63,6 +79,8 @@ define i1 @f9(i32 %a) { %tmp1 = icmp ne i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f9: +; CHECK: teq.w r0, #1114112 ; 0x00110000 = 1114112 define i1 @f10(i32 
%a) { @@ -70,3 +88,6 @@ define i1 @f10(i32 %a) { %tmp1 = icmp eq i32 0, %tmp ret i1 %tmp1 } +; CHECK: f10: +; CHECK: teq.w r0, #1114112 + diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll index 525a817fe37e..d905217189f4 100644 --- a/test/CodeGen/Thumb2/thumb2-tst.ll +++ b/test/CodeGen/Thumb2/thumb2-tst.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep {tst\\.w\\W*r\[0-9\],\\W*#\[0-9\]*} | \ -; RUN: grep {#187\\|#11141290\\|#-872363008\\|#1114112\\|#-572662307} | count 10 +; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s + ; 0x000000bb = 187 define i1 @f1(i32 %a) { @@ -7,6 +7,8 @@ define i1 @f1(i32 %a) { %tmp1 = icmp ne i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f1: +; CHECK: tst.w r0, #187 ; 0x000000bb = 187 define i1 @f2(i32 %a) { @@ -14,6 +16,8 @@ define i1 @f2(i32 %a) { %tmp1 = icmp eq i32 0, %tmp ret i1 %tmp1 } +; CHECK: f2: +; CHECK: tst.w r0, #187 ; 0x00aa00aa = 11141290 define i1 @f3(i32 %a) { @@ -21,6 +25,8 @@ define i1 @f3(i32 %a) { %tmp1 = icmp eq i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f3: +; CHECK: tst.w r0, #11141290 ; 0x00aa00aa = 11141290 define i1 @f4(i32 %a) { @@ -28,6 +34,8 @@ define i1 @f4(i32 %a) { %tmp1 = icmp ne i32 0, %tmp ret i1 %tmp1 } +; CHECK: f4: +; CHECK: tst.w r0, #11141290 ; 0xcc00cc00 = 3422604288 define i1 @f5(i32 %a) { @@ -35,6 +43,8 @@ define i1 @f5(i32 %a) { %tmp1 = icmp ne i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f5: +; CHECK: tst.w r0, #-872363008 ; 0xcc00cc00 = 3422604288 define i1 @f6(i32 %a) { @@ -42,6 +52,8 @@ define i1 @f6(i32 %a) { %tmp1 = icmp eq i32 0, %tmp ret i1 %tmp1 } +; CHECK: f6: +; CHECK: tst.w r0, #-872363008 ; 0xdddddddd = 3722304989 define i1 @f7(i32 %a) { @@ -49,6 +61,8 @@ define i1 @f7(i32 %a) { %tmp1 = icmp eq i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f7: +; CHECK: tst.w r0, #-572662307 ; 0xdddddddd = 3722304989 define i1 @f8(i32 %a) { @@ -56,6 +70,8 @@ define i1 @f8(i32 %a) { %tmp1 = icmp ne i32 0, %tmp ret i1 %tmp1 } +; CHECK: f8: +; CHECK: tst.w r0, 
#-572662307 ; 0x00110000 = 1114112 define i1 @f9(i32 %a) { @@ -63,6 +79,8 @@ define i1 @f9(i32 %a) { %tmp1 = icmp ne i32 %tmp, 0 ret i1 %tmp1 } +; CHECK: f9: +; CHECK: tst.w r0, #1114112 ; 0x00110000 = 1114112 define i1 @f10(i32 %a) { @@ -70,3 +88,5 @@ define i1 @f10(i32 %a) { %tmp1 = icmp eq i32 0, %tmp ret i1 %tmp1 } +; CHECK: f10: +; CHECK: tst.w r0, #1114112 diff --git a/test/CodeGen/X86/2008-08-05-SpillerBug.ll b/test/CodeGen/X86/2008-08-05-SpillerBug.ll index 1d166f488158..67e14ffae5e6 100644 --- a/test/CodeGen/X86/2008-08-05-SpillerBug.ll +++ b/test/CodeGen/X86/2008-08-05-SpillerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 59 +; RUN: llc < %s -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 58 ; PR2568 @g_3 = external global i16 ; [#uses=1] diff --git a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll index 878fa51d5dc3..ad7f9f7d1c11 100644 --- a/test/CodeGen/X86/2009-03-13-PHIElimBug.ll +++ b/test/CodeGen/X86/2009-03-13-PHIElimBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 | grep -A 2 {call.*f} | grep movl +; RUN: llc < %s -march=x86 | FileCheck %s ; Check the register copy comes after the call to f and before the call to g ; PR3784 @@ -26,3 +26,7 @@ lpad: ; preds = %cont, %entry %y = phi i32 [ %a, %entry ], [ %aa, %cont ] ; [#uses=1] ret i32 %y } + +; CHECK: call{{.*}}f +; CHECK-NEXT: Llabel1: +; CHECK-NEXT: movl %eax, %esi diff --git a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll index adbd241cd98f..11c410173fcb 100644 --- a/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll +++ b/test/CodeGen/X86/2009-03-16-PHIElimInLPad.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86 -asm-verbose | grep -A 1 lpad | grep Llabel +; RUN: llc < %s -march=x86 -asm-verbose | FileCheck %s ; Check that register copies in the landing pad come after the EH_LABEL declare i32 @f() @@ -19,3 +19,6 @@ 
lpad: ; preds = %cont, %entry %v = phi i32 [ %x, %entry ], [ %a, %cont ] ; [#uses=1] ret i32 %v } + +; CHECK: lpad +; CHECK-NEXT: Llabel diff --git a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll index f3cf1d5e7019..d372da336769 100644 --- a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll +++ b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll @@ -10,6 +10,7 @@ entry: bb: ; preds = %bb1, %entry ; CHECK: addl $1 +; CHECK-NEXT: movl %e ; CHECK-NEXT: adcl $0 %i.0 = phi i64 [ 0, %entry ], [ %0, %bb1 ] ; [#uses=1] %0 = add nsw i64 %i.0, 1 ; [#uses=2] diff --git a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll index ef10ae59ab6b..91c5440b278f 100644 --- a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll +++ b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {machine-licm} | grep 1 +; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {machine-licm} | grep 2 ; rdar://7274692 %0 = type { [125 x i32] } diff --git a/test/CodeGen/X86/2009-11-25-ImpDefBug.ll b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll new file mode 100644 index 000000000000..7606c0e1acc9 --- /dev/null +++ b/test/CodeGen/X86/2009-11-25-ImpDefBug.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu +; pr5600 + +%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t } +%struct.ASN1ObjHeader = type { i8, %"struct.__gmp_expr<__mpz_struct [1],__mpz_struct [1]>", i64, i32, i32, i32 } +%struct.ASN1Object = type { i32 (...)**, i32, i32, i64 } +%struct.ASN1Unit = type { [4 x i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)*], %"struct.std::ASN1ObjList" } +%"struct.__gmp_expr<__mpz_struct [1],__mpz_struct [1]>" = type { [1 x %struct.__mpz_struct] } +%struct.__mpz_struct = type { i32, i32, i64* } +%struct.__pthread_list_t = type { 
%struct.__pthread_list_t*, %struct.__pthread_list_t* } +%struct.pthread_attr_t = type { i64, [48 x i8] } +%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s } +%struct.pthread_mutexattr_t = type { i32 } +%"struct.std::ASN1ObjList" = type { %"struct.std::_Vector_base >" } +%"struct.std::_Vector_base >" = type { %"struct.std::_Vector_base >::_Vector_impl" } +%"struct.std::_Vector_base >::_Vector_impl" = type { %struct.ASN1Object**, %struct.ASN1Object**, %struct.ASN1Object** } +%struct.xmstream = type { i8*, i64, i64, i64, i8 } + +declare void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* nocapture, i64, %struct.ASN1Object** nocapture) + +declare i32 @_Z17LoadObjectFromBERR8xmstreamPP10ASN1ObjectPPF10ASN1StatusP13ASN1ObjHeaderS3_E(%struct.xmstream*, %struct.ASN1Object**, i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)**) + +define i32 @_ZN8ASN1Unit4loadER8xmstreamjm18ASN1LengthEncoding(%struct.ASN1Unit* %this, %struct.xmstream* nocapture %stream, i32 %numObjects, i64 %size, i32 %lEncoding) { +entry: + br label %meshBB85 + +bb5: ; preds = %bb13.fragment.cl135, %bb13.fragment.cl, %bb.i.i.bbcl.disp, %bb13.fragment + %0 = invoke i32 @_Z17LoadObjectFromBERR8xmstreamPP10ASN1ObjectPPF10ASN1StatusP13ASN1ObjHeaderS3_E(%struct.xmstream* undef, %struct.ASN1Object** undef, i32 (%struct.ASN1ObjHeader*, %struct.ASN1Object**)** undef) + to label %meshBB81.bbcl.disp unwind label %lpad ; [#uses=0] + +bb10.fragment: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label %bb1.i.fragment.bbcl.disp, label %bb.i.i.bbcl.disp + +bb1.i.fragment: ; preds = %bb1.i.fragment.bbcl.disp + invoke void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* undef, i64 undef, %struct.ASN1Object** undef) + to label %meshBB81.bbcl.disp unwind label %lpad + +bb13.fragment: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label 
%meshBB81.bbcl.disp, label %bb5 + +bb.i4: ; preds = %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp + ret i32 undef + +bb1.i5: ; preds = %bb.i1 + ret i32 undef + +lpad: ; preds = %bb1.i.fragment.cl, %bb1.i.fragment, %bb5 + %.SV10.phi807 = phi i8* [ undef, %bb1.i.fragment.cl ], [ undef, %bb1.i.fragment ], [ undef, %bb5 ] ; [#uses=1] + %1 = load i8* %.SV10.phi807, align 8 ; [#uses=0] + br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp + +bb.i1: ; preds = %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb1.i5 + +meshBB81: ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp + +meshBB85: ; preds = %meshBB81.bbcl.disp, %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp, %bb.i.i.bbcl.disp, %entry + br i1 undef, label %meshBB81.bbcl.disp, label %bb13.fragment.bbcl.disp + +bb.i.i.bbcl.disp: ; preds = %bb10.fragment + switch i8 undef, label %meshBB85 [ + i8 123, label %bb.i1 + i8 97, label %bb5 + i8 44, label %meshBB81 + i8 1, label %meshBB81.cl + i8 51, label %meshBB81.cl141 + ] + +bb1.i.fragment.cl: ; preds = %bb1.i.fragment.bbcl.disp + invoke void @_ZNSt6vectorIP10ASN1ObjectSaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"struct.std::ASN1ObjList"* undef, i64 undef, %struct.ASN1Object** undef) + to label %meshBB81.bbcl.disp unwind label %lpad + +bb1.i.fragment.bbcl.disp: ; preds = %bb10.fragment + switch i8 undef, label %bb.i4 [ + i8 97, label %bb1.i.fragment + i8 7, label %bb1.i.fragment.cl + i8 35, label %bb.i4.cl + i8 77, label %meshBB85 + ] + +bb13.fragment.cl: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb5 + +bb13.fragment.cl135: ; preds = %bb13.fragment.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb5 + +bb13.fragment.bbcl.disp: ; preds = %meshBB85, %lpad + switch i8 undef, label %bb10.fragment [ + i8 67, label %bb13.fragment.cl + i8 108, label %bb13.fragment + i8 58, label %bb13.fragment.cl135 + ] + 
+bb.i4.cl: ; preds = %bb.i4.bbcl.disp, %bb1.i.fragment.bbcl.disp + ret i32 undef + +bb.i4.bbcl.disp: ; preds = %meshBB81.cl141, %meshBB81.cl, %meshBB81 + switch i8 undef, label %bb.i4 [ + i8 35, label %bb.i4.cl + i8 77, label %meshBB85 + ] + +meshBB81.cl: ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp + +meshBB81.cl141: ; preds = %meshBB81.bbcl.disp, %bb.i.i.bbcl.disp + br i1 undef, label %meshBB81.bbcl.disp, label %bb.i4.bbcl.disp + +meshBB81.bbcl.disp: ; preds = %meshBB81.cl141, %meshBB81.cl, %bb13.fragment.cl135, %bb13.fragment.cl, %bb1.i.fragment.cl, %meshBB85, %meshBB81, %bb.i1, %lpad, %bb13.fragment, %bb1.i.fragment, %bb5 + switch i8 undef, label %meshBB85 [ + i8 44, label %meshBB81 + i8 1, label %meshBB81.cl + i8 51, label %meshBB81.cl141 + ] +} diff --git a/test/CodeGen/X86/cmp1.ll b/test/CodeGen/X86/cmp1.ll deleted file mode 100644 index d4aa399ae95d..000000000000 --- a/test/CodeGen/X86/cmp1.ll +++ /dev/null @@ -1,7 +0,0 @@ -; RUN: llc < %s -march=x86-64 | grep -v cmp - -define i64 @foo(i64 %x) { - %t = icmp slt i64 %x, 1 - %r = zext i1 %t to i64 - ret i64 %r -} diff --git a/test/CodeGen/X86/fp_constant_op.ll b/test/CodeGen/X86/fp_constant_op.ll index 8e823ede56a0..b3ec5388d704 100644 --- a/test/CodeGen/X86/fp_constant_op.ll +++ b/test/CodeGen/X86/fp_constant_op.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=i486 | \ -; RUN: grep {fadd\\|fsub\\|fdiv\\|fmul} | not grep -i ST - +; RUN: llc < %s -march=x86 -x86-asm-syntax=intel -mcpu=i486 | FileCheck %s ; Test that the load of the constant is folded into the operation. 
@@ -8,28 +6,41 @@ define double @foo_add(double %P) { %tmp.1 = fadd double %P, 1.230000e+02 ; [#uses=1] ret double %tmp.1 } +; CHECK: foo_add: +; CHECK: fadd DWORD PTR define double @foo_mul(double %P) { %tmp.1 = fmul double %P, 1.230000e+02 ; [#uses=1] ret double %tmp.1 } +; CHECK: foo_mul: +; CHECK: fmul DWORD PTR define double @foo_sub(double %P) { %tmp.1 = fsub double %P, 1.230000e+02 ; [#uses=1] ret double %tmp.1 } +; CHECK: foo_sub: +; CHECK: fadd DWORD PTR define double @foo_subr(double %P) { %tmp.1 = fsub double 1.230000e+02, %P ; [#uses=1] ret double %tmp.1 } +; CHECK: foo_subr: +; CHECK: fsub QWORD PTR define double @foo_div(double %P) { %tmp.1 = fdiv double %P, 1.230000e+02 ; [#uses=1] ret double %tmp.1 } +; CHECK: foo_div: +; CHECK: fdiv DWORD PTR define double @foo_divr(double %P) { %tmp.1 = fdiv double 1.230000e+02, %P ; [#uses=1] ret double %tmp.1 } +; CHECK: foo_divr: +; CHECK: fdiv QWORD PTR + diff --git a/test/CodeGen/X86/palignr-2.ll b/test/CodeGen/X86/palignr-2.ll index 2936641e95d9..116d4c71814a 100644 --- a/test/CodeGen/X86/palignr-2.ll +++ b/test/CodeGen/X86/palignr-2.ll @@ -9,12 +9,12 @@ define void @t1(<2 x i64> %a, <2 x i64> %b) nounwind ssp { entry: ; CHECK: t1: ; palignr $3, %xmm1, %xmm0 - %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i32 24) nounwind readnone + %0 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %a, <2 x i64> %b, i8 24) nounwind readnone store <2 x i64> %0, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16 ret void } -declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i32) nounwind readnone +declare <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64>, <2 x i64>, i8) nounwind readnone define void @t2() nounwind ssp { entry: @@ -22,7 +22,7 @@ entry: ; palignr $4, _b, %xmm0 %0 = load <2 x i64>* bitcast ([4 x i32]* @b to <2 x i64>*), align 16 ; <<2 x i64>> [#uses=1] %1 = load <2 x i64>* bitcast ([4 x i32]* @a to <2 x i64>*), align 16 ; <<2 x i64>> 
[#uses=1] - %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i32 32) nounwind readnone + %2 = tail call <2 x i64> @llvm.x86.ssse3.palign.r.128(<2 x i64> %1, <2 x i64> %0, i8 32) nounwind readnone store <2 x i64> %2, <2 x i64>* bitcast ([4 x i32]* @c to <2 x i64>*), align 16 ret void } diff --git a/test/CodeGen/X86/pic-load-remat.ll b/test/CodeGen/X86/pic-load-remat.ll index d930f76a7747..77297521cd0d 100644 --- a/test/CodeGen/X86/pic-load-remat.ll +++ b/test/CodeGen/X86/pic-load-remat.ll @@ -1,10 +1,4 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -relocation-model=pic | grep psllw | grep pb -; XFAIL: * - -; This is XFAIL'd because MachineLICM is now hoisting all of the loads, and the pic -; base appears killed in the entry block when remat is making its decisions. Remat's -; simple heuristic decides against rematting because it doesn't want to extend the -; live-range of the pic base; this isn't necessarily optimal. define void @f() nounwind { entry: diff --git a/test/CodeGen/X86/scalar_widen_div.ll b/test/CodeGen/X86/scalar_widen_div.ll new file mode 100644 index 000000000000..fc67e4417c4a --- /dev/null +++ b/test/CodeGen/X86/scalar_widen_div.ll @@ -0,0 +1,154 @@ +; RUN: llc < %s -disable-mmx -march=x86-64 -mattr=+sse42 | FileCheck %s + +; Verify when widening a divide/remainder operation, we only generate a +; divide/rem per element since divide/remainder can trap. 
+ +define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)* %qdest) nounwind { +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl +; CHECK: ret +entry: + %nsource.addr = alloca <2 x i32> addrspace(1)*, align 4 + %dsource.addr = alloca <2 x i32> addrspace(1)*, align 4 + %qdest.addr = alloca <2 x i32> addrspace(1)*, align 4 + %index = alloca i32, align 4 + store <2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1)** %nsource.addr + store <2 x i32> addrspace(1)* %dsource, <2 x i32> addrspace(1)** %dsource.addr + store <2 x i32> addrspace(1)* %qdest, <2 x i32> addrspace(1)** %qdest.addr + %tmp = load <2 x i32> addrspace(1)** %qdest.addr + %tmp1 = load i32* %index + %arrayidx = getelementptr <2 x i32> addrspace(1)* %tmp, i32 %tmp1 + %tmp2 = load <2 x i32> addrspace(1)** %nsource.addr + %tmp3 = load i32* %index + %arrayidx4 = getelementptr <2 x i32> addrspace(1)* %tmp2, i32 %tmp3 + %tmp5 = load <2 x i32> addrspace(1)* %arrayidx4 + %tmp6 = load <2 x i32> addrspace(1)** %dsource.addr + %tmp7 = load i32* %index + %arrayidx8 = getelementptr <2 x i32> addrspace(1)* %tmp6, i32 %tmp7 + %tmp9 = load <2 x i32> addrspace(1)* %arrayidx8 + %tmp10 = sdiv <2 x i32> %tmp5, %tmp9 + store <2 x i32> %tmp10, <2 x i32> addrspace(1)* %arrayidx + ret void +} + +define <3 x i8> @test_char_div(<3 x i8> %num, <3 x i8> %div) { +; CHECK: idivb +; CHECK: idivb +; CHECK: idivb +; CHECK-NOT: idivb +; CHECK: ret + %div.r = sdiv <3 x i8> %num, %div + ret <3 x i8> %div.r +} + +define <3 x i8> @test_uchar_div(<3 x i8> %num, <3 x i8> %div) { +; CHECK: divb +; CHECK: divb +; CHECK: divb +; CHECK-NOT: divb +; CHECK: ret + %div.r = udiv <3 x i8> %num, %div + ret <3 x i8> %div.r +} + +define <5 x i16> @test_short_div(<5 x i16> %num, <5 x i16> %div) { +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK-NOT: idivw +; CHECK: ret + %div.r = sdiv <5 x i16> %num, %div + ret <5 x i16> %div.r +} + +define <4 x i16> 
@test_ushort_div(<4 x i16> %num, <4 x i16> %div) { +; CHECK: divw +; CHECK: divw +; CHECK: divw +; CHECK: divw +; CHECK-NOT: divw +; CHECK: ret + %div.r = udiv <4 x i16> %num, %div + ret <4 x i16> %div.r +} + +define <3 x i32> @test_uint_div(<3 x i32> %num, <3 x i32> %div) { +; CHECK: divl +; CHECK: divl +; CHECK: divl +; CHECK-NOT: divl +; CHECK: ret + %div.r = udiv <3 x i32> %num, %div + ret <3 x i32> %div.r +} + +define <3 x i64> @test_long_div(<3 x i64> %num, <3 x i64> %div) { +; CHECK: idivq +; CHECK: idivq +; CHECK: idivq +; CHECK-NOT: idivq +; CHECK: ret + %div.r = sdiv <3 x i64> %num, %div + ret <3 x i64> %div.r +} + +define <3 x i64> @test_ulong_div(<3 x i64> %num, <3 x i64> %div) { +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK-NOT: divq +; CHECK: ret + %div.r = udiv <3 x i64> %num, %div + ret <3 x i64> %div.r +} + + +define <4 x i8> @test_char_rem(<4 x i8> %num, <4 x i8> %rem) { +; CHECK: idivb +; CHECK: idivb +; CHECK: idivb +; CHECK: idivb +; CHECK-NOT: idivb +; CHECK: ret + %rem.r = srem <4 x i8> %num, %rem + ret <4 x i8> %rem.r +} + +define <5 x i16> @test_short_rem(<5 x i16> %num, <5 x i16> %rem) { +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK: idivw +; CHECK-NOT: idivw +; CHECK: ret + %rem.r = srem <5 x i16> %num, %rem + ret <5 x i16> %rem.r +} + +define <4 x i32> @test_uint_rem(<4 x i32> %num, <4 x i32> %rem) { +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK: idivl +; CHECK-NOT: idivl +; CHECK: ret + %rem.r = srem <4 x i32> %num, %rem + ret <4 x i32> %rem.r +} + + +define <5 x i64> @test_ulong_rem(<5 x i64> %num, <5 x i64> %rem) { +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK: divq +; CHECK-NOT: divq +; CHECK: ret + %rem.r = urem <5 x i64> %num, %rem + ret <5 x i64> %rem.r +} diff --git a/test/CodeGen/X86/tailcall-fastisel.ll b/test/CodeGen/X86/tailcall-fastisel.ll new file mode 100644 index 000000000000..d54fb4115b07 --- /dev/null +++ b/test/CodeGen/X86/tailcall-fastisel.ll @@ -0,0 
+1,13 @@ +; RUN: llc < %s -march=x86-64 -tailcallopt -fast-isel | grep TAILCALL + +; Fast-isel shouldn't attempt to handle this tail call, and it should +; cleanly terminate instruction selection in the block after it's +; done to avoid emitting invalid MachineInstrs. + +%0 = type { i64, i32, i8* } + +define fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 %arg1) nounwind { +fail: ; preds = %entry + %tmp20 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %arg, i32 undef) ; [#uses=1] + ret i8* %tmp20 +} diff --git a/test/CodeGen/X86/tailcall-stackalign.ll b/test/CodeGen/X86/tailcall-stackalign.ll index 110472c8b9f3..0233139e8082 100644 --- a/test/CodeGen/X86/tailcall-stackalign.ll +++ b/test/CodeGen/X86/tailcall-stackalign.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | grep -A 1 call | grep -A 1 tailcaller | grep subl | grep 12 +; RUN: llc < %s -mtriple=i686-unknown-linux -tailcallopt | FileCheck %s ; Linux has 8 byte alignment so the params cause stack size 20 when tailcallopt ; is enabled, ensure that a normal fastcc call has matching stack size @@ -19,6 +19,5 @@ define i32 @main(i32 %argc, i8** %argv) { ret i32 0 } - - - +; CHECK: call tailcaller +; CHECK-NEXT: subl $12 diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll index 374d404a968c..bfab1aef9014 100644 --- a/test/CodeGen/X86/trunc-to-bool.ll +++ b/test/CodeGen/X86/trunc-to-bool.ll @@ -1,13 +1,13 @@ ; An integer truncation to i1 should be done with an and instruction to make ; sure only the LSBit survives. Test that this is the case both for a returned ; value and as the operand of a branch. 
-; RUN: llc < %s -march=x86 | grep {\\(and\\)\\|\\(test.*\\\$1\\)} | \ -; RUN: count 5 +; RUN: llc < %s -march=x86 | FileCheck %s define i1 @test1(i32 %X) zeroext { %Y = trunc i32 %X to i1 ret i1 %Y } +; CHECK: andl $1, %eax define i1 @test2(i32 %val, i32 %mask) { entry: @@ -20,6 +20,7 @@ ret_true: ret_false: ret i1 false } +; CHECK: testb $1, %al define i32 @test3(i8* %ptr) { %val = load i8* %ptr @@ -30,6 +31,7 @@ cond_true: cond_false: ret i32 42 } +; CHECK: testb $1, %al define i32 @test4(i8* %ptr) { %tmp = ptrtoint i8* %ptr to i1 @@ -39,6 +41,7 @@ cond_true: cond_false: ret i32 42 } +; CHECK: testb $1, %al define i32 @test6(double %d) { %tmp = fptosi double %d to i1 @@ -48,4 +51,4 @@ cond_true: cond_false: ret i32 42 } - +; CHECK: testb $1 diff --git a/test/Feature/md_on_instruction2.ll b/test/Feature/md_on_instruction2.ll deleted file mode 100644 index da9e49ebfb2f..000000000000 --- a/test/Feature/md_on_instruction2.ll +++ /dev/null @@ -1,22 +0,0 @@ -; RUN: llvm-as < %s | llvm-dis | grep " !dbg " | count 4 -define i32 @foo() nounwind ssp { -entry: - %retval = alloca i32 ; [#uses=2] - call void @llvm.dbg.func.start(metadata !0) - store i32 42, i32* %retval, !dbg !3 - br label %0, !dbg !3 - -;