From d0e4e96dc17a6c1c6de3340842c80f0e187ba349 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sat, 26 Feb 2011 22:03:50 +0000 Subject: [PATCH 1/3] Vendor import of llvm trunk r126547: http://llvm.org/svn/llvm-project/llvm/trunk@126547 --- CMakeLists.txt | 3 +- Makefile.rules | 2 + cmake/modules/AddLLVM.cmake | 13 +- cmake/modules/CMakeLists.txt | 1 - cmake/modules/HandleLLVMOptions.cmake | 1 + cmake/modules/LLVM.cmake | 6 +- docs/GettingStartedVS.html | 4 +- docs/LangRef.html | 41 +- include/llvm/ADT/APInt.h | 6 + include/llvm/ADT/ArrayRef.h | 5 +- include/llvm/ADT/ImmutableIntervalMap.h | 2 +- include/llvm/ADT/ImmutableMap.h | 2 +- include/llvm/Analysis/DIBuilder.h | 154 +- include/llvm/CMakeLists.txt | 2 + include/llvm/CodeGen/AsmPrinter.h | 3 +- include/llvm/CodeGen/FunctionLoweringInfo.h | 63 +- include/llvm/CodeGen/MachineConstantPool.h | 3 + include/llvm/CodeGen/MachineFunction.h | 2 +- include/llvm/CodeGen/MachineRegisterInfo.h | 14 +- include/llvm/IntrinsicsXCore.td | 19 + include/llvm/MC/MCAsmInfo.h | 8 + .../llvm/MC/MCParser/MCAsmParserExtension.h | 4 + include/llvm/MC/MCStreamer.h | 37 +- include/llvm/Support/NoFolder.h | 68 +- include/llvm/Support/PathV1.h | 6 +- include/llvm/Target/TargetLowering.h | 96 +- include/llvm/Transforms/Utils/Local.h | 2 +- lib/Analysis/DIBuilder.cpp | 154 +- lib/Analysis/InstructionSimplify.cpp | 10 + lib/CodeGen/AllocationOrder.h | 2 + lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 10 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 9 +- lib/CodeGen/BranchFolding.cpp | 6 +- lib/CodeGen/InlineSpiller.cpp | 20 +- lib/CodeGen/LowerSubregs.cpp | 10 +- lib/CodeGen/MachineFunction.cpp | 12 +- lib/CodeGen/MachineRegisterInfo.cpp | 9 +- lib/CodeGen/RegAllocBase.h | 15 +- lib/CodeGen/RegAllocBasic.cpp | 60 +- lib/CodeGen/RegAllocGreedy.cpp | 193 ++- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 133 +- .../SelectionDAG/FunctionLoweringInfo.cpp | 118 ++ lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 49 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 10 
+- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 31 +- .../SelectionDAG/SelectionDAGBuilder.h | 2 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 55 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 26 +- lib/CodeGen/SplitKit.cpp | 18 +- lib/CodeGen/SplitKit.h | 7 + lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 30 +- lib/CodeGen/VirtRegRewriter.cpp | 6 +- lib/MC/ELFObjectWriter.cpp | 10 +- lib/MC/MCAsmInfo.cpp | 1 + lib/MC/MCAsmInfoDarwin.cpp | 1 + lib/MC/MCDisassembler/EDOperand.cpp | 20 +- lib/MC/MCDisassembler/EDToken.cpp | 4 + lib/MC/MCObjectStreamer.cpp | 18 +- lib/MC/MCParser/AsmParser.cpp | 2 + lib/MC/MCParser/ELFAsmParser.cpp | 12 +- lib/MC/MCParser/MCAsmParserExtension.cpp | 3 +- lib/MC/MCSectionMachO.cpp | 17 +- lib/MC/MCStreamer.cpp | 4 +- lib/Support/APInt.cpp | 2 +- lib/Target/ARM/ARMBaseInstrInfo.h | 3 +- lib/Target/ARM/ARMFastISel.cpp | 38 +- lib/Target/ARM/ARMFrameLowering.cpp | 16 +- lib/Target/ARM/ARMHazardRecognizer.cpp | 17 +- lib/Target/ARM/ARMISelDAGToDAG.cpp | 15 + lib/Target/ARM/ARMISelLowering.cpp | 102 +- lib/Target/ARM/ARMInstrFormats.td | 7 +- lib/Target/ARM/ARMInstrInfo.td | 4 +- lib/Target/ARM/ARMInstrNEON.td | 31 +- lib/Target/ARM/ARMInstrVFP.td | 162 +- lib/Target/ARM/ARMSubtarget.cpp | 4 +- lib/Target/ARM/MLxExpansionPass.cpp | 20 +- lib/Target/ARM/NEONMoveFix.cpp | 9 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 6 + lib/Target/Alpha/AlphaISelLowering.cpp | 1 - lib/Target/Alpha/AlphaISelLowering.h | 14 +- lib/Target/Blackfin/BlackfinISelLowering.cpp | 1 - lib/Target/Blackfin/BlackfinISelLowering.h | 1 + lib/Target/CellSPU/SPUISelLowering.cpp | 7 +- lib/Target/CellSPU/SPUISelLowering.h | 6 +- lib/Target/MBlaze/MBlazeISelLowering.cpp | 4 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 6 +- lib/Target/MSP430/MSP430ISelLowering.h | 2 + lib/Target/PowerPC/PPCISelLowering.cpp | 25 +- lib/Target/PowerPC/PPCISelLowering.h | 94 +- lib/Target/README.txt | 24 + lib/Target/Sparc/DelaySlotFiller.cpp | 31 
+ lib/Target/Sparc/SparcISelLowering.cpp | 43 +- lib/Target/Sparc/SparcISelLowering.h | 2 + lib/Target/Sparc/SparcInstrInfo.td | 18 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 3 - lib/Target/SystemZ/SystemZISelLowering.h | 2 + lib/Target/X86/AsmParser/X86AsmParser.cpp | 13 + .../X86/Disassembler/X86Disassembler.cpp | 8 +- .../X86/Disassembler/X86DisassemblerDecoder.h | 2 +- lib/Target/X86/README.txt | 82 +- lib/Target/X86/X86FastISel.cpp | 8 +- lib/Target/X86/X86ISelLowering.cpp | 50 +- lib/Target/X86/X86ISelLowering.h | 18 +- lib/Target/X86/X86InstrFormats.td | 2 + lib/Target/X86/X86InstrInfo.cpp | 4 +- lib/Target/X86/X86InstrInfo.h | 4 + lib/Target/X86/X86InstrInfo.td | 3 + lib/Target/X86/X86InstrSystem.td | 5 + lib/Target/X86/X86MCCodeEmitter.cpp | 8 + lib/Target/X86/X86Subtarget.cpp | 7 +- lib/Target/X86/X86Subtarget.h | 2 + lib/Target/XCore/XCoreISelLowering.cpp | 125 +- lib/Target/XCore/XCoreISelLowering.h | 23 +- lib/Target/XCore/XCoreInstrInfo.td | 55 +- .../InstCombine/InstCombineAndOrXor.cpp | 33 + .../InstCombine/InstCombineCalls.cpp | 18 +- lib/Transforms/Scalar/LoopDeletion.cpp | 23 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 18 +- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 1343 ++++++++--------- lib/Transforms/Utils/Local.cpp | 42 +- .../Utils/PromoteMemoryToRegister.cpp | 13 +- lib/Transforms/Utils/SimplifyCFG.cpp | 5 + test/CMakeLists.txt | 2 + test/CodeGen/ARM/2009-10-16-Scope.ll | 32 + test/CodeGen/ARM/2010-08-04-StackVariable.ll | 124 ++ .../CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll | 11 +- test/CodeGen/ARM/available_externally.ll | 16 + test/CodeGen/ARM/fcopysign.ll | 17 +- test/CodeGen/ARM/vstlane.ll | 26 + test/CodeGen/SPARC/2011-01-19-DelaySlot.ll | 6 +- test/CodeGen/SPARC/2011-01-22-SRet.ll | 3 +- .../X86}/2009-10-16-Scope.ll | 1 - test/CodeGen/X86/2010-06-28-DbgEntryPC.ll | 2 +- .../X86}/2010-08-04-StackVariable.ll | 1 - .../2011-02-21-VirtRegRewriter-KillSubReg.ll | 50 + test/CodeGen/X86/2011-02-23-UnfoldBug.ll | 42 + 
test/CodeGen/X86/add.ll | 13 +- test/CodeGen/X86/break-sse-dep.ll | 15 +- test/CodeGen/X86/codegen-dce.ll | 43 - test/CodeGen/X86/codegen-prepare-extload.ll | 5 +- test/CodeGen/X86/constant-pool-sharing.ll | 7 +- test/CodeGen/X86/ctpop-combine.ll | 4 +- test/CodeGen/X86/dbg-live-in-location.ll | 84 -- test/CodeGen/X86/dbg-value-location.ll | 8 +- test/CodeGen/X86/divide-by-constant.ll | 2 +- test/CodeGen/X86/dll-linkage.ll | 5 + test/CodeGen/X86/fast-isel-cmp-branch.ll | 7 +- test/CodeGen/X86/fast-isel-gep.ll | 17 +- test/CodeGen/X86/gather-addresses.ll | 23 +- test/CodeGen/X86/i128-ret.ll | 6 +- test/CodeGen/X86/lea.ll | 9 +- test/CodeGen/X86/lsr-overflow.ll | 5 +- test/CodeGen/X86/lsr-reuse-trunc.ll | 9 +- test/CodeGen/X86/memcmp.ll | 21 +- test/CodeGen/X86/movgs.ll | 7 +- test/CodeGen/X86/non-globl-eh-frame.ll | 24 - test/CodeGen/X86/optimize-max-3.ll | 11 +- test/CodeGen/X86/phi-constants.ll | 35 + test/CodeGen/X86/pr9127.ll | 5 +- test/CodeGen/X86/red-zone.ll | 2 +- test/CodeGen/X86/remat-mov-0.ll | 15 +- test/CodeGen/X86/test-shrink.ll | 21 +- test/CodeGen/X86/use-add-flags.ll | 13 +- test/CodeGen/X86/vec_anyext.ll | 77 + test/CodeGen/X86/vec_sext.ll | 69 + test/CodeGen/X86/vec_shuffle-37.ll | 5 +- test/CodeGen/X86/vec_zext.ll | 69 + test/CodeGen/X86/xor.ll | 9 +- test/CodeGen/XCore/events.ll | 24 + test/CodeGen/XCore/resources.ll | 65 + test/DebugInfo/2009-03-03-deadstore.ll | 108 -- .../2009-03-03-store-to-load-forward.ll | 260 ---- test/FrontendC/2011-02-21-DATA-common.c | 5 + test/MC/ARM/bracket-darwin.s | 5 + test/MC/ARM/bracket-exprs.s | 15 + test/MC/{MachO => ARM}/darwin-ARM-reloc.s | 0 test/MC/{MachO => ARM}/darwin-Thumb-reloc.s | 0 .../MC/{AsmParser => ARM}/full_line_comment.s | 0 test/MC/AsmParser/exprs.s | 2 - test/MC/Disassembler/X86/enhanced.txt | 6 + test/MC/ELF/bracket-exprs.s | 15 + test/MC/{AsmParser/paren.s => ELF/bracket.s} | 0 test/MC/ELF/org.s | 13 + test/MC/ELF/pr9292.s | 26 + test/MC/ELF/relocation-pc.s | 33 + test/MC/X86/x86-32.s | 8 + 
test/MC/X86/x86-64.s | 37 +- .../2003-11-13-ConstExprCastCall.ll | 12 - test/Transforms/InstCombine/call.ll | 16 +- test/Transforms/InstCombine/or-xor.ll | 94 ++ .../Transforms/LoopDeletion/multiple-exits.ll | 26 + test/Transforms/SimplifyCFG/select-gep.ll | 40 + test/lit.cfg | 11 +- tools/bugpoint/OptimizerDriver.cpp | 3 +- tools/gold/gold-plugin.cpp | 48 +- tools/llvm-config/CMakeLists.txt | 3 +- tools/llvm-mc/Disassembler.cpp | 169 ++- tools/lto/LTOCodeGenerator.cpp | 157 +- tools/lto/LTOCodeGenerator.h | 10 +- tools/lto/LTOModule.cpp | 81 +- tools/lto/lto.cpp | 4 +- unittests/ADT/APIntTest.cpp | 18 + unittests/CMakeLists.txt | 2 + unittests/Transforms/Utils/Local.cpp | 11 + utils/FileCheck/CMakeLists.txt | 2 +- utils/FileUpdate/CMakeLists.txt | 2 +- utils/KillTheDoctor/CMakeLists.txt | 2 +- utils/TableGen/AsmWriterEmitter.cpp | 249 +++ utils/TableGen/AsmWriterEmitter.h | 1 + utils/TableGen/CMakeLists.txt | 2 +- utils/TableGen/ClangSACheckersEmitter.cpp | 1 + utils/TableGen/X86RecognizableInstr.cpp | 4 +- utils/buildit/GNUmakefile | 6 +- utils/buildit/build_llvm | 8 +- utils/count/CMakeLists.txt | 2 +- utils/llvmbuild | 740 +++++++++ utils/not/CMakeLists.txt | 2 +- utils/valgrind/i386-pc-linux-gnu.supp | 8 +- 219 files changed, 4974 insertions(+), 2667 deletions(-) create mode 100644 test/CodeGen/ARM/2009-10-16-Scope.ll create mode 100644 test/CodeGen/ARM/2010-08-04-StackVariable.ll create mode 100644 test/CodeGen/ARM/available_externally.ll rename test/{DebugInfo => CodeGen/X86}/2009-10-16-Scope.ll (96%) rename test/{DebugInfo => CodeGen/X86}/2010-08-04-StackVariable.ll (99%) create mode 100644 test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll create mode 100644 test/CodeGen/X86/2011-02-23-UnfoldBug.ll delete mode 100644 test/CodeGen/X86/codegen-dce.ll delete mode 100644 test/CodeGen/X86/dbg-live-in-location.ll delete mode 100644 test/CodeGen/X86/non-globl-eh-frame.ll create mode 100644 test/CodeGen/X86/phi-constants.ll create mode 100644 
test/CodeGen/X86/vec_anyext.ll create mode 100644 test/CodeGen/X86/vec_sext.ll create mode 100644 test/CodeGen/X86/vec_zext.ll create mode 100644 test/CodeGen/XCore/events.ll delete mode 100644 test/DebugInfo/2009-03-03-deadstore.ll delete mode 100644 test/DebugInfo/2009-03-03-store-to-load-forward.ll create mode 100644 test/FrontendC/2011-02-21-DATA-common.c create mode 100644 test/MC/ARM/bracket-darwin.s create mode 100644 test/MC/ARM/bracket-exprs.s rename test/MC/{MachO => ARM}/darwin-ARM-reloc.s (100%) rename test/MC/{MachO => ARM}/darwin-Thumb-reloc.s (100%) rename test/MC/{AsmParser => ARM}/full_line_comment.s (100%) create mode 100644 test/MC/Disassembler/X86/enhanced.txt create mode 100644 test/MC/ELF/bracket-exprs.s rename test/MC/{AsmParser/paren.s => ELF/bracket.s} (100%) create mode 100644 test/MC/ELF/org.s create mode 100644 test/MC/ELF/pr9292.s create mode 100644 test/MC/ELF/relocation-pc.s delete mode 100644 test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll create mode 100644 test/Transforms/InstCombine/or-xor.ll create mode 100644 test/Transforms/LoopDeletion/multiple-exits.ll create mode 100644 test/Transforms/SimplifyCFG/select-gep.ll create mode 100755 utils/llvmbuild diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a5d5f39d85a..b357478bbc3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,8 @@ set(CMAKE_MODULE_PATH set(PACKAGE_VERSION "2.9") +set_property(GLOBAL PROPERTY USE_FOLDERS ON) + include(VersionFromVCS) option(LLVM_APPEND_VC_REV @@ -238,7 +240,6 @@ endif() option(LLVM_BUILD_TESTS "Build LLVM unit tests. If OFF, just generate build targes." OFF) -option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." 
ON) if( LLVM_INCLUDE_TESTS ) add_subdirectory(test) add_subdirectory(utils/unittest) diff --git a/Makefile.rules b/Makefile.rules index 363fa9605b5a..c0a9112c31be 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -973,7 +973,9 @@ $(NativeExportsFile): $(EXPORTED_SYMBOL_FILE) $(ObjDir)/.dir $(Verb) echo "{" > $@ $(Verb) grep -q "\<" $< && echo " global:" >> $@ || : $(Verb) sed -e 's/$$/;/' -e 's/^/ /' < $< >> $@ +ifneq ($(HOST_OS),OpenBSD) $(Verb) echo " local: *;" >> $@ +endif $(Verb) echo "};" >> $@ clean-local:: -$(Verb) $(RM) -f $(NativeExportsFile) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index dfe67cded331..764c6591c457 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -5,7 +5,6 @@ macro(add_llvm_library name) llvm_process_sources( ALL_FILES ${ARGN} ) add_library( ${name} ${ALL_FILES} ) set_property( GLOBAL APPEND PROPERTY LLVM_LIBS ${name} ) - set_property( GLOBAL APPEND PROPERTY LLVM_LIB_TARGETS ${name} ) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) endif( LLVM_COMMON_DEPENDS ) @@ -25,6 +24,7 @@ macro(add_llvm_library name) if( CURRENT_LLVM_TARGET ) add_dependencies(${name} ${CURRENT_LLVM_TARGET}) endif() + set_target_properties(${name} PROPERTIES FOLDER "Libraries") endmacro(add_llvm_library name) @@ -55,6 +55,8 @@ ${name} ignored.") LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}) endif() + + set_target_properties(${name} PROPERTIES FOLDER "Loadable modules") endmacro(add_llvm_loadable_module name) @@ -95,6 +97,7 @@ macro(add_llvm_tool name) if( LLVM_BUILD_TOOLS ) install(TARGETS ${name} RUNTIME DESTINATION bin) endif() + set_target_properties(${name} PROPERTIES FOLDER "Tools") endmacro(add_llvm_tool name) @@ -107,9 +110,16 @@ macro(add_llvm_example name) if( LLVM_BUILD_EXAMPLES ) install(TARGETS ${name} RUNTIME DESTINATION examples) endif() + set_target_properties(${name} PROPERTIES FOLDER "Examples") 
endmacro(add_llvm_example name) +macro(add_llvm_utility name) + add_llvm_executable(${name} ${ARGN}) + set_target_properties(${name} PROPERTIES FOLDER "Utils") +endmacro(add_llvm_utility name) + + macro(add_llvm_target target_name) if( TABLEGEN_OUTPUT ) add_custom_target(${target_name}Table_gen @@ -120,6 +130,7 @@ macro(add_llvm_target target_name) add_llvm_library(LLVM${target_name} ${ARGN} ${TABLEGEN_OUTPUT}) if ( TABLEGEN_OUTPUT ) add_dependencies(LLVM${target_name} ${target_name}Table_gen) + set_target_properties(${target_name}Table_gen PROPERTIES FOLDER "Tablegenning") endif (TABLEGEN_OUTPUT) set( CURRENT_LLVM_TARGET LLVM${target_name} ) endmacro(add_llvm_target) diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt index 9a5566effb08..1ab94749f156 100644 --- a/cmake/modules/CMakeLists.txt +++ b/cmake/modules/CMakeLists.txt @@ -1,7 +1,6 @@ set(llvm_cmake_builddir "${LLVM_BINARY_DIR}/share/llvm/cmake") get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS) -get_property(llvm_lib_targets GLOBAL PROPERTY LLVM_LIB_TARGETS) configure_file( LLVM.cmake diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 7ca2bd07fd53..f62e86ae494b 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -159,3 +159,4 @@ endif( MSVC ) add_llvm_definitions( -D__STDC_LIMIT_MACROS ) add_llvm_definitions( -D__STDC_CONSTANT_MACROS ) +option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." 
ON) diff --git a/cmake/modules/LLVM.cmake b/cmake/modules/LLVM.cmake index d610f3e76516..9182afdf2758 100644 --- a/cmake/modules/LLVM.cmake +++ b/cmake/modules/LLVM.cmake @@ -4,14 +4,14 @@ set(LLVM_PACKAGE_VERSION @PACKAGE_VERSION@) set(LLVM_COMMON_DEPENDS @LLVM_COMMON_DEPENDS@) -set(llvm_libs @llvm_libs@) - -set(llvm_lib_targets @llvm_lib_targets@) +set_property( GLOBAL PROPERTY LLVM_LIBS "@llvm_libs@") set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@) set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@) +set(TARGET_TRIPLE "@TARGET_TRIPLE@") + set(LLVM_TOOLS_BINARY_DIR @LLVM_TOOLS_BINARY_DIR@) set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@) diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html index b6aa4c692d43..7c0bf007ec3a 100644 --- a/docs/GettingStartedVS.html +++ b/docs/GettingStartedVS.html @@ -348,8 +348,6 @@ out:

@@ -365,7 +363,7 @@ out:

Jeff Cohen
The LLVM Compiler Infrastructure
- Last modified: $Date: 2011-02-09 05:19:28 +0100 (Wed, 09 Feb 2011) $ + Last modified: $Date: 2011-02-20 16:34:12 +0100 (Sun, 20 Feb 2011) $ diff --git a/docs/LangRef.html b/docs/LangRef.html index 05130c29efc2..580ae7964b5a 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -4575,12 +4575,12 @@ entry: type ty2.

Arguments:
-

The 'trunc' instruction takes a value to trunc, which must - be an integer type, and a type that specifies the - size and type of the result, which must be - an integer type. The bit size of value must - be larger than the bit size of ty2. Equal sized types are not - allowed.

+

The 'trunc' instruction takes a value to trunc, and a type to trunc it to. + Both types must be integer types, or vectors + of the same number of integers. + The bit size of the value must be larger than + the bit size of the destination type, ty2. + Equal sized types are not allowed.

Semantics:

The 'trunc' instruction truncates the high order bits @@ -4590,9 +4590,10 @@ entry:

Example:
-  %X = trunc i32 257 to i8              ; yields i8:1
-  %Y = trunc i32 123 to i1              ; yields i1:true
-  %Z = trunc i32 122 to i1              ; yields i1:false
+  %X = trunc i32 257 to i8                        ; yields i8:1
+  %Y = trunc i32 123 to i1                        ; yields i1:true
+  %Z = trunc i32 122 to i1                        ; yields i1:false
+  %W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> ; yields <i8 8, i8 7>
 
@@ -4614,10 +4615,11 @@ entry:
Arguments:
-

The 'zext' instruction takes a value to cast, which must be of - integer type, and a type to cast it to, which must - also be of integer type. The bit size of the - value must be smaller than the bit size of the destination type, +

The 'zext' instruction takes a value to cast, and a type to cast it to. + Both types must be integer types, or vectors + of the same number of integers. + The bit size of the value must be smaller than + the bit size of the destination type, ty2.

Semantics:
@@ -4630,6 +4632,7 @@ entry:
   %X = zext i32 257 to i64              ; yields i64:257
   %Y = zext i1 true to i32              ; yields i32:1
+  %Z = zext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
 
@@ -4649,10 +4652,11 @@ entry:

The 'sext' sign extends value to the type ty2.

Arguments:
-

The 'sext' instruction takes a value to cast, which must be of - integer type, and a type to cast it to, which must - also be of integer type. The bit size of the - value must be smaller than the bit size of the destination type, +

The 'sext' instruction takes a value to cast, and a type to cast it to. + Both types must be integer types, or vectors + of the same number of integers. + The bit size of the value must be smaller than + the bit size of the destination type, ty2.

Semantics:
@@ -4666,6 +4670,7 @@ entry:
   %X = sext i8  -1 to i16              ; yields i16   :65535
   %Y = sext i1 true to i32             ; yields i32:-1
+  %Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> ; yields <i32 8, i32 7>
 
@@ -7781,7 +7786,7 @@ LLVM.

Chris Lattner
The LLVM Compiler Infrastructure
- Last modified: $Date: 2011-02-09 17:44:44 +0100 (Wed, 09 Feb 2011) $ + Last modified: $Date: 2011-02-24 22:01:34 +0100 (Thu, 24 Feb 2011) $ diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index b91d5dc9bcf9..d1fd3e5034bf 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -1193,6 +1193,12 @@ class APInt { /// @brief Count the number of leading one bits. unsigned countLeadingOnes() const; + /// Computes the number of leading bits of this APInt that are equal to its + /// sign bit. + unsigned getNumSignBits() const { + return isNegative() ? countLeadingOnes() : countLeadingZeros(); + } + /// countTrailingZeros - This function is an APInt version of the /// countTrailingZeros_{32,64} functions in MathExtras.h. It counts /// the number of zeros from the least significant bit to the first set bit. diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index 1c5470d678b6..d3ea9c0f03b7 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -64,7 +64,10 @@ namespace llvm { /*implicit*/ ArrayRef(const std::vector &Vec) : Data(Vec.empty() ? (T*)0 : &Vec[0]), Length(Vec.size()) {} - // TODO: C arrays. + /// Construct an ArrayRef from a C array. 
+ template + /*implicit*/ ArrayRef(const T (&Arr)[N]) + : Data(Arr), Length(N) {} /// @} /// @name Simple Operations diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h index d3196ca23df9..0d8fcf343385 100644 --- a/include/llvm/ADT/ImmutableIntervalMap.h +++ b/include/llvm/ADT/ImmutableIntervalMap.h @@ -215,7 +215,7 @@ class ImmutableIntervalMap ImmutableIntervalMap add(ImmutableIntervalMap Old, key_type_ref K, data_type_ref D) { - TreeTy *T = F.add(Old.Root, std::make_pair(K, D)); + TreeTy *T = F.add(Old.Root, std::pair(K, D)); return ImmutableIntervalMap(F.getCanonicalTree(T)); } diff --git a/include/llvm/ADT/ImmutableMap.h b/include/llvm/ADT/ImmutableMap.h index e439a0994821..d6cce7ccfa05 100644 --- a/include/llvm/ADT/ImmutableMap.h +++ b/include/llvm/ADT/ImmutableMap.h @@ -108,7 +108,7 @@ class ImmutableMap { ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); } ImmutableMap add(ImmutableMap Old, key_type_ref K, data_type_ref D) { - TreeTy *T = F.add(Old.Root, std::make_pair(K,D)); + TreeTy *T = F.add(Old.Root, std::pair(K,D)); return ImmutableMap(Canonicalize ? F.getCanonicalTree(T): T); } diff --git a/include/llvm/Analysis/DIBuilder.h b/include/llvm/Analysis/DIBuilder.h index bd221344e5fa..417dbc4e802c 100644 --- a/include/llvm/Analysis/DIBuilder.h +++ b/include/llvm/Analysis/DIBuilder.h @@ -58,7 +58,7 @@ namespace llvm { const MDNode *getCU() { return TheCU; } enum ComplexAddrKind { OpPlus=1, OpDeref }; - /// CreateCompileUnit - A CompileUnit provides an anchor for all debugging + /// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. /// @param Lang Source programming language, eg. dwarf::DW_LANG_C99 /// @param File File name @@ -72,67 +72,67 @@ namespace llvm { /// by a tool analyzing generated debugging information. /// @param RV This indicates runtime version for languages like /// Objective-C. 
- void CreateCompileUnit(unsigned Lang, StringRef File, StringRef Dir, + void createCompileUnit(unsigned Lang, StringRef File, StringRef Dir, StringRef Producer, bool isOptimized, StringRef Flags, unsigned RV); - /// CreateFile - Create a file descriptor to hold debugging information + /// createFile - Create a file descriptor to hold debugging information /// for a file. - DIFile CreateFile(StringRef Filename, StringRef Directory); + DIFile createFile(StringRef Filename, StringRef Directory); - /// CreateEnumerator - Create a single enumerator value. - DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val); + /// createEnumerator - Create a single enumerator value. + DIEnumerator createEnumerator(StringRef Name, uint64_t Val); - /// CreateBasicType - Create debugging information entry for a basic + /// createBasicType - Create debugging information entry for a basic /// type. /// @param Name Type name. /// @param SizeInBits Size of the type. /// @param AlignInBits Type alignment. /// @param Encoding DWARF encoding code, e.g. dwarf::DW_ATE_float. - DIType CreateBasicType(StringRef Name, uint64_t SizeInBits, + DIType createBasicType(StringRef Name, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Encoding); - /// CreateQualifiedType - Create debugging information entry for a qualified + /// createQualifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. /// @param Tag Tag identifing type, e.g. dwarf::TAG_volatile_type /// @param FromTy Base Type. - DIType CreateQualifiedType(unsigned Tag, DIType FromTy); + DIType createQualifiedType(unsigned Tag, DIType FromTy); - /// CreatePointerType - Create debugging information entry for a pointer. + /// createPointerType - Create debugging information entry for a pointer. /// @param PointeeTy Type pointed by this pointer. /// @param SizeInBits Size. /// @param AlignInBits Alignment. (optional) /// @param Name Pointer type name. 
(optional) - DIType CreatePointerType(DIType PointeeTy, uint64_t SizeInBits, + DIType createPointerType(DIType PointeeTy, uint64_t SizeInBits, uint64_t AlignInBits = 0, StringRef Name = StringRef()); - /// CreateReferenceType - Create debugging information entry for a c++ + /// createReferenceType - Create debugging information entry for a c++ /// style reference. - DIType CreateReferenceType(DIType RTy); + DIType createReferenceType(DIType RTy); - /// CreateTypedef - Create debugging information entry for a typedef. + /// createTypedef - Create debugging information entry for a typedef. /// @param Ty Original type. /// @param Name Typedef name. /// @param File File where this type is defined. /// @param LineNo Line number. - DIType CreateTypedef(DIType Ty, StringRef Name, DIFile File, + DIType createTypedef(DIType Ty, StringRef Name, DIFile File, unsigned LineNo); - /// CreateFriend - Create debugging information entry for a 'friend'. - DIType CreateFriend(DIType Ty, DIType FriendTy); + /// createFriend - Create debugging information entry for a 'friend'. + DIType createFriend(DIType Ty, DIType FriendTy); - /// CreateInheritance - Create debugging information entry to establish + /// createInheritance - Create debugging information entry to establish /// inheritance relationship between two types. /// @param Ty Original type. /// @param BaseTy Base type. Ty is inherits from base. /// @param BaseOffset Base offset. /// @param Flags Flags to describe inheritance attribute, /// e.g. private - DIType CreateInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, + DIType createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags); - /// CreateMemberType - Create debugging information entry for a member. + /// createMemberType - Create debugging information entry for a member. /// @param Name Member name. /// @param File File where this member is defined. /// @param LineNo Line number. 
@@ -141,12 +141,12 @@ namespace llvm { /// @param OffsetInBits Member offset. /// @param Flags Flags to encode member attribute, e.g. private /// @param Ty Parent type. - DIType CreateMemberType(StringRef Name, DIFile File, + DIType createMemberType(StringRef Name, DIFile File, unsigned LineNo, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType Ty); - /// CreateClassType - Create debugging information entry for a class. + /// createClassType - Create debugging information entry for a class. /// @param Scope Scope in which this class is defined. /// @param Name class name. /// @param File File where this member is defined. @@ -161,14 +161,14 @@ namespace llvm { /// DW_AT_containing_type. See DWARF documentation /// for more info. /// @param TemplateParms Template type parameters. - DIType CreateClassType(DIDescriptor Scope, StringRef Name, DIFile File, + DIType createClassType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom, DIArray Elements, MDNode *VTableHolder = 0, MDNode *TemplateParms = 0); - /// CreateStructType - Create debugging information entry for a struct. + /// createStructType - Create debugging information entry for a struct. /// @param Scope Scope in which this struct is defined. /// @param Name Struct name. /// @param File File where this member is defined. @@ -178,12 +178,12 @@ namespace llvm { /// @param Flags Flags to encode member attribute, e.g. private /// @param Elements Struct elements. /// @param RunTimeLang Optional parameter, Objective-C runtime version. 
- DIType CreateStructType(DIDescriptor Scope, StringRef Name, DIFile File, + DIType createStructType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, unsigned RunTimeLang = 0); - /// CreateUnionType - Create debugging information entry for an union. + /// createUnionType - Create debugging information entry for an union. /// @param Scope Scope in which this union is defined. /// @param Name Union name. /// @param File File where this member is defined. @@ -193,12 +193,12 @@ namespace llvm { /// @param Flags Flags to encode member attribute, e.g. private /// @param Elements Union elements. /// @param RunTimeLang Optional parameter, Objective-C runtime version. - DIType CreateUnionType(DIDescriptor Scope, StringRef Name, DIFile File, + DIType createUnionType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, unsigned RunTimeLang = 0); - /// CreateTemplateTypeParameter - Create debugging information for template + /// createTemplateTypeParameter - Create debugging information for template /// type parameter. /// @param Scope Scope in which this type is defined. /// @param Name Type parameter name. @@ -207,11 +207,11 @@ namespace llvm { /// @param LineNo Line number. /// @param ColumnNo Column Number. DITemplateTypeParameter - CreateTemplateTypeParameter(DIDescriptor Scope, StringRef Name, DIType Ty, + createTemplateTypeParameter(DIDescriptor Scope, StringRef Name, DIType Ty, MDNode *File = 0, unsigned LineNo = 0, unsigned ColumnNo = 0); - /// CreateTemplateValueParameter - Create debugging information for template + /// createTemplateValueParameter - Create debugging information for template /// value parameter. /// @param Scope Scope in which this type is defined. /// @param Name Value parameter name. @@ -221,28 +221,28 @@ namespace llvm { /// @param LineNo Line number. 
/// @param ColumnNo Column Number. DITemplateValueParameter - CreateTemplateValueParameter(DIDescriptor Scope, StringRef Name, DIType Ty, + createTemplateValueParameter(DIDescriptor Scope, StringRef Name, DIType Ty, uint64_t Value, MDNode *File = 0, unsigned LineNo = 0, unsigned ColumnNo = 0); - /// CreateArrayType - Create debugging information entry for an array. + /// createArrayType - Create debugging information entry for an array. /// @param Size Array size. /// @param AlignInBits Alignment. /// @param Ty Element type. /// @param Subscripts Subscripts. - DIType CreateArrayType(uint64_t Size, uint64_t AlignInBits, + DIType createArrayType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts); - /// CreateVectorType - Create debugging information entry for a vector type. + /// createVectorType - Create debugging information entry for a vector type. /// @param Size Array size. /// @param AlignInBits Alignment. /// @param Ty Element type. /// @param Subscripts Subscripts. - DIType CreateVectorType(uint64_t Size, uint64_t AlignInBits, + DIType createVectorType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts); - /// CreateEnumerationType - Create debugging information entry for an + /// createEnumerationType - Create debugging information entry for an /// enumeration. /// @param Scope Scope in which this enumeration is defined. /// @param Name Union name. @@ -251,40 +251,40 @@ namespace llvm { /// @param SizeInBits Member size. /// @param AlignInBits Member alignment. /// @param Elements Enumeration elements. - DIType CreateEnumerationType(DIDescriptor Scope, StringRef Name, + DIType createEnumerationType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements); - /// CreateSubroutineType - Create subroutine type. + /// createSubroutineType - Create subroutine type. /// @param File File in which this subroutine is defined. 
/// @param ParamterTypes An array of subroutine parameter types. This /// includes return type at 0th index. - DIType CreateSubroutineType(DIFile File, DIArray ParameterTypes); + DIType createSubroutineType(DIFile File, DIArray ParameterTypes); - /// CreateArtificialType - Create a new DIType with "artificial" flag set. - DIType CreateArtificialType(DIType Ty); + /// createArtificialType - Create a new DIType with "artificial" flag set. + DIType createArtificialType(DIType Ty); - /// CreateTemporaryType - Create a temporary forward-declared type. - DIType CreateTemporaryType(); - DIType CreateTemporaryType(DIFile F); + /// createTemporaryType - Create a temporary forward-declared type. + DIType createTemporaryType(); + DIType createTemporaryType(DIFile F); - /// RetainType - Retain DIType in a module even if it is not referenced + /// retainType - Retain DIType in a module even if it is not referenced /// through debug info anchors. - void RetainType(DIType T); + void retainType(DIType T); - /// CreateUnspecifiedParameter - Create unspeicified type descriptor + /// createUnspecifiedParameter - Create unspeicified type descriptor /// for a subroutine type. - DIDescriptor CreateUnspecifiedParameter(); + DIDescriptor createUnspecifiedParameter(); - /// GetOrCreateArray - Get a DIArray, create one if required. - DIArray GetOrCreateArray(Value *const *Elements, unsigned NumElements); + /// getOrCreateArray - Get a DIArray, create one if required. + DIArray getOrCreateArray(Value *const *Elements, unsigned NumElements); - /// GetOrCreateSubrange - Create a descriptor for a value range. This + /// getOrCreateSubrange - Create a descriptor for a value range. This /// implicitly uniques the values returned. - DISubrange GetOrCreateSubrange(int64_t Lo, int64_t Hi); + DISubrange getOrCreateSubrange(int64_t Lo, int64_t Hi); - /// CreateGlobalVariable - Create a new descriptor for the specified global. 
+ /// createGlobalVariable - Create a new descriptor for the specified global. /// @param Name Name of the variable. /// @param File File where this variable is defined. /// @param LineNo Line number. @@ -293,11 +293,11 @@ namespace llvm { /// externally visible or not. /// @param Val llvm::Value of the variable. DIGlobalVariable - CreateGlobalVariable(StringRef Name, DIFile File, unsigned LineNo, + createGlobalVariable(StringRef Name, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, llvm::Value *Val); - /// CreateStaticVariable - Create a new descriptor for the specified + /// createStaticVariable - Create a new descriptor for the specified /// variable. /// @param Conext Variable scope. /// @param Name Name of the variable. @@ -309,12 +309,12 @@ namespace llvm { /// externally visible or not. /// @param Val llvm::Value of the variable. DIGlobalVariable - CreateStaticVariable(DIDescriptor Context, StringRef Name, + createStaticVariable(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, llvm::Value *Val); - /// CreateLocalVariable - Create a new descriptor for the specified + /// createLocalVariable - Create a new descriptor for the specified /// local variable. /// @param Tag Dwarf TAG. Usually DW_TAG_auto_variable or /// DW_TAG_arg_variable. @@ -326,14 +326,14 @@ namespace llvm { /// @param AlwaysPreserve Boolean. Set to true if debug info for this /// variable should be preserved in optimized build. /// @param Flags Flags, e.g. artificial variable. 
- DIVariable CreateLocalVariable(unsigned Tag, DIDescriptor Scope, + DIVariable createLocalVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo, DIType Ty, bool AlwaysPreserve = false, unsigned Flags = 0); - /// CreateComplexVariable - Create a new descriptor for the specified + /// createComplexVariable - Create a new descriptor for the specified /// variable which has a complex address expression for its address. /// @param Tag Dwarf TAG. Usually DW_TAG_auto_variable or /// DW_TAG_arg_variable. @@ -344,12 +344,12 @@ namespace llvm { /// @param Ty Variable Type /// @param Addr A pointer to a vector of complex address operations. /// @param NumAddr Num of address operations in the vector. - DIVariable CreateComplexVariable(unsigned Tag, DIDescriptor Scope, + DIVariable createComplexVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile F, unsigned LineNo, DIType Ty, Value *const *Addr, unsigned NumAddr); - /// CreateFunction - Create a new descriptor for the specified subprogram. + /// createFunction - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. /// @param Scope Function scope. /// @param Name Function name. @@ -363,7 +363,7 @@ namespace llvm { /// This flags are used to emit dwarf attributes. /// @param isOptimized True if optimization is ON. /// @param Fn llvm::Function pointer. - DISubprogram CreateFunction(DIDescriptor Scope, StringRef Name, + DISubprogram createFunction(DIDescriptor Scope, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, @@ -372,7 +372,7 @@ namespace llvm { bool isOptimized = false, Function *Fn = 0); - /// CreateMethod - Create a new descriptor for the specified C++ method. + /// createMethod - Create a new descriptor for the specified C++ method. /// See comments in DISubprogram for descriptions of these fields. /// @param Scope Function scope. 
/// @param Name Function name. @@ -390,7 +390,7 @@ namespace llvm { /// This flags are used to emit dwarf attributes. /// @param isOptimized True if optimization is ON. /// @param Fn llvm::Function pointer. - DISubprogram CreateMethod(DIDescriptor Scope, StringRef Name, + DISubprogram createMethod(DIDescriptor Scope, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, DIType Ty, bool isLocalToUnit, @@ -401,55 +401,55 @@ namespace llvm { bool isOptimized = false, Function *Fn = 0); - /// CreateNameSpace - This creates new descriptor for a namespace + /// createNameSpace - This creates new descriptor for a namespace /// with the specified parent scope. /// @param Scope Namespace scope /// @param Name Name of this namespace /// @param File Source file /// @param LineNo Line number - DINameSpace CreateNameSpace(DIDescriptor Scope, StringRef Name, + DINameSpace createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo); - /// CreateLexicalBlock - This creates a descriptor for a lexical block + /// createLexicalBlock - This creates a descriptor for a lexical block /// with the specified parent context. /// @param Scope Parent lexical scope. /// @param File Source file /// @param Line Line number /// @param Col Column number - DILexicalBlock CreateLexicalBlock(DIDescriptor Scope, DIFile File, + DILexicalBlock createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col); - /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. + /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. /// @param Storage llvm::Value of the variable /// @param VarInfo Variable's debug info descriptor. /// @param InsertAtEnd Location for the new intrinsic. - Instruction *InsertDeclare(llvm::Value *Storage, DIVariable VarInfo, + Instruction *insertDeclare(llvm::Value *Storage, DIVariable VarInfo, BasicBlock *InsertAtEnd); - /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
+ /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. /// @param Storage llvm::Value of the variable /// @param VarInfo Variable's debug info descriptor. /// @param InsertBefore Location for the new intrinsic. - Instruction *InsertDeclare(llvm::Value *Storage, DIVariable VarInfo, + Instruction *insertDeclare(llvm::Value *Storage, DIVariable VarInfo, Instruction *InsertBefore); - /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. + /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. /// @param Val llvm::Value of the variable /// @param Offset Offset /// @param VarInfo Variable's debug info descriptor. /// @param InsertAtEnd Location for the new intrinsic. - Instruction *InsertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset, + Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset, DIVariable VarInfo, BasicBlock *InsertAtEnd); - /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. + /// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. /// @param Val llvm::Value of the variable /// @param Offset Offset /// @param VarInfo Variable's debug info descriptor. /// @param InsertBefore Location for the new intrinsic. 
- Instruction *InsertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset, + Instruction *insertDbgValueIntrinsic(llvm::Value *Val, uint64_t Offset, DIVariable VarInfo, Instruction *InsertBefore); diff --git a/include/llvm/CMakeLists.txt b/include/llvm/CMakeLists.txt index 5e4f40881d00..0c3ca1cd0c5c 100644 --- a/include/llvm/CMakeLists.txt +++ b/include/llvm/CMakeLists.txt @@ -4,6 +4,7 @@ tablegen(Intrinsics.gen -gen-intrinsic) add_custom_target(intrinsics_gen ALL DEPENDS ${llvm_builded_incs_dir}/Intrinsics.gen) +set_target_properties(intrinsics_gen PROPERTIES FOLDER "Tablegenning") set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} intrinsics_gen PARENT_SCOPE) @@ -16,4 +17,5 @@ if( MSVC_IDE OR XCODE ) # We need at least one source file: ${LLVM_MAIN_SRC_DIR}/lib/Transforms/Hello/Hello.cpp ${headers}) + set_target_properties(llvm_headers_do_not_build PROPERTIES FOLDER "Misc") endif() diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index 357b933db54c..a071febb102f 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -445,7 +445,8 @@ namespace llvm { /// EmitVisibility - This emits visibility information about symbol, if /// this is suported by the target. 
- void EmitVisibility(MCSymbol *Sym, unsigned Visibility) const; + void EmitVisibility(MCSymbol *Sym, unsigned Visibility, + bool IsDefinition = true) const; void EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const; diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index 27631b7ea12f..b41f30d8251d 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -101,13 +101,16 @@ class FunctionLoweringInfo { #endif struct LiveOutInfo { - unsigned NumSignBits; + unsigned NumSignBits : 31; + bool IsValid : 1; APInt KnownOne, KnownZero; - LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} + LiveOutInfo() : NumSignBits(0), IsValid(true), KnownOne(1, 0), + KnownZero(1, 0) {} }; - - /// LiveOutRegInfo - Information about live out vregs. - IndexedMap LiveOutRegInfo; + + /// VisitedBBs - The set of basic blocks visited thus far by instruction + /// selection. + DenseSet VisitedBBs; /// PHINodesToUpdate - A list of phi instructions whose operand list will /// be updated after processing the current basic block. @@ -143,12 +146,62 @@ class FunctionLoweringInfo { return R = CreateRegs(V->getType()); } + /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the + /// register is a PHI destination and the PHI's LiveOutInfo is not valid. + const LiveOutInfo *GetLiveOutRegInfo(unsigned Reg) { + if (!LiveOutRegInfo.inBounds(Reg)) + return NULL; + + const LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; + if (!LOI->IsValid) + return NULL; + + return LOI; + } + + /// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the + /// register is a PHI destination and the PHI's LiveOutInfo is not valid. If + /// the register's LiveOutInfo is for a smaller bit width, it is extended to + /// the larger bit width by zero extension. The bit width must be no smaller + /// than the LiveOutInfo's existing bit width. 
+ const LiveOutInfo *GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth); + + /// AddLiveOutRegInfo - Adds LiveOutInfo for a register. + void AddLiveOutRegInfo(unsigned Reg, unsigned NumSignBits, + const APInt &KnownZero, const APInt &KnownOne) { + // Only install this information if it tells us something. + if (NumSignBits == 1 && KnownZero == 0 && KnownOne == 0) + return; + + LiveOutRegInfo.grow(Reg); + LiveOutInfo &LOI = LiveOutRegInfo[Reg]; + LOI.NumSignBits = NumSignBits; + LOI.KnownOne = KnownOne; + LOI.KnownZero = KnownZero; + } + + /// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination + /// register based on the LiveOutInfo of its operands. + void ComputePHILiveOutRegInfo(const PHINode*); + + /// InvalidatePHILiveOutRegInfo - Invalidates a PHI's LiveOutInfo, to be + /// called when a block is visited before all of its predecessors. + void InvalidatePHILiveOutRegInfo(const PHINode *PN) { + unsigned Reg = ValueMap[PN]; + LiveOutRegInfo.grow(Reg); + LiveOutRegInfo[Reg].IsValid = false; + } + /// setByValArgumentFrameIndex - Record frame index for the byval /// argument. void setByValArgumentFrameIndex(const Argument *A, int FI); /// getByValArgumentFrameIndex - Get frame index for the byval argument. int getByValArgumentFrameIndex(const Argument *A); + +private: + /// LiveOutRegInfo - Information about live out vregs. + IndexedMap LiveOutRegInfo; }; /// AddCatchInfo - Extract the personality and type infos from an eh.selector diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index 498f815b9b5a..5727321a0da4 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -16,6 +16,7 @@ #ifndef LLVM_CODEGEN_MACHINECONSTANTPOOL_H #define LLVM_CODEGEN_MACHINECONSTANTPOOL_H +#include "llvm/ADT/DenseSet.h" #include #include #include @@ -130,6 +131,8 @@ class MachineConstantPool { const TargetData *TD; ///< The machine's TargetData. 
unsigned PoolAlignment; ///< The alignment for the pool. std::vector<MachineConstantPoolEntry> Constants; ///< The pool of constants. + /// MachineConstantPoolValues that use an existing MachineConstantPoolEntry. + DenseSet<MachineConstantPoolValue*> MachineCPVsSharingEntries; public: /// @brief The only constructor. explicit MachineConstantPool(const TargetData *td) diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h index abeaa4f58d3d..f56c053e4708 100644 --- a/include/llvm/CodeGen/MachineFunction.h +++ b/include/llvm/CodeGen/MachineFunction.h @@ -281,7 +281,7 @@ class MachineFunction { /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. - unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC, DebugLoc DL); + unsigned addLiveIn(unsigned PReg, const TargetRegisterClass *RC); //===--------------------------------------------------------------------===// // BasicBlock accessor functions. diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 79ff714df63d..74df8da20ed3 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -17,8 +17,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/Support/DebugLoc.h" #include <vector> namespace llvm { @@ -66,10 +64,7 @@ class MachineRegisterInfo { /// stored in the second element. std::vector<std::pair<unsigned, unsigned> > LiveIns; std::vector<unsigned> LiveOuts; - - /// LiveInLocs - Keep track of location livein registers.
- DenseMap<unsigned, DebugLoc> LiveInLocs; - + MachineRegisterInfo(const MachineRegisterInfo&); // DO NOT IMPLEMENT void operator=(const MachineRegisterInfo&); // DO NOT IMPLEMENT public: @@ -276,12 +271,7 @@ class MachineRegisterInfo { LiveIns.push_back(std::make_pair(Reg, vreg)); } void addLiveOut(unsigned Reg) { LiveOuts.push_back(Reg); } - - /// addLiveInLoc - Keep track of location info for live in reg. - void addLiveInLoc(unsigned VReg, DebugLoc DL) { - LiveInLocs[VReg] = DL; - } - + // Iteration support for live in/out sets. These sets are kept in sorted // order by their register number. typedef std::vector<std::pair<unsigned,unsigned> >::const_iterator diff --git a/include/llvm/IntrinsicsXCore.td b/include/llvm/IntrinsicsXCore.td index 97bac1d2daaf..944120fc8c6e 100644 --- a/include/llvm/IntrinsicsXCore.td +++ b/include/llvm/IntrinsicsXCore.td @@ -33,4 +33,23 @@ let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". [NoCapture<0>]>; def int_xcore_setc : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], [NoCapture<0>]>; + def int_xcore_inshr : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_outshr : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_setpt : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_getts : Intrinsic<[llvm_i32_ty],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_syncr : Intrinsic<[],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_settw : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; + def int_xcore_setv : Intrinsic<[],[llvm_anyptr_ty, llvm_ptr_ty], + [NoCapture<0>]>; + def int_xcore_eeu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture<0>]>; + + // Intrinsics for events.
+ def int_xcore_waitevent : Intrinsic<[llvm_ptr_ty],[], [IntrReadMem]>; + def int_xcore_clre : Intrinsic<[],[],[]>; } diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 9cfd0048a607..0bf364a6dfcf 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -246,6 +246,11 @@ namespace llvm { /// declare a symbol as having hidden visibility. MCSymbolAttr HiddenVisibilityAttr; // Defaults to MCSA_Hidden. + /// HiddenDeclarationVisibilityAttr - This attribute, if not MCSA_Invalid, + /// is used to declare an undefined symbol as having hidden visibility. + MCSymbolAttr HiddenDeclarationVisibilityAttr; // Defaults to MCSA_Hidden. + + /// ProtectedVisibilityAttr - This attribute, if not MCSA_Invalid, is used /// to declare a symbol as having protected visibility. MCSymbolAttr ProtectedVisibilityAttr; // Defaults to MCSA_Protected @@ -425,6 +430,9 @@ namespace llvm { const char *getLinkOnceDirective() const { return LinkOnceDirective; } MCSymbolAttr getHiddenVisibilityAttr() const { return HiddenVisibilityAttr;} + MCSymbolAttr getHiddenDeclarationVisibilityAttr() const { + return HiddenDeclarationVisibilityAttr; + } MCSymbolAttr getProtectedVisibilityAttr() const { return ProtectedVisibilityAttr; } diff --git a/include/llvm/MC/MCParser/MCAsmParserExtension.h b/include/llvm/MC/MCParser/MCAsmParserExtension.h index 95184cdfcf32..ceb57f57e9e1 100644 --- a/include/llvm/MC/MCParser/MCAsmParserExtension.h +++ b/include/llvm/MC/MCParser/MCAsmParserExtension.h @@ -38,6 +38,8 @@ class MCAsmParserExtension { return (Obj->*Handler)(Directive, DirectiveLoc); } + bool BracketExpressionsSupported; + public: virtual ~MCAsmParserExtension(); @@ -68,6 +70,8 @@ class MCAsmParserExtension { const AsmToken &getTok() { return getParser().getTok(); } + bool HasBracketExpressions() const { return BracketExpressionsSupported; } + /// @} }; diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index fc2451f9c19c..4451199b7fb3 100644 --- 
a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -57,13 +57,10 @@ namespace llvm { MCDwarfFrameInfo *getCurrentFrameInfo(); void EnsureValidFrame(); - /// CurSectionStack - This is stack of CurSection values saved by - /// PushSection. - SmallVector<const MCSection *, 4> CurSectionStack; - - /// PrevSectionStack - This is stack of PrevSection values saved by - /// PushSection. - SmallVector<const MCSection *, 4> PrevSectionStack; + /// SectionStack - This is stack of current and previous section + /// values saved by PushSection. + SmallVector<std::pair<const MCSection *, + const MCSection *>, 4> SectionStack; protected: MCStreamer(MCContext &Ctx); @@ -117,16 +114,16 @@ namespace llvm { /// getCurrentSection - Return the current section that the streamer is /// emitting code to. const MCSection *getCurrentSection() const { - if (!CurSectionStack.empty()) - return CurSectionStack.back(); + if (!SectionStack.empty()) + return SectionStack.back().first; return NULL; } /// getPreviousSection - Return the previous section that the streamer is /// emitting code to. const MCSection *getPreviousSection() const { - if (!PrevSectionStack.empty()) - return PrevSectionStack.back(); + if (!SectionStack.empty()) + return SectionStack.back().second; return NULL; } @@ -139,8 +136,8 @@ namespace llvm { /// pushSection - Save the current and previous section on the /// section stack. void PushSection() { - PrevSectionStack.push_back(getPreviousSection()); - CurSectionStack.push_back(getCurrentSection()); + SectionStack.push_back(std::make_pair(getCurrentSection(), + getPreviousSection())); } /// popSection - Restore the current and previous section from @@ -148,12 +145,10 @@ namespace llvm { /// /// Returns false if the stack was empty.
bool PopSection() { - if (PrevSectionStack.size() <= 1) + if (SectionStack.size() <= 1) return false; - assert(CurSectionStack.size() > 1); - PrevSectionStack.pop_back(); - const MCSection *oldSection = CurSectionStack.pop_back_val(); - const MCSection *curSection = CurSectionStack.back(); + const MCSection *oldSection = SectionStack.pop_back_val().first; + const MCSection *curSection = SectionStack.back().first; if (oldSection != curSection) ChangeSection(curSection); @@ -166,10 +161,10 @@ namespace llvm { /// This corresponds to assembler directives like .section, .text, etc. void SwitchSection(const MCSection *Section) { assert(Section && "Cannot switch to a null section!"); - const MCSection *curSection = CurSectionStack.back(); - PrevSectionStack.back() = curSection; + const MCSection *curSection = SectionStack.back().first; + SectionStack.back().second = curSection; if (Section != curSection) { - CurSectionStack.back() = Section; + SectionStack.back().first = Section; ChangeSection(Section); } } diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h index d7b5b42924c3..92a9fd695e58 100644 --- a/include/llvm/Support/NoFolder.h +++ b/include/llvm/Support/NoFolder.h @@ -38,8 +38,12 @@ class NoFolder { // Binary Operators //===--------------------------------------------------------------------===// - Instruction *CreateAdd(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateAdd(LHS, RHS); + Instruction *CreateAdd(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateAdd(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; } Instruction *CreateNSWAdd(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateNSWAdd(LHS, RHS); @@ -50,8 +54,12 @@ class NoFolder { Instruction *CreateFAdd(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFAdd(LHS, RHS); } - Instruction 
*CreateSub(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateSub(LHS, RHS); + Instruction *CreateSub(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateSub(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; } Instruction *CreateNSWSub(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateNSWSub(LHS, RHS); @@ -62,8 +70,12 @@ class NoFolder { Instruction *CreateFSub(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFSub(LHS, RHS); } - Instruction *CreateMul(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateMul(LHS, RHS); + Instruction *CreateMul(Constant *LHS, Constant *RHS, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateMul(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; } Instruction *CreateNSWMul(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateNSWMul(LHS, RHS); @@ -74,14 +86,20 @@ class NoFolder { Instruction *CreateFMul(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFMul(LHS, RHS); } - Instruction *CreateUDiv(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateUDiv(LHS, RHS); + Instruction *CreateUDiv(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateUDiv(LHS, RHS); + return BinaryOperator::CreateExactUDiv(LHS, RHS); } Instruction *CreateExactUDiv(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateExactUDiv(LHS, RHS); } - Instruction *CreateSDiv(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateSDiv(LHS, RHS); + Instruction *CreateSDiv(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateSDiv(LHS, RHS); + return BinaryOperator::CreateExactSDiv(LHS, RHS); } 
Instruction *CreateExactSDiv(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateExactSDiv(LHS, RHS); @@ -98,14 +116,24 @@ class NoFolder { Instruction *CreateFRem(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateFRem(LHS, RHS); } - Instruction *CreateShl(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateShl(LHS, RHS); + Instruction *CreateShl(Constant *LHS, Constant *RHS, bool HasNUW = false, + bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateShl(LHS, RHS); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; } - Instruction *CreateLShr(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateLShr(LHS, RHS); + Instruction *CreateLShr(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateLShr(LHS, RHS); + return BinaryOperator::CreateExactLShr(LHS, RHS); } - Instruction *CreateAShr(Constant *LHS, Constant *RHS) const { - return BinaryOperator::CreateAShr(LHS, RHS); + Instruction *CreateAShr(Constant *LHS, Constant *RHS, + bool isExact = false) const { + if (!isExact) + return BinaryOperator::CreateAShr(LHS, RHS); + return BinaryOperator::CreateExactAShr(LHS, RHS); } Instruction *CreateAnd(Constant *LHS, Constant *RHS) const { return BinaryOperator::CreateAnd(LHS, RHS); @@ -126,8 +154,12 @@ class NoFolder { // Unary Operators //===--------------------------------------------------------------------===// - Instruction *CreateNeg(Constant *C) const { - return BinaryOperator::CreateNeg(C); + Instruction *CreateNeg(Constant *C, + bool HasNUW = false, bool HasNSW = false) const { + BinaryOperator *BO = BinaryOperator::CreateNeg(C); + if (HasNUW) BO->setHasNoUnsignedWrap(); + if (HasNSW) BO->setHasNoSignedWrap(); + return BO; } Instruction *CreateNSWNeg(Constant *C) const { return BinaryOperator::CreateNSWNeg(C); diff --git a/include/llvm/Support/PathV1.h b/include/llvm/Support/PathV1.h 
index a1c3f6a49a19..d7753a3e71e7 100644 --- a/include/llvm/Support/PathV1.h +++ b/include/llvm/Support/PathV1.h @@ -312,9 +312,9 @@ namespace sys { /// This function determines if the path name is absolute, as opposed to /// relative. /// @brief Determine if the path is absolute. -//FIXME: LLVM_ATTRIBUTE_DEPRECATED( - bool isAbsolute() const; -//FIXME: LLVMV_PATH_DEPRECATED_MSG(path::is_absolute)); + LLVM_ATTRIBUTE_DEPRECATED( + bool isAbsolute() const, + LLVM_PATH_DEPRECATED_MSG(path::is_absolute)); /// This function determines if the path name is absolute, as opposed to /// relative. diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 5141b7b56229..ba7574dfdbd7 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -111,7 +111,7 @@ class TargetLowering { bool isBigEndian() const { return !IsLittleEndian; } bool isLittleEndian() const { return IsLittleEndian; } MVT getPointerTy() const { return PointerTy; } - MVT getShiftAmountTy() const { return ShiftAmountTy; } + virtual MVT getShiftAmountTy(EVT LHSTy) const; /// isSelectExpensive - Return true if the select operation is expensive for /// this target. @@ -210,7 +210,7 @@ class TargetLowering { /// ValueTypeActions - For each value type, keep a LegalizeAction enum /// that indicates how instruction selection should deal with the type. uint8_t ValueTypeActions[MVT::LAST_VALUETYPE]; - + LegalizeAction getExtendedTypeAction(EVT VT) const { // Handle non-vector integers. if (!VT.isVector()) { @@ -221,42 +221,56 @@ class TargetLowering { return Promote; return Expand; } - - // If this is a type smaller than a legal vector type, promote to that - // type, e.g. <2 x float> -> <4 x float>. - if (VT.getVectorElementType().isSimple() && - VT.getVectorNumElements() != 1) { - MVT EltType = VT.getVectorElementType().getSimpleVT(); - unsigned NumElts = VT.getVectorNumElements(); - while (1) { - // Round up to the nearest power of 2. 
- NumElts = (unsigned)NextPowerOf2(NumElts); - - MVT LargerVector = MVT::getVectorVT(EltType, NumElts); - if (LargerVector == MVT()) break; - - // If this the larger type is legal, promote to it. - if (getTypeAction(LargerVector) == Legal) return Promote; - } + + // Vectors with only one element are always scalarized. + if (VT.getVectorNumElements() == 1) + return Expand; + + // Vectors with a number of elements that is not a power of two are always + // widened, for example <3 x float> -> <4 x float>. + if (!VT.isPow2VectorType()) + return Promote; + + // Vectors with a crazy element type are always expanded, for example + // <4 x i2> is expanded into two vectors of type <2 x i2>. + if (!VT.getVectorElementType().isSimple()) + return Expand; + + // If this type is smaller than a legal vector type then widen it, + // otherwise expand it. E.g. <2 x float> -> <4 x float>. + MVT EltType = VT.getVectorElementType().getSimpleVT(); + unsigned NumElts = VT.getVectorNumElements(); + while (1) { + // Round up to the next power of 2. + NumElts = (unsigned)NextPowerOf2(NumElts); + + // If there is no simple vector type with this many elements then there + // cannot be a larger legal vector type. Note that this assumes that + // there are no skipped intermediate vector types in the simple types. + MVT LargerVector = MVT::getVectorVT(EltType, NumElts); + if (LargerVector == MVT()) + return Expand; + + // If this type is legal then widen the vector. + if (getTypeAction(LargerVector) == Legal) + return Promote; } - - return VT.isPow2VectorType() ? 
Expand : Promote; - } + } public: ValueTypeActionImpl() { std::fill(ValueTypeActions, array_endof(ValueTypeActions), 0); } - + LegalizeAction getTypeAction(EVT VT) const { if (!VT.isExtended()) return getTypeAction(VT.getSimpleVT()); return getExtendedTypeAction(VT); } - + LegalizeAction getTypeAction(MVT VT) const { return (LegalizeAction)ValueTypeActions[VT.SimpleTy]; } - + void setTypeAction(EVT VT, LegalizeAction Action) { unsigned I = VT.getSimpleVT().SimpleTy; ValueTypeActions[I] = Action; @@ -277,7 +291,7 @@ class TargetLowering { LegalizeAction getTypeAction(MVT VT) const { return ValueTypeActions.getTypeAction(VT); } - + /// getTypeToTransformTo - For types supported by the target, this is an /// identity function. For types that must be promoted to larger types, this /// returns the larger type to promote to. For integer types that are larger @@ -310,7 +324,7 @@ class TargetLowering { EVT NVT = VT.getRoundIntegerType(Context); if (NVT == VT) // Size is a power of two - expand to half the size. return EVT::getIntegerVT(Context, VT.getSizeInBits() / 2); - + // Promote to a power of two size, avoiding multi-step promotion. return getTypeAction(NVT) == Promote ? getTypeToTransformTo(Context, NVT) : NVT; @@ -983,10 +997,6 @@ class TargetLowering { // protected: - /// setShiftAmountType - Describe the type that should be used for shift - /// amounts. This type defaults to the pointer type. - void setShiftAmountType(MVT VT) { ShiftAmountTy = VT; } - /// setBooleanContents - Specify how the target extends the result of a /// boolean value from i1 to a wider type. See getBooleanContents. void setBooleanContents(BooleanContent Ty) { BooleanContents = Ty; } @@ -1033,12 +1043,12 @@ class TargetLowering { /// SelectIsExpensive - Tells the code generator not to expand operations /// into sequences that use the select operations if possible. 
- void setSelectIsExpensive(bool isExpensive = true) { - SelectIsExpensive = isExpensive; + void setSelectIsExpensive(bool isExpensive = true) { + SelectIsExpensive = isExpensive; } - /// JumpIsExpensive - Tells the code generator not to expand sequence of - /// operations into a seperate sequences that increases the amount of + /// JumpIsExpensive - Tells the code generator not to expand sequence of + /// operations into a seperate sequences that increases the amount of /// flow control. void setJumpIsExpensive(bool isExpensive = true) { JumpIsExpensive = isExpensive; @@ -1355,7 +1365,7 @@ class TargetLowering { CW_Good = 1, // Good weight. CW_Better = 2, // Better weight. CW_Best = 3, // Best weight. - + // Well-known weights. CW_SpecificReg = CW_Okay, // Specific register operands. CW_Register = CW_Good, // Register operands. @@ -1408,21 +1418,21 @@ class TargetLowering { CallOperandVal(0), ConstraintVT(MVT::Other) { } }; - + typedef std::vector<AsmOperandInfo> AsmOperandInfoVector; - + /// ParseConstraints - Split up the constraint string from the inline /// assembly value into the specific constraints and their prefixes, /// and also tie in the associated operand values. /// If this returns an empty vector, and if the constraint string itself /// isn't empty, there was an error parsing. virtual AsmOperandInfoVector ParseConstraints(ImmutableCallSite CS) const; - + /// Examine constraint type and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. virtual ConstraintWeight getMultipleConstraintMatchWeight( AsmOperandInfo &info, int maIndex) const; - + /// Examine constraint string and operand type and determine a weight value. /// The operand object must already have been set up with the operand type. virtual ConstraintWeight getSingleConstraintMatchWeight( @@ -1432,7 +1442,7 @@ class TargetLowering { /// type to use for the specific AsmOperandInfo, setting /// OpInfo.ConstraintCode and OpInfo.ConstraintType.
If the actual operand /// being passed in is available, it can be passed in as Op, otherwise an - /// empty SDValue can be passed. + /// empty SDValue can be passed. virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG = 0) const; @@ -1646,10 +1656,6 @@ class TargetLowering { /// llvm.longjmp. Defaults to false. bool UseUnderscoreLongJmp; - /// ShiftAmountTy - The type to use for shift amounts, usually i8 or whatever - /// PointerTy is. - MVT ShiftAmountTy; - /// BooleanContents - Information about the contents of the high-bits in /// boolean values held in a type wider than i1. See getBooleanContents. BooleanContent BooleanContents; diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 26b5dd8365f1..2823fbb71997 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -60,7 +60,7 @@ bool RecursivelyDeleteTriviallyDeadInstructions(Value *V); /// dead PHI node, due to being a def-use chain of single-use nodes that /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them -/// too, recursively. Return true if the PHI node is actually deleted. +/// too, recursively. Return true if a change was made. bool RecursivelyDeleteDeadPHINode(PHINode *PN); diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index c1072df72925..590a9c17a8fa 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -31,9 +31,9 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { DIBuilder::DIBuilder(Module &m) : M(m), VMContext(M.getContext()), TheCU(0), DeclareFn(0), ValueFn(0) {} -/// CreateCompileUnit - A CompileUnit provides an anchor for all debugging +/// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. 
-void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename, +void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer, bool isOptimized, StringRef Flags, unsigned RunTimeVer) { @@ -53,9 +53,9 @@ void DIBuilder::CreateCompileUnit(unsigned Lang, StringRef Filename, TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateFile - Create a file descriptor to hold debugging information +/// createFile - Create a file descriptor to hold debugging information /// for a file. -DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) { +DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { assert(TheCU && "Unable to create DW_TAG_file_type without CompileUnit"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_file_type), @@ -66,8 +66,8 @@ DIFile DIBuilder::CreateFile(StringRef Filename, StringRef Directory) { return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateEnumerator - Create a single enumerator value. -DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) { +/// createEnumerator - Create a single enumerator value. +DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), MDString::get(VMContext, Name), @@ -76,9 +76,9 @@ DIEnumerator DIBuilder::CreateEnumerator(StringRef Name, uint64_t Val) { return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateBasicType - Create debugging information entry for a basic +/// createBasicType - Create debugging information entry for a basic /// type, e.g 'char'. -DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits, +DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Encoding) { // Basic types are encoded in DIBasicType format. 
Line number, filename, @@ -98,9 +98,9 @@ DIType DIBuilder::CreateBasicType(StringRef Name, uint64_t SizeInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateQaulifiedType - Create debugging information entry for a qualified +/// createQualifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. -DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) { +DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), @@ -117,8 +117,8 @@ DIType DIBuilder::CreateQualifiedType(unsigned Tag, DIType FromTy) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreatePointerType - Create debugging information entry for a pointer. -DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits, +/// createPointerType - Create debugging information entry for a pointer. +DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, uint64_t AlignInBits, StringRef Name) { // Pointer types are encoded in DIDerivedType format. Value *Elts[] = { @@ -136,8 +136,8 @@ DIType DIBuilder::CreatePointerType(DIType PointeeTy, uint64_t SizeInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateReferenceType - Create debugging information entry for a reference. -DIType DIBuilder::CreateReferenceType(DIType RTy) { +/// createReferenceType - Create debugging information entry for a reference. +DIType DIBuilder::createReferenceType(DIType RTy) { // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_reference_type), @@ -154,8 +154,8 @@ DIType DIBuilder::CreateReferenceType(DIType RTy) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateTypedef - Create debugging information entry for a typedef.
-DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File, +/// createTypedef - Create debugging information entry for a typedef. +DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, unsigned LineNo) { // typedefs are encoded in DIDerivedType format. assert(Ty.Verify() && "Invalid typedef type!"); @@ -174,8 +174,8 @@ DIType DIBuilder::CreateTypedef(DIType Ty, StringRef Name, DIFile File, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateFriend - Create debugging information entry for a 'friend'. -DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) { +/// createFriend - Create debugging information entry for a 'friend'. +DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { // typedefs are encoded in DIDerivedType format. assert(Ty.Verify() && "Invalid type!"); assert(FriendTy.Verify() && "Invalid friend type!"); @@ -194,9 +194,9 @@ DIType DIBuilder::CreateFriend(DIType Ty, DIType FriendTy) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateInheritance - Create debugging information entry to establish +/// createInheritance - Create debugging information entry to establish /// inheritnace relationship between two types. -DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy, +DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) { // TAG_inheritance is encoded in DIDerivedType format. Value *Elts[] = { @@ -214,8 +214,8 @@ DIType DIBuilder::CreateInheritance(DIType Ty, DIType BaseTy, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateMemberType - Create debugging information entry for a member. -DIType DIBuilder::CreateMemberType(StringRef Name, +/// createMemberType - Create debugging information entry for a member. 
+DIType DIBuilder::createMemberType(StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, @@ -236,8 +236,8 @@ DIType DIBuilder::CreateMemberType(StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateClassType - Create debugging information entry for a class. -DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name, +/// createClassType - Create debugging information entry for a class. +DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, @@ -263,10 +263,10 @@ DIType DIBuilder::CreateClassType(DIDescriptor Context, StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateTemplateTypeParameter - Create debugging information for template +/// createTemplateTypeParameter - Create debugging information for template /// type parameter. DITemplateTypeParameter -DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name, +DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, DIType Ty, MDNode *File, unsigned LineNo, unsigned ColumnNo) { Value *Elts[] = { @@ -282,10 +282,10 @@ DIBuilder::CreateTemplateTypeParameter(DIDescriptor Context, StringRef Name, array_lengthof(Elts))); } -/// CreateTemplateValueParameter - Create debugging information for template +/// createTemplateValueParameter - Create debugging information for template /// value parameter. 
DITemplateValueParameter -DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name, +DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, DIType Ty, uint64_t Val, MDNode *File, unsigned LineNo, unsigned ColumnNo) { @@ -303,8 +303,8 @@ DIBuilder::CreateTemplateValueParameter(DIDescriptor Context, StringRef Name, array_lengthof(Elts))); } -/// CreateStructType - Create debugging information entry for a struct. -DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name, +/// createStructType - Create debugging information entry for a struct. +DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, @@ -328,8 +328,8 @@ DIType DIBuilder::CreateStructType(DIDescriptor Context, StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateUnionType - Create debugging information entry for an union. -DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name, +/// createUnionType - Create debugging information entry for an union. +DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, @@ -353,8 +353,8 @@ DIType DIBuilder::CreateUnionType(DIDescriptor Scope, StringRef Name, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateSubroutineType - Create subroutine type. -DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) { +/// createSubroutineType - Create subroutine type. +DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { // TAG_subroutine_type is encoded in DICompositeType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), @@ -374,9 +374,9 @@ DIType DIBuilder::CreateSubroutineType(DIFile File, DIArray ParameterTypes) { return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateEnumerationType - Create debugging information entry for an +/// createEnumerationType - Create debugging information entry for an /// enumeration. -DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name, +DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements) { @@ -402,8 +402,8 @@ DIType DIBuilder::CreateEnumerationType(DIDescriptor Scope, StringRef Name, return DIType(Node); } -/// CreateArrayType - Create debugging information entry for an array. -DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits, +/// createArrayType - Create debugging information entry for an array. +DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_array_type is encoded in DICompositeType format. Value *Elts[] = { @@ -424,8 +424,8 @@ DIType DIBuilder::CreateArrayType(uint64_t Size, uint64_t AlignInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateVectorType - Create debugging information entry for a vector. -DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits, +/// createVectorType - Create debugging information entry for a vector. +DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, DIType Ty, DIArray Subscripts) { // TAG_vector_type is encoded in DICompositeType format. Value *Elts[] = { @@ -446,8 +446,8 @@ DIType DIBuilder::CreateVectorType(uint64_t Size, uint64_t AlignInBits, return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// CreateArtificialType - Create a new DIType with "artificial" flag set. 
-DIType DIBuilder::CreateArtificialType(DIType Ty) { +/// createArtificialType - Create a new DIType with "artificial" flag set. +DIType DIBuilder::createArtificialType(DIType Ty) { if (Ty.isArtificial()) return Ty; @@ -470,24 +470,24 @@ DIType DIBuilder::CreateArtificialType(DIType Ty) { return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); } -/// RetainType - Retain DIType in a module even if it is not referenced +/// retainType - Retain DIType in a module even if it is not referenced /// through debug info anchors. -void DIBuilder::RetainType(DIType T) { +void DIBuilder::retainType(DIType T) { NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty"); NMD->addOperand(T); } -/// CreateUnspecifiedParameter - Create unspeicified type descriptor +/// createUnspecifiedParameter - Create unspecified type descriptor /// for the subroutine type. -DIDescriptor DIBuilder::CreateUnspecifiedParameter() { +DIDescriptor DIBuilder::createUnspecifiedParameter() { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) }; return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1)); } -/// CreateTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::CreateTemporaryType() { +/// createTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::createTemporaryType() { // Give the temporary MDNode a tag. It doesn't matter what tag we // use here as long as DIType accepts it. Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; @@ -495,8 +495,8 @@ DIType DIBuilder::CreateTemporaryType() { return DIType(Node); } -/// CreateTemporaryType - Create a temporary forward-declared type. -DIType DIBuilder::CreateTemporaryType(DIFile F) { +/// createTemporaryType - Create a temporary forward-declared type. +DIType DIBuilder::createTemporaryType(DIFile F) { // Give the temporary MDNode a tag. It doesn't matter what tag we // use here as long as DIType accepts it.
Value *Elts[] = { @@ -509,8 +509,8 @@ DIType DIBuilder::CreateTemporaryType(DIFile F) { return DIType(Node); } -/// GetOrCreateArray - Get a DIArray, create one if required. -DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements) { +/// getOrCreateArray - Get a DIArray, create one if required. +DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) { if (NumElements == 0) { Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); return DIArray(MDNode::get(VMContext, &Null, 1)); @@ -518,9 +518,9 @@ DIArray DIBuilder::GetOrCreateArray(Value *const *Elements, unsigned NumElements return DIArray(MDNode::get(VMContext, Elements, NumElements)); } -/// GetOrCreateSubrange - Create a descriptor for a value range. This +/// getOrCreateSubrange - Create a descriptor for a value range. This /// implicitly uniques the values returned. -DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { +DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subrange_type), ConstantInt::get(Type::getInt64Ty(VMContext), Lo), @@ -530,9 +530,9 @@ DISubrange DIBuilder::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); } -/// CreateGlobalVariable - Create a new descriptor for the specified global. +/// createGlobalVariable - Create a new descriptor for the specified global. 
DIGlobalVariable DIBuilder:: -CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, +createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), @@ -555,10 +555,10 @@ CreateGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, return DIGlobalVariable(Node); } -/// CreateStaticVariable - Create a new descriptor for the specified static +/// createStaticVariable - Create a new descriptor for the specified static /// variable. DIGlobalVariable DIBuilder:: -CreateStaticVariable(DIDescriptor Context, StringRef Name, +createStaticVariable(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, unsigned LineNumber, DIType Ty, bool isLocalToUnit, llvm::Value *Val) { Value *Elts[] = { @@ -582,8 +582,8 @@ CreateStaticVariable(DIDescriptor Context, StringRef Name, return DIGlobalVariable(Node); } -/// CreateVariable - Create a new descriptor for the specified variable. -DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope, +/// createVariable - Create a new descriptor for the specified variable. +DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo, DIType Ty, bool AlwaysPreserve, unsigned Flags) { @@ -614,9 +614,9 @@ DIVariable DIBuilder::CreateLocalVariable(unsigned Tag, DIDescriptor Scope, return DIVariable(Node); } -/// CreateComplexVariable - Create a new descriptor for the specified variable +/// createComplexVariable - Create a new descriptor for the specified variable /// which has a complex address expression for its address. 
-DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope, +DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile F, unsigned LineNo, DIType Ty, Value *const *Addr, @@ -633,8 +633,8 @@ DIVariable DIBuilder::CreateComplexVariable(unsigned Tag, DIDescriptor Scope, return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size())); } -/// CreateFunction - Create a new descriptor for the specified function. -DISubprogram DIBuilder::CreateFunction(DIDescriptor Context, +/// createFunction - Create a new descriptor for the specified function. +DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, @@ -670,8 +670,8 @@ DISubprogram DIBuilder::CreateFunction(DIDescriptor Context, return DISubprogram(Node); } -/// CreateMethod - Create a new descriptor for the specified C++ method. -DISubprogram DIBuilder::CreateMethod(DIDescriptor Context, +/// createMethod - Create a new descriptor for the specified C++ method. +DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, StringRef LinkageName, DIFile F, @@ -710,9 +710,9 @@ DISubprogram DIBuilder::CreateMethod(DIDescriptor Context, return DISubprogram(Node); } -/// CreateNameSpace - This creates new descriptor for a namespace +/// createNameSpace - This creates new descriptor for a namespace /// with the specified parent scope. 
-DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name, +DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_namespace), @@ -724,7 +724,7 @@ DINameSpace DIBuilder::CreateNameSpace(DIDescriptor Scope, StringRef Name, return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File, +DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col) { // Defeat MDNode uniqing for lexical blocks by using unique id. static unsigned int unique_id = 0; @@ -739,8 +739,8 @@ DILexicalBlock DIBuilder::CreateLexicalBlock(DIDescriptor Scope, DIFile File, return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); } -/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. +Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, Instruction *InsertBefore) { assert(Storage && "no storage passed to dbg.declare"); assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare"); @@ -751,8 +751,8 @@ Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); } -/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, +/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
+Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, BasicBlock *InsertAtEnd) { assert(Storage && "no storage passed to dbg.declare"); assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare"); @@ -769,8 +769,8 @@ Instruction *DIBuilder::InsertDeclare(Value *Storage, DIVariable VarInfo, return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd); } -/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable VarInfo, Instruction *InsertBefore) { assert(V && "no value passed to dbg.value"); @@ -784,8 +784,8 @@ Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); } -/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIBuilder::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, +/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. +Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable VarInfo, BasicBlock *InsertAtEnd) { assert(V && "no value passed to dbg.value"); diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index a2f9862383fd..982dacb50bfc 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1161,6 +1161,16 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const TargetData *TD, (A == Op0 || B == Op0)) return Op0; + // ~(A & ?) | A = -1 + if (match(Op0, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op1 || B == Op1)) + return Constant::getAllOnesValue(Op1->getType()); + + // A | ~(A & ?) 
= -1 + if (match(Op1, m_Not(m_And(m_Value(A), m_Value(B)))) && + (A == Op0 || B == Op0)) + return Constant::getAllOnesValue(Op0->getType()); + // Try some generic simplifications for associative operations. if (Value *V = SimplifyAssociativeBinOp(Instruction::Or, Op0, Op1, TD, DT, MaxRecurse)) diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 3db4b6925fca..61fd8f881a8c 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -47,6 +47,8 @@ class AllocationOrder { /// rewind - Start over from the beginning. void rewind() { Pos = 0; } + /// isHint - Return true if PhysReg is a preferred register. + bool isHint(unsigned PhysReg) const { return PhysReg == Hint; } }; } // end namespace llvm diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 43e8990a9da1..9cb882e6a1bb 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -764,7 +764,7 @@ bool AsmPrinter::doFinalization(Module &M) { continue; MCSymbol *Name = Mang->getSymbol(&F); - EmitVisibility(Name, V); + EmitVisibility(Name, V, false); } // Finalize debug and EH information. 
@@ -1820,13 +1820,17 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { } } -void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility) const { +void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, + bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; switch (Visibility) { default: break; case GlobalValue::HiddenVisibility: - Attr = MAI->getHiddenVisibilityAttr(); + if (IsDefinition) + Attr = MAI->getHiddenVisibilityAttr(); + else + Attr = MAI->getHiddenDeclarationVisibilityAttr(); break; case GlobalValue::ProtectedVisibility: Attr = MAI->getProtectedVisibilityAttr(); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5106d5778c29..780fa405ef51 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -31,6 +31,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" @@ -644,12 +645,12 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { uint64_t Element = DV->getAddrElement(i); - if (Element == DIFactory::OpPlus) { + if (Element == DIBuilder::OpPlus) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); - } else if (Element == DIFactory::OpDeref) { + } else if (Element == DIBuilder::OpDeref) { addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIFactory Opcode"); + } else llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. 
@@ -1894,7 +1895,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, DIUnit.getProducer()); - addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1, + addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index cb81aa3c88ce..78a87431feaa 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -501,10 +501,11 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB) { CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2); - MachineFunction *MF = MBB1->getParent(); - if (CommonTailLen == 0) return false; + DEBUG(dbgs() << "Common tail length of BB#" << MBB1->getNumber() + << " and BB#" << MBB2->getNumber() << " is " << CommonTailLen + << '\n'); // It's almost always profitable to merge any number of non-terminator // instructions with the block that falls through into the common successor. @@ -541,6 +542,7 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // we don't have to split a block. At worst we will be introducing 1 new // branch instruction, which is likely to be smaller than the 2 // instructions that would be deleted in the merge. 
+ MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index a1bd972d38e2..38e6c8590269 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -102,8 +102,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass, } } -/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of -/// reloading it. +/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx); @@ -346,7 +345,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { && "Trying to spill a stack slot."); DEBUG(dbgs() << "Inline spilling " << mri_.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\n"); + << ':' << edit.getParent() << "\nFrom original " + << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); @@ -357,12 +357,20 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { return; rc_ = mri_.getRegClass(edit.getReg()); - stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg()); + + // Share a stack slot among all descendants of Orig. + unsigned Orig = vrm_.getOriginal(edit.getReg()); + stackSlot_ = vrm_.getStackSlot(Orig); + if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) + stackSlot_ = vrm_.assignVirt2StackSlot(Orig); + + if (Orig != edit.getReg()) + vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_); // Update LiveStacks now that we are committed to spilling. 
LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); - assert(stacklvr.empty() && "Just created stack slot not empty"); - stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); + if (!stacklvr.hasAtLeastOneValue()) + stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); // Iterate over instructions using register. diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index ad1c537c1911..7871ba9c17e4 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -37,7 +37,7 @@ namespace { public: static char ID; // Pass identification, replacement for typeid LowerSubregsInstructionPass() : MachineFunctionPass(ID) {} - + const char *getPassName() const { return "Subregister lowering instruction pass"; } @@ -64,8 +64,8 @@ namespace { char LowerSubregsInstructionPass::ID = 0; } -FunctionPass *llvm::createLowerSubregsPass() { - return new LowerSubregsInstructionPass(); +FunctionPass *llvm::createLowerSubregsPass() { + return new LowerSubregsInstructionPass(); } /// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, @@ -192,9 +192,9 @@ bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) { /// copies. 
/// bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { - DEBUG(dbgs() << "Machine Function\n" + DEBUG(dbgs() << "Machine Function\n" << "********** LOWERING SUBREG INSTRS **********\n" - << "********** Function: " + << "********** Function: " << MF.getFunction()->getName() << '\n'); TRI = MF.getTarget().getRegisterInfo(); TII = MF.getTarget().getInstrInfo(); diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 85532407ca43..d81e4a1d015f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -396,8 +396,7 @@ void MachineFunction::viewCFGOnly() const /// addLiveIn - Add the specified physical register as a live-in value and /// create a corresponding virtual register for it. unsigned MachineFunction::addLiveIn(unsigned PReg, - const TargetRegisterClass *RC, - DebugLoc DL) { + const TargetRegisterClass *RC) { MachineRegisterInfo &MRI = getRegInfo(); unsigned VReg = MRI.getLiveInVirtReg(PReg); if (VReg) { @@ -406,7 +405,6 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, } VReg = MRI.createVirtualRegister(RC); MRI.addLiveIn(PReg, VReg); - MRI.addLiveInLoc(VReg, DL); return VReg; } @@ -646,6 +644,10 @@ MachineConstantPool::~MachineConstantPool() { for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (Constants[i].isMachineConstantPoolEntry()) delete Constants[i].Val.MachineCPVal; + for (DenseSet::iterator I = + MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end(); + I != E; ++I) + delete *I; } /// CanShareConstantPoolEntry - Test whether the given two constants @@ -723,8 +725,10 @@ unsigned MachineConstantPool::getConstantPoolIndex(MachineConstantPoolValue *V, // // FIXME, this could be made much more efficient for large constant pools. 
int Idx = V->getExistingMachineCPValue(this, Alignment); - if (Idx != -1) + if (Idx != -1) { + MachineCPVsSharingEntries.insert(V); return (unsigned)Idx; + } Constants.push_back(MachineConstantPoolEntry(V, Alignment)); return Constants.size()-1; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index b3fb33736ffc..7244d5f03a90 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -210,15 +210,8 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, LiveIns.erase(LiveIns.begin() + i); --i; --e; } else { - DebugLoc DL; - // If there is a location for this live in then use it. - DenseMap::iterator DLI = - LiveInLocs.find(LiveIns[i].second); - if (DLI != LiveInLocs.end()) - DL = DLI->second; - // Emit a copy. - BuildMI(*EntryMBB, EntryMBB->begin(), DL, + BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), TII.get(TargetOpcode::COPY), LiveIns[i].second) .addReg(LiveIns[i].first); diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 8c7e5f53b824..5af0ce79acf7 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -39,7 +39,6 @@ #include "llvm/ADT/OwningPtr.h" #include "LiveIntervalUnion.h" -#include namespace llvm { @@ -58,8 +57,8 @@ class LiveVirtRegQueue; /// be extended to add interesting heuristics. /// /// Register allocators must override the selectOrSplit() method to implement -/// live range splitting. They may also override getPriority() which otherwise -/// defaults to the spill weight computed by CalculateSpillWeights. +/// live range splitting. They must also override enqueue/dequeue to provide an +/// assignment order. class RegAllocBase { LiveIntervalUnion::Allocator UnionAllocator; protected: @@ -120,9 +119,11 @@ class RegAllocBase { // Get a temporary reference to a Spiller instance. virtual Spiller &spiller() = 0; - // getPriority - Calculate the allocation priority for VirtReg. 
- // Virtual registers with higher priorities are allocated first. - virtual float getPriority(LiveInterval *LI) = 0; + /// enqueue - Add VirtReg to the priority queue of unassigned registers. + virtual void enqueue(LiveInterval *LI) = 0; + + /// dequeue - Return the next unassigned register, or NULL. + virtual LiveInterval *dequeue() = 0; // A RegAlloc pass should override this to provide the allocation heuristics. // Each call must guarantee forward progess by returning an available PhysReg @@ -170,7 +171,7 @@ class RegAllocBase { static bool VerifyEnabled; private: - void seedLiveVirtRegs(std::priority_queue >&); + void seedLiveRegs(); void spillReg(LiveInterval &VirtReg, unsigned PhysReg, SmallVectorImpl &SplitVRegs); diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 045c8db9dadb..6923908a32d9 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/Timer.h" #include +#include using namespace llvm; @@ -64,6 +65,14 @@ VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), const char *RegAllocBase::TimerGroupName = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; +namespace { + struct CompSpillWeight { + bool operator()(LiveInterval *A, LiveInterval *B) const { + return A->weight < B->weight; + } + }; +} + namespace { /// RABasic provides a minimal implementation of the basic register allocation /// algorithm. 
It prioritizes live virtual registers by spill weight and spills @@ -82,7 +91,8 @@ class RABasic : public MachineFunctionPass, public RegAllocBase // state std::auto_ptr SpillerInstance; - + std::priority_queue, + CompSpillWeight> Queue; public: RABasic(); @@ -100,6 +110,18 @@ class RABasic : public MachineFunctionPass, public RegAllocBase virtual float getPriority(LiveInterval *LI) { return LI->weight; } + virtual void enqueue(LiveInterval *LI) { + Queue.push(LI); + } + + virtual LiveInterval *dequeue() { + if (Queue.empty()) + return 0; + LiveInterval *LI = Queue.top(); + Queue.pop(); + return LI; + } + virtual unsigned selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &SplitVRegs); @@ -227,18 +249,17 @@ void RegAllocBase::releaseMemory() { PhysReg2LiveUnion.clear(); } -// Visit all the live virtual registers. If they are already assigned to a -// physical register, unify them with the corresponding LiveIntervalUnion, -// otherwise push them on the priority queue for later assignment. -void RegAllocBase:: -seedLiveVirtRegs(std::priority_queue > &VirtRegQ) { +// Visit all the live registers. If they are already assigned to a physical +// register, unify them with the corresponding LiveIntervalUnion, otherwise push +// them on the priority queue for later assignment. +void RegAllocBase::seedLiveRegs() { for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { unsigned RegNum = I->first; LiveInterval &VirtReg = *I->second; if (TargetRegisterInfo::isPhysicalRegister(RegNum)) PhysReg2LiveUnion[RegNum].unify(VirtReg); else - VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum)); + enqueue(&VirtReg); } } @@ -263,38 +284,31 @@ void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) { // Top-level driver to manage the queue of unassigned VirtRegs and call the // selectOrSplit implementation. void RegAllocBase::allocatePhysRegs() { - - // Push each vreg onto a queue or "precolor" by adding it to a physreg union. 
- std::priority_queue > VirtRegQ; - seedLiveVirtRegs(VirtRegQ); + seedLiveRegs(); // Continue assigning vregs one at a time to available physical registers. - while (!VirtRegQ.empty()) { - // Pop the highest priority vreg. - LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second); - VirtRegQ.pop(); - + while (LiveInterval *VirtReg = dequeue()) { // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. - DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName() - << ':' << VirtReg << '\n'); + DEBUG(dbgs() << "\nselectOrSplit " + << MRI->getRegClass(VirtReg->reg)->getName() + << ':' << *VirtReg << '\n'); typedef SmallVector VirtRegVec; VirtRegVec SplitVRegs; - unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs); + unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg) - assign(VirtReg, AvailablePhysReg); + assign(*VirtReg, AvailablePhysReg); for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { - LiveInterval* SplitVirtReg = *I; + LiveInterval *SplitVirtReg = *I; if (SplitVirtReg->empty()) continue; DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); - VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg), - SplitVirtReg->reg)); + enqueue(SplitVirtReg); ++NumNewQueued; } } diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index c1372cd038cf..406485aaf496 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -43,6 +43,8 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" +#include + using namespace llvm; STATISTIC(NumGlobalSplits, "Number of split global live ranges"); @@ -71,6 +73,8 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { 
// state std::auto_ptr SpillerInstance; std::auto_ptr SA; + std::priority_queue > Queue; + IndexedMap Generation; // splitting state. @@ -91,13 +95,10 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { /// RAGreedy analysis usage. virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual void releaseMemory(); - virtual Spiller &spiller() { return *SpillerInstance; } - - virtual float getPriority(LiveInterval *LI); - + virtual void enqueue(LiveInterval *LI); + virtual LiveInterval *dequeue(); virtual unsigned selectOrSplit(LiveInterval&, SmallVectorImpl&); @@ -119,9 +120,12 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { SlotIndex getPrevMappedIndex(const MachineInstr*); void calcPrevSlots(); unsigned nextSplitPoint(unsigned); + bool canEvictInterference(LiveInterval&, unsigned, unsigned, float&); - unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&, + unsigned tryReassign(LiveInterval&, AllocationOrder&, SmallVectorImpl&); + unsigned tryEvict(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, @@ -183,25 +187,42 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { void RAGreedy::releaseMemory() { SpillerInstance.reset(0); + Generation.clear(); RegAllocBase::releaseMemory(); } -float RAGreedy::getPriority(LiveInterval *LI) { - float Priority = LI->weight; +void RAGreedy::enqueue(LiveInterval *LI) { + // Prioritize live ranges by size, assigning larger ranges first. + // The queue holds (size, reg) pairs. + const unsigned Size = LI->getSize(); + const unsigned Reg = LI->reg; + assert(TargetRegisterInfo::isVirtualRegister(Reg) && + "Can only enqueue virtual registers"); + const unsigned Hint = VRM->getRegAllocPref(Reg); + unsigned Prio; - // Prioritize hinted registers so they are allocated first. 
- std::pair Hint; - if (Hint.first || Hint.second) { - // The hint can be target specific, a virtual register, or a physreg. - Priority *= 2; + Generation.grow(Reg); + if (++Generation[Reg] == 1) + // 1st generation ranges are handled first, long -> short. + Prio = (1u << 31) + Size; + else + // Repeat offenders are handled second, short -> long + Prio = (1u << 30) - Size; - // Prefer physreg hints above anything else. - if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second)) - Priority *= 2; - } - return Priority; + // Boost ranges that have a physical register hint. + if (TargetRegisterInfo::isPhysicalRegister(Hint)) + Prio |= (1u << 30); + + Queue.push(std::make_pair(Prio, Reg)); } +LiveInterval *RAGreedy::dequeue() { + if (Queue.empty()) + return 0; + LiveInterval *LI = &LIS->getInterval(Queue.top().second); + Queue.pop(); + return LI; +} //===----------------------------------------------------------------------===// // Register Reassignment @@ -230,8 +251,7 @@ LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg, if (Q.checkInterference()) { if (Interference) return 0; - Q.collectInterferingVRegs(1); - if (!Q.seenAllInterferences()) + if (Q.collectInterferingVRegs(2) > 1) return 0; Interference = Q.interferingVRegs().front(); } @@ -276,21 +296,14 @@ bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg, return false; } -/// tryReassignOrEvict - Try to reassign a single interferences to a different -/// physreg, or evict a single interference with a lower spill weight. +/// tryReassign - Try to reassign a single interference to a different physreg. /// @param VirtReg Currently unassigned virtual register. /// @param Order Physregs to try. /// @return Physreg to assign VirtReg, or 0. 
-unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg, - AllocationOrder &Order, - SmallVectorImpl &NewVRegs){ +unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, + SmallVectorImpl &NewVRegs){ NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled); - // Keep track of the lightest single interference seen so far. - float BestWeight = VirtReg.weight; - LiveInterval *BestVirt = 0; - unsigned BestPhys = 0; - Order.rewind(); while (unsigned PhysReg = Order.next()) { LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg); @@ -300,25 +313,92 @@ unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg, continue; if (reassignVReg(*InterferingVReg, PhysReg)) return PhysReg; + } + return 0; +} - // Cannot reassign, is this an eviction candidate? - if (InterferingVReg->weight < BestWeight) { - BestVirt = InterferingVReg; - BestPhys = PhysReg; - BestWeight = InterferingVReg->weight; + +//===----------------------------------------------------------------------===// +// Interference eviction +//===----------------------------------------------------------------------===// + +/// canEvict - Return true if all interferences between VirtReg and PhysReg can +/// be evicted. Set maxWeight to the maximal spill weight of an interference. +bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, + unsigned Size, float &MaxWeight) { + float Weight = 0; + for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + // If there is 10 or more interferences, chances are one is smaller. + if (Q.collectInterferingVRegs(10) >= 10) + return false; + + // CHeck if any interfering live range is shorter than VirtReg. 
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *Intf = Q.interferingVRegs()[i]; + if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) + return false; + if (Intf->getSize() <= Size) + return false; + Weight = std::max(Weight, Intf->weight); } } + MaxWeight = Weight; + return true; +} - // Nothing reassigned, can we evict a lighter single interference? - if (BestVirt) { - DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n'); - unassign(*BestVirt, VRM->getPhys(BestVirt->reg)); - ++NumEvicted; - NewVRegs.push_back(BestVirt); - return BestPhys; +/// tryEvict - Try to evict all interferences for a physreg. +/// @param VirtReg Currently unassigned virtual register. +/// @param Order Physregs to try. +/// @return Physreg to assign VirtReg, or 0. +unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl &NewVRegs){ + NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); + + // We can only evict interference if all interfering registers are virtual and + // longer than VirtReg. + const unsigned Size = VirtReg.getSize(); + + // Keep track of the lightest single interference seen so far. + float BestWeight = 0; + unsigned BestPhys = 0; + + Order.rewind(); + while (unsigned PhysReg = Order.next()) { + float Weight = 0; + if (!canEvictInterference(VirtReg, PhysReg, Size, Weight)) + continue; + + // This is an eviction candidate. + DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " + << Weight << '\n'); + if (BestPhys && Weight >= BestWeight) + continue; + + // Best so far. + BestPhys = PhysReg; + BestWeight = Weight; + // Stop if the hint can be used. 
+ if (Order.isHint(PhysReg)) + break; } - return 0; + if (!BestPhys) + return 0; + + DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n"); + for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) { + LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); + assert(Q.seenAllInterferences() && "Didn't check all interfererences."); + for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { + LiveInterval *Intf = Q.interferingVRegs()[i]; + unassign(*Intf, VRM->getPhys(Intf->reg)); + ++NumEvicted; + NewVRegs.push_back(Intf); + } + } + return BestPhys; } @@ -426,8 +506,13 @@ float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { if (!IntI.valid()) break; // Not live in, but before the first use. - if (IntI.start() < BI.FirstUse) + if (IntI.start() < BI.FirstUse) { BC.Entry = SpillPlacement::PrefSpill; + // If the block contains a kill from an earlier split, never split + // again in the same block. + if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Kill)) + BC.Entry = SpillPlacement::MustSpill; + } } // Does interference overlap the uses in the entry segment @@ -458,8 +543,12 @@ float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { IntI.advanceTo(BI.LastUse); if (!IntI.valid()) break; - if (IntI.start() < Stop) + if (IntI.start() < Stop) { BC.Exit = SpillPlacement::PrefSpill; + // Avoid splitting twice in the same block. + if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Def)) + BC.Exit = SpillPlacement::MustSpill; + } } } } @@ -1221,12 +1310,22 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, return PhysReg; } - // Try to reassign interferences. 
- if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs)) + return PhysReg; + + if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) return PhysReg; assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); + // The first time we see a live range, don't try to split or spill. + // Wait until the second time, when all smaller ranges have been allocated. + // This gives a better picture of the interference to split around. + if (Generation[VirtReg.reg] == 1) { + NewVRegs.push_back(&VirtReg); + return 0; + } + // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 90356021f602..9cc70a30927d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -279,8 +279,8 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. - EVT getShiftAmountTy() { - return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); + EVT getShiftAmountTy(EVT LHSTy) { + return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -670,7 +670,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { if (LoadSDNode *LD = dyn_cast(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; @@ -894,7 +894,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { LoadSDNode *LD = cast(N); EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) - ? 
(TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD + ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, @@ -1521,7 +1521,7 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { + SelectionDAG &DAG, bool LegalOperations) { if (!VT.isVector()) { return DAG.getConstant(0, VT); } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { @@ -1647,7 +1647,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { if (N1C && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); @@ -1656,7 +1656,8 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, DAG.getConstant(0, VT), DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getConstant(Log2Val, getShiftAmountTy()))); + DAG.getConstant(Log2Val, + getShiftAmountTy(N0.getValueType())))); } // (mul (shl X, c1), c2) -> (mul X, c2 << c1) if (N1C && N0.getOpcode() == ISD::SHL && @@ -1753,18 +1754,18 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); AddToWorkList(SGN.getNode()); // Add (N0 < 0) ? 
abs2 - 1 : 0; SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, DAG.getConstant(VT.getSizeInBits() - lg2, - getShiftAmountTy())); + getShiftAmountTy(SGN.getValueType()))); SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); AddToWorkList(SRL.getNode()); AddToWorkList(ADD.getNode()); // Divide by pow2 SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, - DAG.getConstant(lg2, getShiftAmountTy())); + DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. @@ -1814,7 +1815,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (N1C && N1C->getAPIntValue().isPowerOf2()) return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { @@ -1955,7 +1956,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { if (N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) return DAG.getConstant(0, VT); @@ -1971,11 +1972,11 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } - + return SDValue(); } @@ -2007,11 +2008,11 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); N1 = 
DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); } } - + return SDValue(); } @@ -2090,14 +2091,14 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } - + return SDValue(); } @@ -2107,7 +2108,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { EVT VT = N->getValueType(0); DebugLoc DL = N->getDebugLoc(); - + // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. if (VT.isSimple() && !VT.isVector()) { @@ -2120,14 +2121,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); // Compute the high part as N1. Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, - DAG.getConstant(SimpleSize, getShiftAmountTy())); + DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); // Compute the low part as N0. 
Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); return CombineTo(N, Lo, Hi); } } - + return SDValue(); } @@ -3004,7 +3005,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { N0.getOpcode() == ISD::SIGN_EXTEND) && N0.getOperand(0).getOpcode() == ISD::SHL && isa(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = + uint64_t c1 = cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); @@ -3133,7 +3134,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && TLI.isTruncateFree(VT, TruncVT)) { - SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy()); + SDValue Amt = DAG.getConstant(ShiftAmt, + getShiftAmountTy(N0.getOperand(0).getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, N0.getOperand(0), Amt); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, @@ -3180,7 +3182,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { LargeShiftAmt->getZExtValue()) { SDValue Amt = DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), - getShiftAmountTy()); + getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, N0.getOperand(0).getOperand(0), Amt); return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); @@ -3245,7 +3247,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::TRUNCATE && N0.getOperand(0).getOpcode() == ISD::SRL && isa(N0.getOperand(0)->getOperand(1))) { - uint64_t c1 = + uint64_t c1 = cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); uint64_t c2 = N1C->getZExtValue(); EVT InnerShiftVT = N0.getOperand(0).getValueType(); @@ -3256,7 +3258,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, VT); return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, - DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + DAG.getNode(ISD::SRL, 
N0->getDebugLoc(), InnerShiftVT, N0.getOperand(0)->getOperand(0), DAG.getConstant(c1 + c2, ShiftCountVT))); } @@ -3320,7 +3322,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (ShAmt) { Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, - DAG.getConstant(ShAmt, getShiftAmountTy())); + DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); AddToWorkList(Op.getNode()); } @@ -3685,7 +3687,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and sign extend + // in one instruction. We only perform this transformation on scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -3887,7 +3891,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } // fold (zext (load x)) -> (zext (truncate (zextload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and vector_zext + // in one instruction. We only perform this transformation on scalar zext. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4021,11 +4027,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { } DebugLoc DL = N->getDebugLoc(); - - // Ensure that the shift amount is wide enough for the shifted value. + + // Ensure that the shift amount is wide enough for the shifted value. 
if (VT.getSizeInBits() >= 256) ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); - + return DAG.getNode(N0.getOpcode(), DL, VT, DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), ShAmt); @@ -4094,7 +4100,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // fold (aext (load x)) -> (aext (truncate (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and any_ext + // in one instruction. We only perform this transformation on scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4272,12 +4280,12 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { return SDValue(); unsigned EVTBits = ExtVT.getSizeInBits(); - + // Do not generate loads of non-round integer types since these can // be expensive (and would be wrong if the type is not byte sized). if (!ExtVT.isRound()) return SDValue(); - + unsigned ShAmt = 0; if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { if (ConstantSDNode *N01 = dyn_cast(N0.getOperand(1))) { @@ -4292,7 +4300,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // At this point, we must have a load or else we can't do the transform. if (!isa(N0)) return SDValue(); - + // If the shift amount is larger than the input type then we're not // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). @@ -4313,18 +4321,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { N0 = N0.getOperand(0); } } - + // If we haven't found a load, we can't narrow it. Don't transform one with // multiple uses, this would require adding a new load. if (!isa(N0) || !N0.hasOneUse() || // Don't change the width of a volatile load. cast(N0)->isVolatile()) return SDValue(); - + // Verify that we are actually reducing a load width here. 
if (cast(N0)->getMemoryVT().getSizeInBits() < EVTBits) return SDValue(); - + LoadSDNode *LN0 = cast(N0); EVT PtrType = N0.getOperand(1).getValueType(); @@ -4362,7 +4370,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // Shift the result left, if we've swallowed a left shift. SDValue Result = Load; if (ShLeftAmt != 0) { - EVT ShImmTy = getShiftAmountTy(); + EVT ShImmTy = getShiftAmountTy(Result.getValueType()); if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) ShImmTy = VT; Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, @@ -4504,14 +4512,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // See if we can simplify the input to this truncate through knowledge that - // only the low bits are being used. For example "trunc (or (shl x, 8), y)" - // -> trunc y - SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits())); - if (Shorter.getNode()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); - + // only the low bits are being used. + // For example "trunc (or (shl x, 8), y)" // -> trunc y + // Currenly we only perform this optimization on scalars because vectors + // may have different active low bits. + if (!VT.isVector()) { + SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits())); + if (Shorter.getNode()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { @@ -5975,7 +5986,8 @@ ShrinkLoadReplaceStoreWithStore(const std::pair &MaskInfo, // shifted by ByteShift and truncated down to NumBytes. 
if (ByteShift) IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal, - DAG.getConstant(ByteShift*8, DC->getShiftAmountTy())); + DAG.getConstant(ByteShift*8, + DC->getShiftAmountTy(IVal.getValueType()))); // Figure out the offset for the store and the alignment of the access. unsigned StOffset; @@ -6390,7 +6402,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { EVT VT = InVec.getValueType(); - // If we can't generate a legal BUILD_VECTOR, exit + // If we can't generate a legal BUILD_VECTOR, exit if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return SDValue(); @@ -7098,7 +7110,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { unsigned ShCtV = N2C->getAPIntValue().logBase2(); ShCtV = XType.getSizeInBits()-ShCtV-1; - SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy()); + SDValue ShCt = DAG.getConstant(ShCtV, + getShiftAmountTy(N0.getValueType())); SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, ShCt); AddToWorkList(Shift.getNode()); @@ -7114,7 +7127,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); AddToWorkList(Shift.getNode()); if (XType.bitsGT(AType)) { @@ -7142,13 +7155,15 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Shift the tested bit over the sign bit. APInt AndMask = ConstAndRHS->getAPIntValue(); SDValue ShlAmt = - DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy()); + DAG.getConstant(AndMask.countLeadingZeros(), + getShiftAmountTy(AndLHS.getValueType())); SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); // Now arithmetic right shift it all the way over, so the result is either // all-ones, or zero. 
SDValue ShrAmt = - DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy()); + DAG.getConstant(AndMask.getBitWidth()-1, + getShiftAmountTy(Shl.getValueType())); SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); return DAG.getNode(ISD::AND, DL, VT, Shr, N3); @@ -7192,7 +7207,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // shl setcc result by log2 n2c return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy())); + getShiftAmountTy(Temp.getValueType()))); } // Check to see if this is the equivalent of setcc @@ -7215,7 +7230,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), - getShiftAmountTy())); + getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { @@ -7225,13 +7240,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(XType))); } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); } } @@ -7258,7 +7273,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(N0.getValueType()))); SDValue 
Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, N0, Shift); AddToWorkList(Shift.getNode()); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 98582ba99f14..2ae3286829dd 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -219,6 +219,7 @@ void FunctionLoweringInfo::clear() { CatchInfoFound.clear(); #endif LiveOutRegInfo.clear(); + VisitedBBs.clear(); ArgDbgValues.clear(); ByValArgFrameIndexMap.clear(); RegFixups.clear(); @@ -254,6 +255,123 @@ unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { return FirstReg; } +/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the +/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If +/// the register's LiveOutInfo is for a smaller bit width, it is extended to +/// the larger bit width by zero extension. The bit width must be no smaller +/// than the LiveOutInfo's existing bit width. +const FunctionLoweringInfo::LiveOutInfo * +FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) { + if (!LiveOutRegInfo.inBounds(Reg)) + return NULL; + + LiveOutInfo *LOI = &LiveOutRegInfo[Reg]; + if (!LOI->IsValid) + return NULL; + + if (BitWidth > LOI->KnownZero.getBitWidth()) { + LOI->NumSignBits = 1; + LOI->KnownZero = LOI->KnownZero.zextOrTrunc(BitWidth); + LOI->KnownOne = LOI->KnownOne.zextOrTrunc(BitWidth); + } + + return LOI; +} + +/// ComputePHILiveOutRegInfo - Compute LiveOutInfo for a PHI's destination +/// register based on the LiveOutInfo of its operands. 
+void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) { + const Type *Ty = PN->getType(); + if (!Ty->isIntegerTy() || Ty->isVectorTy()) + return; + + SmallVector ValueVTs; + ComputeValueVTs(TLI, Ty, ValueVTs); + assert(ValueVTs.size() == 1 && + "PHIs with non-vector integer types should have a single VT."); + EVT IntVT = ValueVTs[0]; + + if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1) + return; + IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT); + unsigned BitWidth = IntVT.getSizeInBits(); + + unsigned DestReg = ValueMap[PN]; + if (!TargetRegisterInfo::isVirtualRegister(DestReg)) + return; + LiveOutRegInfo.grow(DestReg); + LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg]; + + Value *V = PN->getIncomingValue(0); + if (isa(V) || isa(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast(V)) { + APInt Val = CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = Val.getNumSignBits(); + DestLOI.KnownZero = ~Val; + DestLOI.KnownOne = Val; + } else { + assert(ValueMap.count(V) && "V should have been placed in ValueMap when its" + "CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI = *SrcLOI; + } + + assert(DestLOI.KnownZero.getBitWidth() == BitWidth && + DestLOI.KnownOne.getBitWidth() == BitWidth && + "Masks should have the same bit width as the type."); + + for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + Value *V = PN->getIncomingValue(i); + if (isa(V) || isa(V)) { + DestLOI.NumSignBits = 1; + APInt Zero(BitWidth, 0); + DestLOI.KnownZero = Zero; + DestLOI.KnownOne = Zero; + return; + } + + if (ConstantInt *CI = dyn_cast(V)) { + APInt Val = 
CI->getValue().zextOrTrunc(BitWidth); + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, Val.getNumSignBits()); + DestLOI.KnownZero &= ~Val; + DestLOI.KnownOne &= Val; + continue; + } + + assert(ValueMap.count(V) && "V should have been placed in ValueMap when " + "its CopyToReg node was created."); + unsigned SrcReg = ValueMap[V]; + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) { + DestLOI.IsValid = false; + return; + } + const LiveOutInfo *SrcLOI = GetLiveOutRegInfo(SrcReg, BitWidth); + if (!SrcLOI) { + DestLOI.IsValid = false; + return; + } + DestLOI.NumSignBits = std::min(DestLOI.NumSignBits, SrcLOI->NumSignBits); + DestLOI.KnownZero &= SrcLOI->KnownZero; + DestLOI.KnownOne &= SrcLOI->KnownOne; + } +} + /// setByValArgumentFrameIndex - Record frame index for the byval /// argument. This overrides previous frame index entry for this argument, /// if any. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 49c862ce3e0b..f08528fe2dc3 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -87,7 +87,7 @@ class SelectionDAGLegalize { // If someone requests legalization of the new node, return itself. if (From != To) LegalizedNodes.insert(std::make_pair(To, To)); - + // Transfer SDDbgValues. DAG.TransferDbgValues(From, To); } @@ -498,7 +498,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, int IncrementSize = NumBits / 8; // Divide the stored value in two parts. 
- SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Val.getValueType())); SDValue Lo = Val; SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount); @@ -645,7 +646,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, } // aggregate the two parts - SDValue ShiftAmount = DAG.getConstant(NumBits, TLI.getShiftAmountTy()); + SDValue ShiftAmount = DAG.getConstant(NumBits, + TLI.getShiftAmountTy(Hi.getValueType())); SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount); Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo); @@ -1264,7 +1266,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1293,7 +1296,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Move the top bits to the right place. Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi, - DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); @@ -1482,7 +1486,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(RoundWidth, TLI.getShiftAmountTy())); + DAG.getConstant(RoundWidth, + TLI.getShiftAmountTy(Tmp3.getValueType()))); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo().getWithOffset(IncrementSize), ExtraVT, isVolatile, isNonTemporal, @@ -1492,7 +1497,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X // Store the top RoundWidth bits. Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3, - DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy())); + DAG.getConstant(ExtraWidth, + TLI.getShiftAmountTy(Tmp3.getValueType()))); Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(), RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1727,7 +1733,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(BitShift < LoadTy.getSizeInBits() && "Pointer advanced wrong?"); if (BitShift) SignBit = DAG.getNode(ISD::SHL, dl, LoadTy, SignBit, - DAG.getConstant(BitShift,TLI.getShiftAmountTy())); + DAG.getConstant(BitShift, + TLI.getShiftAmountTy(SignBit.getValueType()))); } } // Now get the sign bit proper, by seeing whether the value is negative. 
@@ -2207,7 +2214,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (!isSigned) { SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0); - SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy()); + SDValue ShiftConst = + DAG.getConstant(1, TLI.getShiftAmountTy(Op0.getValueType())); SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst); SDValue AndConst = DAG.getConstant(1, MVT::i64); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst); @@ -2226,7 +2234,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, } // Otherwise, implement the fully general conversion. - EVT SHVT = TLI.getShiftAmountTy(); SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); @@ -2241,6 +2248,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64), ISD::SETUGE); SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0); + EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType()); SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2, DAG.getConstant(32, SHVT)); @@ -2387,7 +2395,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, /// SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { EVT VT = Op.getValueType(); - EVT SHVT = TLI.getShiftAmountTy(); + EVT SHVT = TLI.getShiftAmountTy(VT); SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8; switch (VT.getSimpleVT().SimpleTy) { default: assert(0 && "Unhandled Expand type in BSWAP!"); @@ -2450,7 +2458,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, default: assert(0 && "Cannot expand this yet!"); case ISD::CTPOP: { EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(); + EVT ShVT = TLI.getShiftAmountTy(VT); unsigned Len = VT.getSizeInBits(); assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 && @@ -2487,7 +2495,7 @@ SDValue 
SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, Op = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01), DAG.getConstant(Len - 8, ShVT)); - + return Op; } case ISD::CTLZ: { @@ -2501,7 +2509,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // // but see also: http://www.hackersdelight.org/HDcode/nlz.cc EVT VT = Op.getValueType(); - EVT ShVT = TLI.getShiftAmountTy(); + EVT ShVT = TLI.getShiftAmountTy(VT); unsigned len = VT.getSizeInBits(); for (unsigned i = 0; (1U << i) <= (len / 2); ++i) { SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT); @@ -2737,7 +2745,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // SAR. However, it is doubtful that any exist. EVT ExtraVT = cast(Node->getOperand(1))->getVT(); EVT VT = Node->getValueType(0); - EVT ShiftAmountTy = TLI.getShiftAmountTy(); + EVT ShiftAmountTy = TLI.getShiftAmountTy(VT); if (VT.isVector()) ShiftAmountTy = VT; unsigned BitsDiff = VT.getScalarType().getSizeInBits() - @@ -2901,7 +2909,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // 1 -> Hi Tmp1 = DAG.getNode(ISD::SRL, dl, OpTy, Node->getOperand(0), DAG.getConstant(OpTy.getSizeInBits()/2, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(Node->getOperand(0).getValueType()))); Tmp1 = DAG.getNode(ISD::TRUNCATE, dl, Node->getValueType(0), Tmp1); } else { // 0 -> Lo @@ -3260,7 +3268,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); - + SDValue Ret = ExpandLibCall(LC, Node, isSigned); BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, @@ -3268,7 +3276,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); } if (isSigned) { - Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, 
TLI.getShiftAmountTy()); + Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, + TLI.getShiftAmountTy(BottomHalf.getValueType())); Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1); TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1, ISD::SETNE); @@ -3286,7 +3295,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, PairTy, Node->getOperand(1)); Tmp2 = DAG.getNode(ISD::SHL, dl, PairTy, Tmp2, DAG.getConstant(PairTy.getSizeInBits()/2, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(PairTy))); Results.push_back(DAG.getNode(ISD::OR, dl, PairTy, Tmp1, Tmp2)); break; } @@ -3464,7 +3473,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0)); Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1); Tmp1 = DAG.getNode(ISD::SRL, dl, NVT, Tmp1, - DAG.getConstant(DiffBits, TLI.getShiftAmountTy())); + DAG.getConstant(DiffBits, TLI.getShiftAmountTy(NVT))); Results.push_back(Tmp1); break; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 27752123aac4..27a466b3a928 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -177,25 +177,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { // First get the sign bit of second operand. SDValue SignBit = DAG.getNode(ISD::SHL, dl, RVT, DAG.getConstant(1, RVT), DAG.getConstant(RSize - 1, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(RVT))); SignBit = DAG.getNode(ISD::AND, dl, RVT, RHS, SignBit); // Shift right or sign-extend it if the two operands have different types. 
int SizeDiff = RVT.getSizeInBits() - LVT.getSizeInBits(); if (SizeDiff > 0) { SignBit = DAG.getNode(ISD::SRL, dl, RVT, SignBit, - DAG.getConstant(SizeDiff, TLI.getShiftAmountTy())); + DAG.getConstant(SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); SignBit = DAG.getNode(ISD::TRUNCATE, dl, LVT, SignBit); } else if (SizeDiff < 0) { SignBit = DAG.getNode(ISD::ANY_EXTEND, dl, LVT, SignBit); SignBit = DAG.getNode(ISD::SHL, dl, LVT, SignBit, - DAG.getConstant(-SizeDiff, TLI.getShiftAmountTy())); + DAG.getConstant(-SizeDiff, + TLI.getShiftAmountTy(SignBit.getValueType()))); } // Clear the sign bit of the first operand. SDValue Mask = DAG.getNode(ISD::SHL, dl, LVT, DAG.getConstant(1, LVT), DAG.getConstant(LSize - 1, - TLI.getShiftAmountTy())); + TLI.getShiftAmountTy(LVT))); Mask = DAG.getNode(ISD::SUB, dl, LVT, Mask, DAG.getConstant(1, LVT)); LHS = DAG.getNode(ISD::AND, dl, LVT, LHS, Mask); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 2fb2f2d8aa1e..9120288921e2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1420,7 +1420,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(); + MVT ShTy = TLI.getShiftAmountTy(OpTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -2048,7 +2048,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask, return; } break; - + default: // Allow the target to implement this method for its nodes. if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { @@ -2088,12 +2088,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ case ISD::Constant: { const APInt &Val = cast(Op)->getAPIntValue(); - // If negative, return # leading ones. 
- if (Val.isNegative()) - return Val.countLeadingOnes(); - - // Return # leading zeros. - return Val.countLeadingZeros(); + return Val.getNumSignBits(); } case ISD::SIGN_EXTEND: @@ -2297,12 +2292,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const { if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) || !isa(Op.getOperand(1))) return false; - - if (Op.getOpcode() == ISD::OR && + + if (Op.getOpcode() == ISD::OR && !MaskedValueIsZero(Op.getOperand(0), cast(Op.getOperand(1))->getAPIntValue())) return false; - + return true; } @@ -2753,7 +2748,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // i8, which is easy to fall into in generic code that uses // TLI.getShiftAmount(). assert(N2.getValueType().getSizeInBits() >= - Log2_32_Ceil(N1.getValueType().getSizeInBits()) && + Log2_32_Ceil(N1.getValueType().getSizeInBits()) && "Invalid use of small shift amount with oversized value!"); // Always fold shifts of i1 values so the code generator doesn't need to diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 452f5614b7bf..48d9bbb5132e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -641,16 +641,17 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // If the source register was virtual and if we know something about it, // add an assert node. 
if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || - !RegisterVT.isInteger() || RegisterVT.isVector() || - !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i])) + !RegisterVT.isInteger() || RegisterVT.isVector()) + continue; + + const FunctionLoweringInfo::LiveOutInfo *LOI = + FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); + if (!LOI) continue; - - const FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo.LiveOutRegInfo[Regs[Part+i]]; unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + unsigned NumSignBits = LOI->NumSignBits; + unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. @@ -908,7 +909,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, Val.getResNo(), Offset, dl, DbgSDNodeOrder); DAG.AddDbgValue(SDV, Val.getNode(), false); } - } else + } else DEBUG(dbgs() << "Dropping debug info for " << DI); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } @@ -1417,7 +1418,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TLI.isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1915,7 +1916,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, DEBUG(dbgs() << "Lowering jump table\n" << "First entry: " << First << ". Last entry: " << Last << '\n' << "Range: " << Range - << "Size: " << TSize << ". Density: " << Density << "\n\n"); + << ". Size: " << TSize << ". Density: " << Density << "\n\n"); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. 
@@ -2408,19 +2409,19 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - - MVT ShiftTy = TLI.getShiftAmountTy(); - + + MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); unsigned Op2Size = Op2.getValueType().getSizeInBits(); DebugLoc DL = getCurDebugLoc(); - + // If the operand is smaller than the shift count type, promote it. if (ShiftSize > Op2Size) Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); - + // If the operand is larger than the shift count type but the shift // count type has enough bits to represent any shift value, truncate // it now. This is a common case and it exposes the truncate to diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index a1a70c394a51..8f466d913bbb 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -348,7 +348,7 @@ class SelectionDAGBuilder { SDValue getControlRoot(); DebugLoc getCurDebugLoc() const { return CurDebugLoc; } - void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; } + unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 62ebc81ef86e..68ba966d268a 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -49,6 +49,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" #include using 
namespace llvm; @@ -479,16 +480,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { unsigned NumSignBits = CurDAG->ComputeNumSignBits(Src); Mask = APInt::getAllOnesValue(SrcVT.getSizeInBits()); CurDAG->ComputeMaskedBits(Src, Mask, KnownZero, KnownOne); - - // Only install this information if it tells us something. - if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) { - FuncInfo->LiveOutRegInfo.grow(DestReg); - FunctionLoweringInfo::LiveOutInfo &LOI = - FuncInfo->LiveOutRegInfo[DestReg]; - LOI.NumSignBits = NumSignBits; - LOI.KnownOne = KnownOne; - LOI.KnownZero = KnownZero; - } + FuncInfo->AddLiveOutRegInfo(DestReg, NumSignBits, KnownZero, KnownOne); } while (!Worklist.empty()); } @@ -832,11 +824,39 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. - for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { - const BasicBlock *LLVMBB = &*I; + ReversePostOrderTraversal RPOT(&Fn); + for (ReversePostOrderTraversal::rpo_iterator + I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + const BasicBlock *LLVMBB = *I; #ifndef NDEBUG CheckLineNumbers(LLVMBB); #endif + + if (OptLevel != CodeGenOpt::None) { + bool AllPredsVisited = true; + for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB); + PI != PE; ++PI) { + if (!FuncInfo->VisitedBBs.count(*PI)) { + AllPredsVisited = false; + break; + } + } + + if (AllPredsVisited) { + for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); + I != E && isa(I); ++I) { + FuncInfo->ComputePHILiveOutRegInfo(cast(I)); + } + } else { + for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); + I != E && isa(I); ++I) { + FuncInfo->InvalidatePHILiveOutRegInfo(cast(I)); + } + } + + FuncInfo->VisitedBBs.insert(LLVMBB); + } + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); @@ -851,17 +871,8 @@ void 
SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { PrepareEHLandingPad(); // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) { - for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end(); - DBI != DBE; ++DBI) { - if (const DbgInfoIntrinsic *DI = dyn_cast(DBI)) { - const DebugLoc DL = DI->getDebugLoc(); - SDB->setCurDebugLoc(DL); - break; - } - } + if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - } // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 691390e2a0e4..35b847ccabfb 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -563,7 +563,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, setOperationAction(ISD::TRAP, MVT::Other, Expand); IsLittleEndian = TD->isLittleEndian(); - ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize()); memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; @@ -596,6 +596,10 @@ TargetLowering::~TargetLowering() { delete &TLOF; } +MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize()); +} + /// canOpTrap - Returns true if the operation can trap for the value type. /// VT must be a legal type. 
bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { @@ -1401,7 +1405,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, BitWidth - InnerVT.getSizeInBits()) & DemandedMask) == 0 && isTypeDesirableForOp(ISD::SHL, InnerVT)) { - EVT ShTy = getShiftAmountTy(); + EVT ShTy = getShiftAmountTy(InnerVT); if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits())) ShTy = InnerVT; SDValue NarrowShl = @@ -2188,7 +2192,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (ConstantSDNode *AndRHS = dyn_cast(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? - getPointerTy() : getShiftAmountTy(); + getPointerTy() : getShiftAmountTy(N0.getValueType()); if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. if (AndRHS->getAPIntValue().isPowerOf2()) { @@ -2359,7 +2363,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, - DAG.getConstant(1, getShiftAmountTy())); + DAG.getConstant(1, getShiftAmountTy(N1.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond); @@ -2381,7 +2385,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, - DAG.getConstant(1, getShiftAmountTy())); + DAG.getConstant(1, getShiftAmountTy(N0.getValueType()))); if (!DCI.isCalledByLegalizer()) DCI.AddToWorklist(SH.getNode()); return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond); @@ -2493,7 +2497,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA, } } } - + return false; } @@ -3141,14 +3145,14 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, // Shift right algebraic if shift value is nonzero if (magics.s > 0) { Q = 
DAG.getNode(ISD::SRA, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy())); + DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(Q.getNode()); } // Extract the sign bit and add it to the quotient SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(VT.getSizeInBits()-1, - getShiftAmountTy())); + getShiftAmountTy(Q.getValueType()))); if (Created) Created->push_back(T.getNode()); return DAG.getNode(ISD::ADD, dl, VT, Q, T); @@ -3192,19 +3196,19 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, assert(magics.s < N1C->getAPIntValue().getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, - DAG.getConstant(magics.s, getShiftAmountTy())); + DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); } else { SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q); if (Created) Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(1, getShiftAmountTy())); + DAG.getConstant(1, getShiftAmountTy(NPQ.getValueType()))); if (Created) Created->push_back(NPQ.getNode()); NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q); if (Created) Created->push_back(NPQ.getNode()); return DAG.getNode(ISD::SRL, dl, VT, NPQ, - DAG.getConstant(magics.s-1, getShiftAmountTy())); + DAG.getConstant(magics.s-1, getShiftAmountTy(NPQ.getValueType()))); } } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 5663936bf3aa..fd5d50b7ecb8 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -167,6 +167,20 @@ void SplitAnalysis::calcLiveBlockInfo() { } } +bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { + unsigned OrigReg = VRM.getOriginal(CurLI->reg); + const LiveInterval &Orig = LIS.getInterval(OrigReg); + assert(!Orig.empty() && "Splitting empty interval?"); + LiveInterval::const_iterator I = Orig.find(Idx); + + // Range containing Idx should begin at Idx. 
+ if (I != Orig.end() && I->start <= Idx) + return I->start == Idx; + + // Range does not contain Idx, previous must end at Idx. + return I != Orig.begin() && (--I)->end == Idx; +} + void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { unsigned count = UsingBlocks.lookup(*I); @@ -947,10 +961,10 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { openIntv(); SlotIndex SegStart = enterIntvBefore(BI.FirstUse); - if (BI.LastUse < BI.LastSplitPoint) { + if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) { useIntv(SegStart, leaveIntvAfter(BI.LastUse)); } else { - // THe last use os after tha last valid split point. + // The last use is after the last valid split point. SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint); useIntv(SegStart, SegStop); overlapIntv(SegStop, BI.LastUse); diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index 5c34afd1c819..e02e6297035d 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -125,6 +125,13 @@ class SplitAnalysis { return UsingBlocks.lookup(MBB); } + /// isOriginalEndpoint - Return true if the original live range was killed or + /// (re-)defined at Idx. Idx should be the 'def' slot for a normal kill/def, + /// and 'use' for an early-clobber def. + /// This can be used to recognize code inserted by earlier live range + /// splitting. + bool isOriginalEndpoint(SlotIndex Idx) const; + typedef SmallPtrSet BlockPtrSet; // Print a set of blocks with use counts. 
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 0b7bd98cc692..fa311dc5d66c 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -178,6 +178,10 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { + // FIXME: Why is this here? Codegen is should not be in the business + // of figuring section flags. If the user wrote section(".eh_frame"), + // we should just pass that to MC which will defer to the assembly + // or use its default if producing an object file. if (Name.empty() || Name[0] != '.') return K; // Some lame default implementation based on some magic section names. @@ -203,6 +207,9 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) { Name.startswith(".llvm.linkonce.tb.")) return SectionKind::getThreadBSS(); + if (Name == ".eh_frame") + return SectionKind::getDataRel(); + return K; } @@ -441,11 +448,15 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); if (T.getOS() == Triple::Darwin) { - unsigned MajNum = T.getDarwinMajorNumber(); - if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger + switch (T.getDarwinMajorNumber()) { + case 7: // 10.3 Panther. + case 8: // 10.4 Tiger. CommDirectiveSupportsAlignment = false; - if (MajNum > 9) // 10.6 SnowLeopard - IsFunctionEHSymbolGlobal = false; + break; + case 9: // 10.5 Leopard. + case 10: // 10.6 SnowLeopard. + break; + } } TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -630,7 +641,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. 
StringRef Segment, Section; - unsigned TAA, StubSize; + unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, StubSize); @@ -643,10 +654,19 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } + bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES); + if (!TAAWasSet) + TAA = 0; // Sensible default if this is a new section. + // Get the section. const MCSectionMachO *S = getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); + // If TAA wasn't set by ParseSectionSpecifier() above, + // use the value returned by getMachOSection() as a default. + if (!TAAWasSet) + TAA = S->getTypeAndAttributes(); + // Okay, now that we got the section, verify that the TAA & StubSize agree. // If the user declared multiple globals with different section flags, we need // to reject it here. diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 458a2134bf4a..ec149dddc1d9 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -478,7 +478,8 @@ static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, if (!RegKills[KReg]) return; - assert(KillOps[KReg] == KillOp && "invalid superreg kill flags"); + assert(KillOps[KReg]->getParent() == KillOp->getParent() && + "invalid superreg kill flags"); KillOps[KReg] = NULL; RegKills.reset(KReg); @@ -487,7 +488,8 @@ static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n"); - assert(KillOps[*SR] == KillOp && "invalid subreg kill flags"); + assert(KillOps[*SR]->getParent() == KillOp->getParent() && + "invalid subreg kill flags"); KillOps[*SR] = NULL; RegKills.reset(*SR); } diff --git a/lib/MC/ELFObjectWriter.cpp 
b/lib/MC/ELFObjectWriter.cpp index 8a00a16cfb4a..ea1629d30565 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -833,7 +833,11 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, return true; const MCSymbol &A = Symbol.AliasedSymbol(); - if (!A.isVariable() && A.isUndefined() && !Data.isCommon()) + if (Symbol.isVariable() && !A.isVariable() && A.isUndefined()) + return false; + + bool IsGlobal = GetBinding(Data) == ELF::STB_GLOBAL; + if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal) return false; if (!Asm.isSymbolLinkerVisible(Symbol) && !Symbol.isUndefined()) @@ -1732,6 +1736,10 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, assert(Modifier == MCSymbolRefExpr::VK_None); Type = ELF::R_X86_64_PC16; break; + case FK_PCRel_1: + assert(Modifier == MCSymbolRefExpr::VK_None); + Type = ELF::R_X86_64_PC8; + break; } } else { switch ((unsigned)Fixup.getKind()) { diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index cc1afbd08926..8199fb2e158a 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -65,6 +65,7 @@ MCAsmInfo::MCAsmInfo() { WeakDefDirective = 0; LinkOnceDirective = 0; HiddenVisibilityAttr = MCSA_Hidden; + HiddenDeclarationVisibilityAttr = MCSA_Hidden; ProtectedVisibilityAttr = MCSA_Protected; HasLEB128 = false; SupportsDebugInformation = false; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 13776f04437d..526ad0da42aa 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -45,6 +45,7 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { HasAggressiveSymbolFolding = false; HiddenVisibilityAttr = MCSA_PrivateExtern; + HiddenDeclarationVisibilityAttr = MCSA_Invalid; // Doesn't support protected visibility. 
ProtectedVisibilityAttr = MCSA_Global; diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp index cfeb56fa3dfd..2b0c73e80593 100644 --- a/lib/MC/MCDisassembler/EDOperand.cpp +++ b/lib/MC/MCDisassembler/EDOperand.cpp @@ -152,10 +152,23 @@ int EDOperand::evaluate(uint64_t &result, uint64_t scaleAmount = Inst.Inst->getOperand(MCOpIndex+1).getImm(); unsigned indexReg = Inst.Inst->getOperand(MCOpIndex+2).getReg(); int64_t displacement = Inst.Inst->getOperand(MCOpIndex+3).getImm(); - //unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); - + uint64_t addr = 0; + unsigned segmentReg = Inst.Inst->getOperand(MCOpIndex+4).getReg(); + + if (segmentReg != 0 && Disassembler.Key.Arch == Triple::x86_64) { + unsigned fsID = Disassembler.registerIDWithName("FS"); + unsigned gsID = Disassembler.registerIDWithName("GS"); + + if (segmentReg == fsID || + segmentReg == gsID) { + uint64_t segmentBase; + if (!callback(&segmentBase, segmentReg, arg)) + addr += segmentBase; + } + } + if (baseReg) { uint64_t baseVal; if (callback(&baseVal, baseReg, arg)) @@ -175,7 +188,7 @@ int EDOperand::evaluate(uint64_t &result, result = addr; return 0; } - } + } // switch (operandType) break; case Triple::arm: case Triple::thumb: @@ -203,6 +216,7 @@ int EDOperand::evaluate(uint64_t &result, return 0; } } + break; } return -1; diff --git a/lib/MC/MCDisassembler/EDToken.cpp b/lib/MC/MCDisassembler/EDToken.cpp index 400e1649e970..de770b41ef35 100644 --- a/lib/MC/MCDisassembler/EDToken.cpp +++ b/lib/MC/MCDisassembler/EDToken.cpp @@ -194,6 +194,10 @@ int EDToken::tokenize(std::vector &tokens, tokens.push_back(token); } + // Free any parsed operands. 
+ for (unsigned i = 0, e = parsedOperands.size(); i != e; ++i) + delete parsedOperands[i]; + return 0; } diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index 035826690cdf..e67d9b03a95a 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -242,7 +242,23 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, void MCObjectStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { - new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + int64_t Res; + if (Offset->EvaluateAsAbsolute(Res, getAssembler())) { + new MCOrgFragment(*Offset, Value, getCurrentSectionData()); + return; + } + + MCSymbol *CurrentPos = getContext().CreateTempSymbol(); + EmitLabel(CurrentPos); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *Ref = + MCSymbolRefExpr::Create(CurrentPos, Variant, getContext()); + const MCExpr *Delta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, Offset, Ref, getContext()); + + if (!Delta->EvaluateAsAbsolute(Res, getAssembler())) + report_fatal_error("expected assembly-time absolute expression"); + EmitFill(Res, Value, 0); } void MCObjectStreamer::Finish() { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index c6d0da609b3b..a84917ffb86a 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -603,6 +603,8 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { Lex(); // Eat the '('. return ParseParenExpr(Res, EndLoc); case AsmToken::LBrac: + if (!PlatformParser->HasBracketExpressions()) + return TokError("brackets expression not supported on this target"); Lex(); // Eat the '['. 
return ParseBracketExpr(Res, EndLoc); case AsmToken::Minus: diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index bfaf36a451b3..dcf689a6f0e7 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -30,9 +30,12 @@ class ELFAsmParser : public MCAsmParserExtension { bool ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, SectionKind Kind); + bool SeenIdent; public: - ELFAsmParser() {} + ELFAsmParser() : SeenIdent(false) { + BracketExpressionsSupported = true; + } virtual void Initialize(MCAsmParser &Parser) { // Call the base implementation. @@ -456,13 +459,12 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) { SectionKind::getReadOnly(), 1, ""); - static bool First = true; - getStreamer().PushSection(); getStreamer().SwitchSection(Comment); - if (First) + if (!SeenIdent) { getStreamer().EmitIntValue(0, 1); - First = false; + SeenIdent = true; + } getStreamer().EmitBytes(Data, 0); getStreamer().EmitIntValue(0, 1); getStreamer().PopSection(); diff --git a/lib/MC/MCParser/MCAsmParserExtension.cpp b/lib/MC/MCParser/MCAsmParserExtension.cpp index c30d3067da59..3f25a14926b6 100644 --- a/lib/MC/MCParser/MCAsmParserExtension.cpp +++ b/lib/MC/MCParser/MCAsmParserExtension.cpp @@ -10,7 +10,8 @@ #include "llvm/MC/MCParser/MCAsmParserExtension.h" using namespace llvm; -MCAsmParserExtension::MCAsmParserExtension() { +MCAsmParserExtension::MCAsmParserExtension() : + BracketExpressionsSupported(false) { } MCAsmParserExtension::~MCAsmParserExtension() { diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index b897c0bd6855..577e93aed6bc 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -101,16 +101,18 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, return; } - OS << ','; - unsigned SectionType = TAA & MCSectionMachO::SECTION_TYPE; assert(SectionType <= MCSectionMachO::LAST_KNOWN_SECTION_TYPE && "Invalid SectionType specified!"); - if 
(SectionTypeDescriptors[SectionType].AssemblerName) + if (SectionTypeDescriptors[SectionType].AssemblerName) { + OS << ','; OS << SectionTypeDescriptors[SectionType].AssemblerName; - else - OS << "<<" << SectionTypeDescriptors[SectionType].EnumName << ">>"; + } else { + // If we have no name for the attribute, stop here. + OS << '\n'; + return; + } // If we don't have any attributes, we're done. unsigned SectionAttrs = TAA & MCSectionMachO::SECTION_ATTRIBUTES; @@ -125,7 +127,9 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI, // Check each attribute to see if we have it. char Separator = ','; - for (unsigned i = 0; SectionAttrDescriptors[i].AttrFlag; ++i) { + for (unsigned i = 0; + SectionAttrs != 0 && SectionAttrDescriptors[i].AttrFlag; + ++i) { // Check to see if we have this attribute. if ((SectionAttrDescriptors[i].AttrFlag & SectionAttrs) == 0) continue; @@ -207,7 +211,6 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. "between 1 and 16 characters"; // If there is no comma after the section, we're done. 
- TAA = 0; StubSize = 0; if (Comma.second.empty()) return ""; diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 3dcdba13135f..4b302c8602c9 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -20,8 +20,8 @@ using namespace llvm; MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) { - PrevSectionStack.push_back(NULL); - CurSectionStack.push_back(NULL); + const MCSection *section = NULL; + SectionStack.push_back(std::make_pair(section, section)); } MCStreamer::~MCStreamer() { diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 77033428b577..08f36d2af3a1 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1505,7 +1505,7 @@ APInt::ms APInt::magic() const { r2 = r2 - ad; } delta = ad - r2; - } while (q1.ule(delta) || (q1 == delta && r1 == 0)); + } while (q1.ult(delta) || (q1 == delta && r1 == 0)); mag.m = q2 + 1; if (d.isNegative()) mag.m = -mag.m; // resulting magic number diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 1fb88726d0de..7e2183d7cd5e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -155,10 +155,11 @@ namespace ARMII { //===------------------------------------------------------------------===// // Code domain. 
DomainShift = 18, - DomainMask = 3 << DomainShift, + DomainMask = 7 << DomainShift, DomainGeneral = 0 << DomainShift, DomainVFP = 1 << DomainShift, DomainNEON = 2 << DomainShift, + DomainNEONA8 = 4 << DomainShift, //===------------------------------------------------------------------===// // Field shifts - such shifts are used to set field while generating diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 9f295302db0e..26f48b308316 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -172,6 +172,7 @@ class ARMFastISel : public FastISel { unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg); unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg); + unsigned ARMSelectCallOp(const GlobalValue *GV); // Call handling routines. private: @@ -1633,6 +1634,25 @@ bool ARMFastISel::SelectRet(const Instruction *I) { return true; } +unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { + + // Depend our opcode for thumb on whether or not we're targeting an + // externally callable function. For libcalls we'll just pass a NULL GV + // in here. + bool isExternal = false; + if (!GV || GV->hasExternalLinkage()) isExternal = true; + + // Darwin needs the r9 versions of the opcodes. + bool isDarwin = Subtarget->isTargetDarwin(); + if (isThumb && isExternal) { + return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi; + } else if (isThumb) { + return isDarwin ? ARM::tBLr9 : ARM::tBL; + } else { + return isDarwin ? ARM::BLr9 : ARM::BL; + } +} + // A quick function that will emit a call for a named libcall in F with the // vector of passed arguments for the Instruction in I. We can assume that we // can emit a call for any libcall we can produce. This is an abridged version @@ -1694,20 +1714,17 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. 
// TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; - unsigned CallOpc; - if(isThumb) { - CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi; + unsigned CallOpc = ARMSelectCallOp(NULL); + if(isThumb) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) .addExternalSymbol(TLI.getLibcallName(Call)); - } else { - CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL; + else // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addExternalSymbol(TLI.getLibcallName(Call))); - } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) @@ -1813,21 +1830,18 @@ bool ARMFastISel::SelectCall(const Instruction *I) { // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; - unsigned CallOpc; + unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. - if(isThumb) { - CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi; + if(isThumb) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) .addGlobalAddress(GV, 0, 0); - } else { - CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL; + else // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addGlobalAddress(GV, 0, 0)); - } // Add implicit physical register uses to the call. 
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index f42c6db84fd3..68c33f098ec9 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -215,7 +215,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 3. - if (DPRCSSize > 0) MBBI++; + if (DPRCSSize > 0) { + MBBI++; + // Since vpush register list cannot have gaps, there may be multiple vpush + // instructions in the prologue. + while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) + MBBI++; + } NumBytes = DPRCSOffset; if (NumBytes) { @@ -370,7 +376,13 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. - if (AFI->getDPRCalleeSavedAreaSize()) MBBI++; + if (AFI->getDPRCalleeSavedAreaSize()) { + MBBI++; + // Since vpop register list cannot have gaps, there may be multiple vpop + // instructions in the epilogue. + while (MBBI->getOpcode() == ARM::VLDMDIA_UPD) + MBBI++; + } if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; } diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index 676b01e91c53..e97ce50bc429 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -21,17 +21,14 @@ static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI, // FIXME: Detect integer instructions properly. 
const TargetInstrDesc &TID = MI->getDesc(); unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (Domain == ARMII::DomainVFP) { - unsigned Opcode = MI->getOpcode(); - if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD || - Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) - return false; - } else if (Domain == ARMII::DomainNEON) { - if (MI->getDesc().mayStore() || MI->getDesc().mayLoad()) - return false; - } else + if (TID.mayStore()) return false; - return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI); + unsigned Opcode = TID.getOpcode(); + if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) + return false; + if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) + return MI->readsRegister(DefMI->getOperand(0).getReg(), &TRI); + return false; } ScheduleHazardRecognizer::HazardType diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index a506cffdba34..f0d5a7d7c2e7 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -126,6 +126,7 @@ class ARMDAGToDAGISel : public SelectionDAGISel { bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); + bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); @@ -886,6 +887,20 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, return true; } +bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, + SDValue &Offset) { + LSBaseSDNode *LdSt = cast(Op); + ISD::MemIndexedMode AM = LdSt->getAddressingMode(); + if (AM != ISD::POST_INC) + return false; + Offset = N; + if (ConstantSDNode *NC = dyn_cast(N)) { + if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) + Offset = CurDAG->getRegister(0, MVT::i32); + } + return true; +} + bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label) { if 
(N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 1835ec0f0054..ab9f9e1571e3 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2236,7 +2236,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, RC = ARM::GPRRegisterClass; // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); SDValue ArgValue2; @@ -2250,7 +2250,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, MachinePointerInfo::getFixedStack(FI), false, false, 0); } else { - Reg = MF.addLiveIn(NextVA.getLocReg(), RC, dl); + Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } @@ -2331,7 +2331,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); // Transform the arguments in physical registers into virtual ones. 
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); } @@ -2408,7 +2408,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, else RC = ARM::GPRRegisterClass; - unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC, dl); + unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -2838,8 +2838,51 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); - bool F2IisFast = Subtarget->isCortexA9() || - Tmp0.getOpcode() == ISD::BITCAST || Tmp0.getOpcode() == ARMISD::VMOVDRR; + bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || + Tmp0.getOpcode() == ARMISD::VMOVDRR; + bool UseNEON = !InGPR && Subtarget->hasNEON(); + + if (UseNEON) { + // Use VBSL to copy the sign bit. + unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); + SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, + DAG.getTargetConstant(EncodedVal, MVT::i32)); + EVT OpVT = (VT == MVT::f32) ? 
MVT::v2i32 : MVT::v1i64; + if (VT == MVT::f64) + Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, + DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), + DAG.getConstant(32, MVT::i32)); + else /*if (VT == MVT::f32)*/ + Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); + if (SrcVT == MVT::f32) { + Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); + if (VT == MVT::f64) + Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, + DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), + DAG.getConstant(32, MVT::i32)); + } + Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); + Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); + + SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), + MVT::i32); + AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); + SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, + DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); + + SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, + DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), + DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); + if (SrcVT == MVT::f32) { + Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, + DAG.getConstant(0, MVT::i32)); + } else { + Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); + } + + return Res; + } // Bitcast operand 1 to i32. if (SrcVT == MVT::f64) @@ -2847,37 +2890,24 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { &Tmp1, 1).getValue(1); Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); - // If float to int conversion isn't going to be super expensive, then simply - // or in the signbit. 
- if (F2IisFast) { - SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); - SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); - Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); - if (VT == MVT::f32) { - Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, - DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); - return DAG.getNode(ISD::BITCAST, dl, MVT::f32, - DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); - } - - // f64: Or the high part with signbit and then combine two parts. - Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), - &Tmp0, 1); - SDValue Lo = Tmp0.getValue(0); - SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); - Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); - return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); + // Or in the signbit with integer operations. + SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); + SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); + Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); + if (VT == MVT::f32) { + Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); } - // Remove the signbit of operand 0. - Tmp0 = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - - // If operand 1 signbit is one, then negate operand 0. - SDValue ARMcc; - SDValue Cmp = getARMCmp(Tmp1, DAG.getConstant(0, MVT::i32), - ISD::SETLT, ARMcc, DAG, dl); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - return DAG.getNode(ARMISD::CNEG, dl, VT, Tmp0, Tmp0, ARMcc, CCR, Cmp); + // f64: Or the high part with signbit and then combine two parts. 
+ Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), + &Tmp0, 1); + SDValue Lo = Tmp0.getValue(0); + SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); + Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ @@ -2897,7 +2927,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ } // Return LR, which contains the return address. Mark it an implicit live-in. - unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32), dl); + unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 765cba42d0bd..359ac45cee1d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -127,13 +127,14 @@ def IndexModePost : IndexMode<2>; def IndexModeUpd : IndexMode<3>; // Instruction execution domain. -class Domain val> { - bits<2> Value = val; +class Domain val> { + bits<3> Value = val; } def GenericDomain : Domain<0>; def VFPDomain : Domain<1>; // Instructions in VFP domain only def NeonDomain : Domain<2>; // Instructions in Neon domain only def VFPNeonDomain : Domain<3>; // Instructions in both VFP & Neon domains +def VFPNeonA8Domain : Domain<5>; // Instructions in VFP & Neon under A8 //===----------------------------------------------------------------------===// // ARM special operands. 
@@ -249,7 +250,7 @@ class InstTemplate, let EncoderMethod = "getAddrMode6AddressOpValue"; } -def am6offset : Operand { +def am6offset : Operand, + ComplexPattern { let PrintMethod = "printAddrMode6OffsetOperand"; let MIOperandInfo = (ops GPR); let EncoderMethod = "getAddrMode6OffsetOpValue"; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 1e2e5504e662..dc3d63e26ef5 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1402,31 +1402,42 @@ def : Pat<(store (extractelt (v2f32 DPR:$src), imm:$lane), addrmode6:$addr), def : Pat<(store (extractelt (v4f32 QPR:$src), imm:$lane), addrmode6:$addr), (VST1LNq32Pseudo addrmode6:$addr, QPR:$src, imm:$lane)>; -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { - // ...with address register writeback: -class VST1LNWB op11_8, bits<4> op7_4, string Dt> +class VST1LNWB op11_8, bits<4> op7_4, string Dt, ValueType Ty, + PatFrag StoreOp, SDNode ExtractOp> : NLdStLn<1, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, nohash_imm:$lane), IIC_VST1lnu, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn$Rm", - "$Rn.addr = $wb", []>; + "$Rn.addr = $wb", + [(set GPR:$wb, (StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), + addrmode6:$Rn, am6offset:$Rm))]>; +class VST1QLNWBPseudo + : VSTQLNWBPseudo { + let Pattern = [(set GPR:$wb, (StoreOp (ExtractOp (Ty QPR:$src), imm:$lane), + addrmode6:$addr, am6offset:$offset))]; +} -def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8"> { +def VST1LNd8_UPD : VST1LNWB<0b0000, {?,?,?,0}, "8", v8i8, post_truncsti8, + NEONvgetlaneu> { let Inst{7-5} = lane{2-0}; } -def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16"> { +def VST1LNd16_UPD : VST1LNWB<0b0100, {?,?,0,?}, "16", v4i16, post_truncsti16, + NEONvgetlaneu> { let Inst{7-6} = lane{1-0}; let Inst{4} = Rn{5}; } -def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32"> { +def VST1LNd32_UPD : VST1LNWB<0b1000, {?,0,?,?}, "32", v2i32, post_store, + extractelt> { let 
Inst{7} = lane{0}; let Inst{5-4} = Rn{5-4}; } -def VST1LNq8Pseudo_UPD : VSTQLNWBPseudo; -def VST1LNq16Pseudo_UPD : VSTQLNWBPseudo; -def VST1LNq32Pseudo_UPD : VSTQLNWBPseudo; +def VST1LNq8Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq16Pseudo_UPD : VST1QLNWBPseudo; +def VST1LNq32Pseudo_UPD : VST1QLNWBPseudo; + +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { // VST2LN : Vector Store (single 2-element structure from one lane) class VST2LN op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 920c5c98002a..29902833f2bb 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -197,9 +197,9 @@ def VADDS : ASbIn<0b11100, 0b11, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vadd", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fadd SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSUBD : ADbI<0b11100, 0b11, 1, 0, @@ -211,9 +211,9 @@ def VSUBS : ASbIn<0b11100, 0b11, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpALU32, "vsub", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VDIVD : ADbI<0b11101, 0b00, 0, 0, @@ -235,9 +235,9 @@ def VMULS : ASbIn<0b11100, 0b10, 0, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fmul SPR:$Sn, SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. 
- let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VNMULD : ADbI<0b11100, 0b10, 1, 0, @@ -249,9 +249,9 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0, (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), IIC_fpMUL32, "vnmul", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fneg (fmul SPR:$Sn, SPR:$Sm)))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // Match reassociated forms only if not sign dependent rounding. @@ -271,9 +271,9 @@ def VCMPES : ASuI<0b11101, 0b11, 0b0100, 0b11, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmpe", ".f32\t$Sd, $Sm", [(arm_cmpfp SPR:$Sd, SPR:$Sm)]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FIXME: Verify encoding after integrated assembler is working. @@ -286,9 +286,9 @@ def VCMPS : ASuI<0b11101, 0b11, 0b0100, 0b01, 0, (outs), (ins SPR:$Sd, SPR:$Sm), IIC_fpCMP32, "vcmp", ".f32\t$Sd, $Sm", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // Defs = [FPSCR] @@ -305,9 +305,9 @@ def VABSS : ASuIn<0b11101, 0b11, 0b0000, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vabs", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fabs SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. 
- let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } let Defs = [FPSCR] in { @@ -326,9 +326,9 @@ def VCMPEZS : ASuI<0b11101, 0b11, 0b0101, 0b11, 0, let Inst{3-0} = 0b0000; let Inst{5} = 0; - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FIXME: Verify encoding after integrated assembler is working. @@ -347,9 +347,9 @@ def VCMPZS : ASuI<0b11101, 0b11, 0b0101, 0b01, 0, let Inst{3-0} = 0b0000; let Inst{5} = 0; - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } } // Defs = [FPSCR] @@ -423,9 +423,9 @@ def VNEGS : ASuIn<0b11101, 0b11, 0b0001, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", [(set SPR:$Sd, (fneg SPR:$Sm))]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSQRTD : ADuI<0b11101, 0b11, 0b0001, 0b11, 0, @@ -598,9 +598,9 @@ def VSITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, [(set SPR:$Sd, (arm_sitof SPR:$Sm))]> { let Inst{7} = 1; // s32 - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. 
+ let D = VFPNeonA8Domain; } def VUITOD : AVConv1IDs_Encode<0b11101, 0b11, 0b1000, 0b1011, @@ -616,9 +616,9 @@ def VUITOS : AVConv1InSs_Encode<0b11101, 0b11, 0b1000, 0b1010, [(set SPR:$Sd, (arm_uitof SPR:$Sm))]> { let Inst{7} = 0; // u32 - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // FP -> Int: @@ -671,9 +671,9 @@ def VTOSIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1101, 0b1010, [(set SPR:$Sd, (arm_ftosi SPR:$Sm))]> { let Inst{7} = 1; // Z bit - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOUIZD : AVConv1IsD_Encode<0b11101, 0b11, 0b1100, 0b1011, @@ -689,9 +689,9 @@ def VTOUIZS : AVConv1InsS_Encode<0b11101, 0b11, 0b1100, 0b1010, [(set SPR:$Sd, (arm_ftoui SPR:$Sm))]> { let Inst{7} = 1; // Z bit - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } // And the Z bit '0' variants, i.e. use the rounding mode specified by FPSCR. @@ -743,36 +743,36 @@ def VTOSHS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s16.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. 
+ let D = VFPNeonA8Domain; } def VTOUHS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u16.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOSLS : AVConv1XI<0b11101, 0b11, 0b1110, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOULS : AVConv1XI<0b11101, 0b11, 0b1111, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VTOSHD : AVConv1XI<0b11101, 0b11, 0b1110, 0b1011, 0, @@ -801,36 +801,36 @@ def VSHTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s16\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. 
+ let D = VFPNeonA8Domain; } def VUHTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 0, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u16\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSLTOS : AVConv1XI<0b11101, 0b11, 0b1010, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VULTOS : AVConv1XI<0b11101, 0b11, 0b1011, 0b1010, 1, (outs SPR:$dst), (ins SPR:$a, i32imm:$fbits), IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a, $fbits", [/* For disassembly only; pattern left blank */]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def VSHTOD : AVConv1XI<0b11101, 0b11, 0b1010, 0b1011, 0, @@ -874,9 +874,9 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. 
+ let D = VFPNeonA8Domain; } def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), @@ -901,9 +901,9 @@ def VMLSS : ASbIn<0b11100, 0b00, 1, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), @@ -928,9 +928,9 @@ def VNMLAS : ASbI<0b11100, 0b01, 1, 0, SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), @@ -954,9 +954,9 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0, [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. + let D = VFPNeonA8Domain; } def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), @@ -995,9 +995,9 @@ def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, RegConstraint<"$Sn = $Sd"> { - // Some single precision VFP instructions may be executed on both NEON and VFP - // pipelines. - let D = VFPNeonDomain; + // Some single precision VFP instructions may be executed on both NEON and + // VFP pipelines on A8. 
+ let D = VFPNeonA8Domain; } } // neverHasSideEffects diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 0bd740cfb28c..1465984899c6 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -171,7 +171,9 @@ ARMSubtarget::GVIsIndirectSymbol(const GlobalValue *GV, // Materializable GVs (in JIT lazy compilation mode) do not require an extra // load from stub. - bool isDecl = GV->isDeclaration() && !GV->isMaterializable(); + bool isDecl = GV->hasAvailableExternallyLinkage(); + if (GV->isDeclaration() && !GV->isMaterializable()) + isDecl = true; if (!isTargetDarwin()) { // Extra load is needed for all externally visible. diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index f9e86eb36e04..9a27e2f47064 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -132,22 +132,16 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const { } bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { - const TargetInstrDesc &TID = MI->getDesc(); // FIXME: Detect integer instructions properly. 
+ const TargetInstrDesc &TID = MI->getDesc(); unsigned Domain = TID.TSFlags & ARMII::DomainMask; - if (Domain == ARMII::DomainVFP) { - unsigned Opcode = TID.getOpcode(); - if (Opcode == ARM::VSTRS || Opcode == ARM::VSTRD || - Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) - return false; - } else if (Domain == ARMII::DomainNEON) { - if (TID.mayStore() || TID.mayLoad()) - return false; - } else { + if (TID.mayStore()) return false; - } - - return MI->readsRegister(Reg, TRI); + unsigned Opcode = TID.getOpcode(); + if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) + return false; + if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON)) + return MI->readsRegister(Reg, TRI); return false; } diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index 97e54bfaed9e..965665c2821a 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -35,6 +35,7 @@ namespace { private: const TargetRegisterInfo *TRI; const ARMBaseInstrInfo *TII; + bool isA8; typedef DenseMap RegMap; @@ -43,6 +44,11 @@ namespace { char NEONMoveFixPass::ID = 0; } +static bool inNEONDomain(unsigned Domain, bool isA8) { + return (Domain & ARMII::DomainNEON) || + (isA8 && (Domain & ARMII::DomainNEONA8)); +} + bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { RegMap Defs; bool Modified = false; @@ -70,7 +76,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { Domain = ARMII::DomainNEON; } - if (Domain & ARMII::DomainNEON) { + if (inNEONDomain(Domain, isA8)) { // Convert VMOVD to VMOVDneon unsigned DestReg = MI->getOperand(0).getReg(); @@ -123,6 +129,7 @@ bool NEONMoveFixPass::runOnMachineFunction(MachineFunction &Fn) { TRI = TM.getRegisterInfo(); TII = static_cast(TM.getInstrInfo()); + isA8 = TM.getSubtarget().isCortexA8(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 
2f67257f8fa1..9b1073be3c8e 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -95,6 +95,12 @@ Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, bool Thumb2InstrInfo::isLegalToSplitMBBAt(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { + while (MBBI->isDebugValue()) { + ++MBBI; + if (MBBI == MBB.end()) + return false; + } + unsigned PredReg = 0; return llvm::getITInstrPredicate(MBBI, PredReg) == ARMCC::AL; } diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index 9137d654edba..c4f43ab9e4e7 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -48,7 +48,6 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the TargetLowering object. //I am having problems with shr n i8 1 - setShiftAmountType(MVT::i64); setBooleanContents(ZeroOrOneBooleanContent); addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h index b429e9fc1390..cb98f921dd68 100644 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ b/lib/Target/Alpha/AlphaISelLowering.h @@ -31,25 +31,25 @@ namespace llvm { /// GPRelHi/GPRelLo - These represent the high and low 16-bit /// parts of a global address respectively. - GPRelHi, GPRelLo, + GPRelHi, GPRelLo, /// RetLit - Literal Relocation of a Global RelLit, /// GlobalRetAddr - used to restore the return address GlobalRetAddr, - + /// CALL - Normal call. CALL, /// DIVCALL - used for special library calls for div and rem DivCall, - + /// return flag operand RET_FLAG, /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This - /// corresponds to the COND_BRANCH pseudo instruction. + /// corresponds to the COND_BRANCH pseudo instruction. /// *PRC is the input register to compare to zero, /// OPC is the branch opcode to use (e.g. 
Alpha::BEQ), /// DESTBB is the destination block to branch to, and INFLAG is @@ -62,7 +62,9 @@ namespace llvm { class AlphaTargetLowering : public TargetLowering { public: explicit AlphaTargetLowering(TargetMachine &TM); - + + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } + /// getSetCCResultType - Get the SETCC result ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; @@ -92,7 +94,7 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::vector + std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index dd27d0a0ff36..7c80eec3ba63 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -41,7 +41,6 @@ using namespace llvm; BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { - setShiftAmountType(MVT::i16); setBooleanContents(ZeroOrOneBooleanContent); setStackPointerRegisterToSaveRestore(BF::SP); setIntDivIsCheap(false); diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h index 15a745fa8724..102c830688e2 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ b/lib/Target/Blackfin/BlackfinISelLowering.h @@ -32,6 +32,7 @@ namespace llvm { class BlackfinTargetLowering : public TargetLowering { public: BlackfinTargetLowering(TargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; } virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; virtual void ReplaceNodeResults(SDNode *N, diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index e6511d008c2b..743a4d7a0f78 100644 --- 
a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -435,7 +435,6 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) setOperationAction(ISD::FDIV, MVT::v4f32, Legal); - setShiftAmountType(MVT::i32); setBooleanContents(ZeroOrNegativeOneBooleanContent); setStackPointerRegisterToSaveRestore(SPU::R1); @@ -1219,7 +1218,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, FuncInfo->setVarArgsFrameIndex( MFI->CreateFixedObject(StackSlotSize, ArgOffset, true)); SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); - unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass, dl); + unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass); SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(), false, false, 0); @@ -2190,7 +2189,7 @@ static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, { SDValue N0 = Op.getOperand(0); // Everything has at least one operand DebugLoc dl = Op.getDebugLoc(); - EVT ShiftVT = TLI.getShiftAmountTy(); + EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType()); assert(Op.getValueType() == MVT::i8); switch (Opc) { @@ -3112,7 +3111,7 @@ SPUTargetLowering::getSingleConstraintMatchWeight( switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; + break; //FIXME: Seems like the supported constraint letters were just copied // from PPC, as the following doesn't correspond to the GCC docs. // I'm leaving it so until someone adds the corresponding lowering support. 
diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index 95d44afe37c8..dd48d7bafaef 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -109,6 +109,8 @@ namespace llvm { /// getSetCCResultType - Return the ValueType for ISD::SETCC virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + //! Custom lowering hooks virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -179,9 +181,9 @@ namespace llvm { virtual bool isLegalICmpImmediate(int64_t Imm) const; - virtual bool isLegalAddressingMode(const AddrMode &AM, + virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; - + /// After allocating this many registers, the allocator should feel /// register pressure. The value is a somewhat random guess, based on the /// number of non callee saved registers in the C calling convention. diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 2f40bfc89601..f39826b1cf17 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -907,7 +907,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, // Transform the arguments stored on // physical registers into virtual ones - unsigned Reg = MF.addLiveIn(ArgRegEnd, RC, dl); + unsigned Reg = MF.addLiveIn(ArgRegEnd, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it has been passed promoted @@ -973,7 +973,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, for (; Start <= End; ++Start, ++StackLoc) { unsigned Reg = MBlazeRegisterInfo::getRegisterFromNumbering(Start); - unsigned LiveReg = MF.addLiveIn(Reg, RC, dl); + unsigned LiveReg = MF.addLiveIn(Reg, RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, LiveReg, MVT::i32); int FI = 
MFI->CreateFixedObject(4, 0, true); diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 30ef4f5da08e..a95d59c0576c 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -77,10 +77,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : // Division is expensive setIntDivIsCheap(false); - // Even if we have only 1 bit shift here, we can perform - // shifts of the whole bitwidth 1 bit per step. - setShiftAmountType(MVT::i8); - setStackPointerRegisterToSaveRestore(MSP430::SPW); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::Latency); @@ -330,7 +326,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, // Arguments passed in registers EVT RegVT = VA.getLocVT(); switch (RegVT.getSimpleVT().SimpleTy) { - default: + default: { #ifndef NDEBUG errs() << "LowerFormalArguments Unhandled argument type: " diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 673c5433b96e..19c9eac589f0 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -73,6 +73,8 @@ namespace llvm { public: explicit MSP430TargetLowering(MSP430TargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + /// LowerOperation - Provide custom lowering hooks for some operations. 
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 8f623b859b55..70d00e4b5cc5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -362,7 +362,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); } - setShiftAmountType(MVT::i32); setBooleanContents(ZeroOrOneBooleanContent); if (TM.getSubtarget().isPPC64()) { @@ -1597,7 +1596,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( } // Transform the arguments stored in physical registers into virtual ones. - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); InVals.push_back(ArgValue); @@ -1689,7 +1688,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Get an existing live-in vreg, or add a new one. unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); if (!VReg) - VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass, dl); + VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1708,7 +1707,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( // Get an existing live-in vreg, or add a new one. 
unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); if (!VReg) - VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass, dl); + VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1872,7 +1871,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo(), @@ -1891,7 +1890,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // to memory. ArgVal will be address of the beginning of // the object. if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); @@ -1914,7 +1913,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( case MVT::i32: if (!isPPC64) { if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); ++GPR_idx; } else { @@ -1928,7 +1927,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // FALLTHROUGH case MVT::i64: // PPC64 if (GPR_idx != Num_GPR_Regs) { - unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl); + unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); if (ObjectVT == MVT::i32) { @@ -1966,9 +1965,9 @@ 
PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg; if (ObjectVT == MVT::f32) - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass, dl); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass, dl); + VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); ++FPR_idx; @@ -1986,7 +1985,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // Note that vector arguments in registers don't reserve stack space, // except in varargs functions. if (VR_idx != Num_VR_Regs) { - unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass, dl); + unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); if (isVarArg) { while ((ArgOffset % 16) != 0) { @@ -2064,9 +2063,9 @@ PPCTargetLowering::LowerFormalArguments_Darwin( unsigned VReg; if (isPPC64) - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass, dl); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); else - VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass, dl); + VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 80cab75b960a..33daae9b5445 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -29,36 +29,36 @@ namespace llvm { /// FSEL - Traditional three-operand fsel node. /// FSEL, - + /// FCFID - The FCFID instruction, taking an f64 operand and producing /// and f64 value containing the FP representation of the integer that /// was temporarily in the f64 operand. 
FCFID, - - /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 + + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation /// of that FP value. FCTIDZ, FCTIWZ, - + /// STFIWX - The STFIWX instruction. The first operand is an input token /// chain, then an f64 value to store, then an address to store it to. STFIWX, - + // VMADDFP, VNMSUBFP - The VMADDFP and VNMSUBFP instructions, taking // three v4f32 operands and producing a v4f32 result. VMADDFP, VNMSUBFP, - + /// VPERM - The PPC VPERM Instruction. /// VPERM, - + /// Hi/Lo - These represent the high and low 16-bit parts of a global /// address respectively. These nodes have two operands, the first of /// which must be a TargetGlobalAddress, and the second of which must be a /// Constant. Selected naively, these turn into 'lis G+C' and 'li G+C', /// though these are usually folded into other nodes. Hi, Lo, - + TOC_ENTRY, /// The following three target-specific nodes are used for calls through @@ -80,37 +80,37 @@ namespace llvm { /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to /// compute an allocation on the stack. DYNALLOC, - + /// GlobalBaseReg - On Darwin, this node represents the result of the mflr /// at function entry, used for PIC code. GlobalBaseReg, - + /// These nodes represent the 32-bit PPC shifts that operate on 6-bit /// shift amounts. These nodes are generated by the multi-precision shift /// code. SRL, SRA, SHL, - + /// EXTSW_32 - This is the EXTSW instruction for use with "32-bit" /// registers. EXTSW_32, /// CALL - A direct function call. CALL_Darwin, CALL_SVR4, - + /// NOP - Special NOP which follows 64-bit SVR4 calls. NOP, /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, - + /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a /// BCTRL instruction. 
BCTRL_Darwin, BCTRL_SVR4, - + /// Return with a flag operand, matched by 'blr' RET_FLAG, - + /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF /// instructions. This copies the bits corresponding to the specified /// CRREG into the resultant GPR. Bits corresponding to other CR regs @@ -122,20 +122,20 @@ namespace llvm { /// encoding for the OPC field to identify the compare. For example, 838 /// is VCMPGTSH. VCMP, - + /// RESVEC, OUTFLAG = VCMPo(LHS, RHS, OPC) - Represents one of the - /// altivec VCMP*o instructions. For lack of better number, we use the + /// altivec VCMP*o instructions. For lack of better number, we use the /// opcode number encoding for the OPC field to identify the compare. For /// example, 838 is VCMPGTSH. VCMPo, - + /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This /// corresponds to the COND_BRANCH pseudo instruction. CRRC is the /// condition register to branch on, OPC is the branch opcode to use (e.g. /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is /// an optional input flag argument. COND_BRANCH, - + // The following 5 instructions are used only as part of the // long double-to-int conversion sequence. @@ -150,7 +150,7 @@ namespace llvm { MTFSB1, /// F8RC, OUTFLAG = FADDRTZ F8RC, F8RC, INFLAG - This is an FADD done with - /// rounding towards zero. It has flags added so it won't move past the + /// rounding towards zero. It has flags added so it won't move past the /// FPSCR-setting instructions. FADDRTZ, @@ -174,14 +174,14 @@ namespace llvm { /// STD_32 - This is the STD instruction for use with "32-bit" registers. STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE, - - /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a + + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or /// i32. 
- STBRX, - - /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a + STBRX, + + /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a /// byte-swapping load instruction. It loads "Type" bits, byte swaps it, /// then puts it in the bottom bits of the GPRC. TYPE can be either i16 /// or i32. @@ -194,7 +194,7 @@ namespace llvm { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); - + /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); @@ -208,16 +208,16 @@ namespace llvm { /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, bool isUnary); - + /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. int isVSLDOIShuffleMask(SDNode *N, bool isUnary); - + /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to /// VSPLTB/VSPLTH/VSPLTW. bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize); - + /// isAllNegativeZeroVector - Returns true if all elements of build_vector /// are -0.0. bool isAllNegativeZeroVector(SDNode *N); @@ -225,24 +225,26 @@ namespace llvm { /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize); - + /// get_VSPLTI_elt - If this is a build_vector of constants which can be /// formed by using a vspltis[bhw] instruction of the specified element /// size, return the constant being splatted. The ByteSize field indicates /// the number of bytes of each element [124] -> [bhw]. 
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); } - + class PPCTargetLowering : public TargetLowering { const PPCSubtarget &PPCSubTarget; public: explicit PPCTargetLowering(PPCTargetMachine &TM); - + /// getTargetNodeName() - This method returns the name of a target specific /// DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + /// getSetCCResultType - Return the ISD::SETCC ValueType virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const; @@ -253,19 +255,19 @@ namespace llvm { SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const; - + /// SelectAddressRegReg - Given the specified addressed, check to see if it /// can be represented as an indexed [r+r] operation. Returns false if it /// can be more efficiently represented with [r+imm]. bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const; - + /// SelectAddressRegImm - Returns true if the address N can be represented /// by a base register plus a signed 16-bit displacement [r+imm], and if it /// is not better represented as reg+reg. bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; - + /// SelectAddressRegRegOnly - Given the specified addressed, force it to be /// represented as an indexed [r+r] operation. bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, @@ -277,7 +279,7 @@ namespace llvm { bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const; - + /// LowerOperation - Provide custom lowering hooks for some operations. 
/// virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -289,10 +291,10 @@ namespace llvm { SelectionDAG &DAG) const; virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - + virtual void computeMaskedBitsForTargetNode(const SDValue Op, const APInt &Mask, - APInt &KnownZero, + APInt &KnownZero, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth = 0) const; @@ -300,13 +302,13 @@ namespace llvm { virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; - MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, + MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *MBB, bool is64Bit, unsigned BinOpcode) const; - MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, - MachineBasicBlock *MBB, + MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr *MI, + MachineBasicBlock *MBB, bool is8bit, unsigned Opcode) const; - + ConstraintType getConstraintType(const std::string &Constraint) const; /// Examine constraint string and operand type and determine a weight value. @@ -314,7 +316,7 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::pair + std::pair getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; @@ -329,11 +331,11 @@ namespace llvm { char ConstraintLetter, std::vector &Ops, SelectionDAG &DAG) const; - + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; - + /// isLegalAddressImmediate - Return true if the integer value can be used /// as the offset of the target addressing mode for load / store of the /// given type. 
@@ -344,7 +346,7 @@ namespace llvm { virtual bool isLegalAddressImmediate(GlobalValue *GV) const; virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - + /// getOptimalMemOpType - Returns the target specific optimal type for load /// and store operations as a result of memset, memcpy, and memmove /// lowering. If DstAlign is zero that means it's safe to destination diff --git a/lib/Target/README.txt b/lib/Target/README.txt index 4e14fbbb09ba..f85914b61d9d 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -254,6 +254,20 @@ unsigned long reverse(unsigned v) { //===---------------------------------------------------------------------===// +[LOOP DELETION] + +We don't delete this output free loop, because trip count analysis doesn't +realize that it is finite (if it were infinite, it would be undefined). Not +having this blocks Loop Idiom from matching strlen and friends. + +void foo(char *C) { + int x = 0; + while (*C) + ++x,++C; +} + +//===---------------------------------------------------------------------===// + [LOOP RECOGNITION] These idioms should be recognized as popcount (see PR1488): @@ -287,6 +301,16 @@ unsigned int popcount(unsigned int input) { return count; } +This should be recognized as CLZ: rdar://8459039 + +unsigned clz_a(unsigned a) { + int i; + for (i=0;i<32;i++) + if (a & (1<<(31-i))) + return i; + return 32; +} + This sort of thing should be added to the loop idiom pass. 
//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp index ee292758d186..4b12852ef873 100644 --- a/lib/Target/Sparc/DelaySlotFiller.cpp +++ b/lib/Target/Sparc/DelaySlotFiller.cpp @@ -79,6 +79,7 @@ namespace { MachineBasicBlock::iterator findDelayInstr(MachineBasicBlock &MBB, MachineBasicBlock::iterator slot); + bool needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize); }; char Filler::ID = 0; @@ -91,6 +92,7 @@ FunctionPass *llvm::createSparcDelaySlotFillerPass(TargetMachine &tm) { return new Filler(tm); } + /// runOnMachineBasicBlock - Fill in delay slots for the given basic block. /// We assume there is only one delay slot per delayed instruction. /// @@ -112,6 +114,13 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) { BuildMI(MBB, ++J, I->getDebugLoc(), TII->get(SP::NOP)); else MBB.splice(++J, &MBB, D); + unsigned structSize = 0; + if (needsUnimp(I, structSize)) { + MachineBasicBlock::iterator J = I; + ++J; //skip the delay filler. 
+ BuildMI(MBB, ++J, I->getDebugLoc(), + TII->get(SP::UNIMP)).addImm(structSize); + } } return Changed; } @@ -287,6 +296,28 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB, { if (candidate == MBB.begin()) return false; + if (candidate->getOpcode() == SP::UNIMP) + return true; const TargetInstrDesc &prevdesc = (--candidate)->getDesc(); return prevdesc.hasDelaySlot(); } + +bool Filler::needsUnimp(MachineBasicBlock::iterator I, unsigned &StructSize) +{ + if (!I->getDesc().isCall()) + return false; + + unsigned structSizeOpNum = 0; + switch (I->getOpcode()) { + default: llvm_unreachable("Unknown call opcode."); + case SP::CALL: structSizeOpNum = 1; break; + case SP::JMPLrr: + case SP::JMPLri: structSizeOpNum = 2; break; + } + + const MachineOperand &MO = I->getOperand(structSizeOpNum); + if (!MO.isImm()) + return false; + StructSize = MO.getImm(); + return true; +} diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 196b87dd58d0..70574c370f35 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -16,7 +16,9 @@ #include "SparcISelLowering.h" #include "SparcTargetMachine.h" #include "SparcMachineFunctionInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/Module.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -116,6 +118,8 @@ SparcTargetLowering::LowerReturn(SDValue Chain, // Guarantee that all emitted copies are stuck together with flags. 
Flag = Chain.getValue(1); } + + unsigned RetAddrOffset = 8; //Call Inst + Delay Slot // If the function returns a struct, copy the SRetReturnReg to I0 if (MF.getFunction()->hasStructRetAttr()) { SparcMachineFunctionInfo *SFI = MF.getInfo(); @@ -127,11 +131,16 @@ SparcTargetLowering::LowerReturn(SDValue Chain, Flag = Chain.getValue(1); if (MF.getRegInfo().liveout_empty()) MF.getRegInfo().addLiveOut(SP::I0); + RetAddrOffset = 12; // CallInst + Delay Slot + Unimp } + SDValue RetAddrOffsetNode = DAG.getConstant(RetAddrOffset, MVT::i32); + if (Flag.getNode()) - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain); + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, + RetAddrOffsetNode, Flag); + return DAG.getNode(SPISD::RET_FLAG, dl, MVT::Other, Chain, + RetAddrOffsetNode); } /// LowerFormalArguments - V8 uses a very simple ABI, where all values are @@ -194,7 +203,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, false, false, 0); } else { unsigned loReg = MF.addLiveIn(NextVA.getLocReg(), - &SP::IntRegsRegClass, dl); + &SP::IntRegsRegClass); LoVal = DAG.getCopyFromReg(Chain, dl, loReg, MVT::i32); } SDValue WholeValue = @@ -393,6 +402,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector MemOpChains; const unsigned StackOffset = 92; + bool hasStructRetAttr = false; // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, realArgIdx = 0, byvalArgIdx = 0, e = ArgLocs.size(); i != e; @@ -433,6 +443,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0)); + hasStructRetAttr = true; continue; } @@ -546,6 +557,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, InFlag = Chain.getValue(1); } + unsigned SRetArgSize = (hasStructRetAttr)? 
getSRetArgSize(DAG, Callee):0; + // If the callee is a GlobalAddress node (quite common, every direct call is) // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. // Likewise ExternalSymbol -> TargetExternalSymbol. @@ -559,6 +572,8 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); + if (hasStructRetAttr) + Ops.push_back(DAG.getTargetConstant(SRetArgSize, MVT::i32)); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { unsigned Reg = RegsToPass[i].first; if (Reg >= SP::I0 && Reg <= SP::I7) @@ -600,7 +615,29 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, return Chain; } +unsigned +SparcTargetLowering::getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const +{ + const Function *CalleeFn = 0; + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + CalleeFn = dyn_cast(G->getGlobal()); + } else if (ExternalSymbolSDNode *E = + dyn_cast(Callee)) { + const Function *Fn = DAG.getMachineFunction().getFunction(); + const Module *M = Fn->getParent(); + CalleeFn = M->getFunction(E->getSymbol()); + } + if (!CalleeFn) + return 0; + + assert(CalleeFn->hasStructRetAttr() && + "Callee does not have the StructRet attribute."); + + const PointerType *Ty = cast(CalleeFn->arg_begin()->getType()); + const Type *ElementTy = Ty->getElementType(); + return getTargetData()->getTypeAllocSize(ElementTy); +} //===----------------------------------------------------------------------===// // TargetLowering Implementation diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h index 849e4010af6b..7d02df8adcca 100644 --- a/lib/Target/Sparc/SparcISelLowering.h +++ b/lib/Target/Sparc/SparcISelLowering.h @@ -101,6 +101,8 @@ namespace llvm { SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; + + unsigned getSRetArgSize(SelectionDAG &DAG, SDValue Callee) const; }; } // end 
namespace llvm diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index 107232357b3b..cf5c48fd18d9 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -124,7 +124,8 @@ def call : SDNode<"SPISD::CALL", SDT_SPCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; -def retflag : SDNode<"SPISD::RET_FLAG", SDTNone, +def SDT_SPRet : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; +def retflag : SDNode<"SPISD::RET_FLAG", SDT_SPRet, [SDNPHasChain, SDNPOptInGlue]>; def flushw : SDNode<"SPISD::FLUSHW", SDTNone, @@ -132,7 +133,7 @@ def flushw : SDNode<"SPISD::FLUSHW", SDTNone, def getPCX : Operand { let PrintMethod = "printGetPCX"; -} +} //===----------------------------------------------------------------------===// // SPARC Flag Conditions @@ -232,6 +233,9 @@ let hasSideEffects = 1, mayStore = 1 in { [(flushw)]>; } +def UNIMP : F2_1<0b000, (outs), (ins i32imm:$val), + "unimp $val", []>; + // FpMOVD/FpNEGD/FpABSD - These are lowered to single-precision ops by the // fpmover pass. let Predicates = [HasNoV9] in { // Only emit these in V8 mode. @@ -292,11 +296,13 @@ let usesCustomInserter = 1, Uses = [FCC] in { // Section A.3 - Synthetic Instructions, p. 85 // special cases of JMPL: let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { - let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in - def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; + let rd = O7.Num, rs1 = G0.Num in + def RETL: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %o7+$val", [(retflag simm13:$val)]>; - let rd = I7.Num, rs1 = G0.Num, simm13 = 8 in - def RET: F3_2<2, 0b111000, (outs), (ins), "ret", []>; + let rd = I7.Num, rs1 = G0.Num in + def RET: F3_2<2, 0b111000, (outs), (ins i32imm:$val), + "jmp %i7+$val", []>; } // Section B.1 - Load Integer Instructions, p. 
90 diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index d694f2e67edc..90939c312065 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -59,9 +59,6 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : // Compute derived properties from the register classes computeRegisterProperties(); - // Set shifts properties - setShiftAmountType(MVT::i64); - // Provide all sorts of operation actions setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 51d2df3a3008..30192420dcb6 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -57,6 +57,8 @@ namespace llvm { public: explicit SystemZTargetLowering(SystemZTargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } + /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 1cac07a0e10a..8fe549ba3126 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -775,6 +775,19 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } + // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". 
+ if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && + Operands.size() == 3) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + if (Op.isMem() && Op.Mem.SegReg == 0 && + isa(Op.Mem.Disp) && + cast(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { + SMLoc Loc = Op.getEndLoc(); + Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); + delete &Op; + } + } // FIXME: Hack to handle recognize s{hr,ar,hl} $1, . Canonicalize to // "shift ". diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index 691e2d7204ab..f7777561b6a7 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -168,16 +168,16 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, switch (insn.displacementSize) { default: break; - case 8: + case 1: type = TYPE_MOFFS8; break; - case 16: + case 2: type = TYPE_MOFFS16; break; - case 32: + case 4: type = TYPE_MOFFS32; break; - case 64: + case 8: type = TYPE_MOFFS64; break; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index 4f4fbcdd394c..d0dc8b56aea5 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -399,7 +399,7 @@ struct InternalInstruction { /* The segment override type */ SegmentOverride segmentOverride; - /* Sizes of various critical pieces of data */ + /* Sizes of various critical pieces of data, in bytes */ uint8_t registerSize; uint8_t addressSize; uint8_t displacementSize; diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index c10e1709f667..abd1515cf5d7 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -1879,39 +1879,71 @@ _add32carry: //===---------------------------------------------------------------------===// -This: 
-char t(char c) { - return c/3; +The hot loop of 256.bzip2 contains code that looks a bit like this: + +int foo(char *P, char *Q, int x, int y) { + if (P[0] != Q[0]) + return P[0] < Q[0]; + if (P[1] != Q[1]) + return P[1] < Q[1]; + if (P[2] != Q[2]) + return P[2] < Q[2]; + return P[3] < Q[3]; } -Compiles to: $clang t.c -S -o - -O3 -mkernel -fomit-frame-pointer +In the real code, we get a lot more wrong than this. However, even in this +code we generate: -_t: ## @t - movslq %edi, %rax - imulq $-1431655765, %rax, %rcx ## imm = 0xFFFFFFFFAAAAAAAB - shrq $32, %rcx - addl %ecx, %eax - movl %eax, %ecx - shrl $31, %ecx - shrl %eax - addl %ecx, %eax - movsbl %al, %eax +_foo: ## @foo +## BB#0: ## %entry + movb (%rsi), %al + movb (%rdi), %cl + cmpb %al, %cl + je LBB0_2 +LBB0_1: ## %if.then + cmpb %al, %cl + jmp LBB0_5 +LBB0_2: ## %if.end + movb 1(%rsi), %al + movb 1(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#3: ## %if.end38 + movb 2(%rsi), %al + movb 2(%rdi), %cl + cmpb %al, %cl + jne LBB0_1 +## BB#4: ## %if.end60 + movb 3(%rdi), %al + cmpb 3(%rsi), %al +LBB0_5: ## %if.end60 + setl %al + movzbl %al, %eax ret -GCC gets: +Note that we generate jumps to LBB0_1 which does a redundant compare. The +redundant compare also forces the register values to be live, which prevents +folding one of the loads into the compare. In contrast, GCC 4.2 produces: -_t: - movl $86, %eax - imulb %dil - shrw $8, %ax - sarb $7, %dil - subb %dil, %al - movsbl %al,%eax +_foo: + movzbl (%rsi), %eax + cmpb %al, (%rdi) + jne L10 +L12: + movzbl 1(%rsi), %eax + cmpb %al, 1(%rdi) + jne L10 + movzbl 2(%rsi), %eax + cmpb %al, 2(%rdi) + jne L10 + movzbl 3(%rdi), %eax + cmpb 3(%rsi), %al +L10: + setl %al + movzbl %al, %eax ret -which is nicer. This also happens for int, not just char. +which is "perfect". 
//===---------------------------------------------------------------------===// - - diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 9d42ac2e470c..6fa928462b28 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -597,9 +597,13 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; - // Can't handle TLS or DLLImport. + // Can't handle DLLImport. + if (GV->hasDLLImportLinkage()) + return false; + + // Can't handle TLS. if (const GlobalVariable *GVar = dyn_cast(GV)) - if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) + if (GVar->isThreadLocal()) return false; // Okay, we've committed to selecting this global. Set up the basic address. diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 27024b4e9e5a..2f49dbcebf3c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,7 +45,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VectorExtras.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -56,10 +55,6 @@ using namespace dwarf; STATISTIC(NumTailCalls, "Number of tail calls"); -static cl::opt -Disable256Bit("disable-256bit", cl::Hidden, - cl::desc("Disable use of 256-bit vectors")); - // Forward declarations. static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -225,7 +220,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) static MVT IntVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }; // X86 is weird, it always uses i8 for shift amounts and setcc results. 
- setShiftAmountType(MVT::i8); setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); @@ -1713,7 +1707,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, else llvm_unreachable("Unknown argument type!"); - unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC, dl); + unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); // If this is an 8 or 16-bit value, it is really passed promoted to 32 @@ -1845,7 +1839,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN, DAG.getIntPtrConstant(Offset)); unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs], - X86::GR64RegisterClass, dl); + X86::GR64RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -1861,7 +1855,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, SmallVector SaveXMMOps; SaveXMMOps.push_back(Chain); - unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass, dl); + unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); SaveXMMOps.push_back(ALVal); @@ -1872,7 +1866,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { unsigned VReg = MF.addLiveIn(XMMArgRegs64Bit[NumXMMRegs], - X86::VR128RegisterClass, dl); + X86::VR128RegisterClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); SaveXMMOps.push_back(Val); } @@ -2693,6 +2687,10 @@ static bool isTargetShuffle(unsigned Opcode) { case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -2760,6 +2758,10 @@ static SDValue 
getTargetShuffleNode(unsigned Opc, DebugLoc dl, EVT VT, case X86ISD::MOVSD: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLBW: case X86ISD::PUNPCKLDQ: @@ -4178,7 +4180,8 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, SrcOp = DAG.getNode(ISD::BITCAST, dl, ShVT, SrcOp); return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(Opc, dl, ShVT, SrcOp, - DAG.getConstant(NumBits, TLI.getShiftAmountTy()))); + DAG.getConstant(NumBits, + TLI.getShiftAmountTy(SrcOp.getValueType())))); } SDValue @@ -4327,16 +4330,15 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { // For AVX-length vectors, build the individual 128-bit pieces and // use shuffles to put them in place. - if (VT.getSizeInBits() > 256 && - Subtarget->hasAVX() && - !Disable256Bit && + if (VT.getSizeInBits() > 256 && + Subtarget->hasAVX() && !ISD::isBuildVectorAllZeros(Op.getNode())) { SmallVector V; V.resize(NumElems); for (unsigned i = 0; i < NumElems; ++i) { V[i] = Op.getOperand(i); } - + EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2); // Build the lower subvector. 
@@ -5044,7 +5046,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, DAG.getIntPtrConstant(Elt1 / 2)); if ((Elt1 & 1) == 0) InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt.getValueType()))); else if (Elt0 >= 0) InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt, DAG.getConstant(0xFF00, MVT::i16)); @@ -5058,7 +5061,8 @@ SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp, Elt0Src, DAG.getIntPtrConstant(Elt0 / 2)); if ((Elt0 & 1) != 0) InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0, - DAG.getConstant(8, TLI.getShiftAmountTy())); + DAG.getConstant(8, + TLI.getShiftAmountTy(InsElt0.getValueType()))); else if (Elt1 >= 0) InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0, DAG.getConstant(0x00FF, MVT::i16)); @@ -5475,7 +5479,7 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; - + if (CanFoldLoad) { if (HasSSE2 && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); @@ -6088,7 +6092,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { SDValue ScaledN2 = N2; if (Upper) ScaledN2 = DAG.getNode(ISD::SUB, dl, N2.getValueType(), N2, - DAG.getConstant(NumElems / + DAG.getConstant(NumElems / (VT.getSizeInBits() / 128), N2.getValueType())); Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubN0.getValueType(), SubN0, @@ -9327,6 +9331,10 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::MOVSS: return "X86ISD::MOVSS"; case X86ISD::UNPCKLPS: return "X86ISD::UNPCKLPS"; case X86ISD::UNPCKLPD: return "X86ISD::UNPCKLPD"; + case X86ISD::VUNPCKLPS: return "X86ISD::VUNPCKLPS"; + case X86ISD::VUNPCKLPD: return "X86ISD::VUNPCKLPD"; + case X86ISD::VUNPCKLPSY: return "X86ISD::VUNPCKLPSY"; + case X86ISD::VUNPCKLPDY: 
return "X86ISD::VUNPCKLPDY"; case X86ISD::UNPCKHPS: return "X86ISD::UNPCKHPS"; case X86ISD::UNPCKHPD: return "X86ISD::UNPCKHPD"; case X86ISD::PUNPCKLBW: return "X86ISD::PUNPCKLBW"; @@ -11984,6 +11992,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::PUNPCKLQDQ: case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: case X86ISD::MOVHLPS: case X86ISD::MOVLHPS: case X86ISD::PSHUFD: diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 419da3742cf8..6ec4a7de7558 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -159,16 +159,16 @@ namespace llvm { /// PSHUFB - Shuffle 16 8-bit values within a vector. PSHUFB, - + /// PANDN - and with not'd value. PANDN, - + /// PSIGNB/W/D - Copy integer sign. - PSIGNB, PSIGNW, PSIGND, - + PSIGNB, PSIGNW, PSIGND, + /// PBLENDVB - Variable blend PBLENDVB, - + /// FMAX, FMIN - Floating point max and min. /// FMAX, FMIN, @@ -212,7 +212,7 @@ namespace llvm { // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results. ADD, SUB, ADC, SBB, SMUL, INC, DEC, OR, XOR, AND, - + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. 
@@ -248,6 +248,10 @@ namespace llvm { MOVSS, UNPCKLPS, UNPCKLPD, + VUNPCKLPS, + VUNPCKLPD, + VUNPCKLPSY, + VUNPCKLPDY, UNPCKHPS, UNPCKHPD, PUNPCKLBW, @@ -463,6 +467,8 @@ namespace llvm { virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } + virtual const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 344c14c112a0..0660072589e4 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -41,6 +41,8 @@ def MRM_F8 : Format<41>; def MRM_F9 : Format<42>; def RawFrmImm8 : Format<43>; def RawFrmImm16 : Format<44>; +def MRM_D0 : Format<45>; +def MRM_D1 : Format<46>; // ImmType - This specifies the immediate type used by an instruction. This is // part of the ad-hoc solution used to emit machine instruction encodings by our diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index ceb1b6539826..76a9b12b8aad 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -369,8 +369,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL32rri8, X86::IMUL32rmi8, 0 }, { X86::IMUL64rri32, X86::IMUL64rmi32, 0 }, { X86::IMUL64rri8, X86::IMUL64rmi8, 0 }, - { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, - { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 }, { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 }, { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm, 16 }, @@ -568,6 +566,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::IMUL16rr, X86::IMUL16rm, 0 }, { X86::IMUL32rr, X86::IMUL32rm, 0 }, { X86::IMUL64rr, X86::IMUL64rm, 0 }, + { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 }, + { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 }, { X86::MAXPDrr, X86::MAXPDrm, 16 }, { X86::MAXPDrr_Int, X86::MAXPDrm_Int, 16 }, { X86::MAXPSrr, X86::MAXPSrm, 16 }, diff --git 
a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 1d4420787273..fcb5a25104ac 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -311,6 +311,8 @@ namespace X86II { MRM_F0 = 40, MRM_F8 = 41, MRM_F9 = 42, + MRM_D0 = 45, + MRM_D1 = 46, /// RawFrmImm8 - This is used for the ENTER instruction, which has two /// immediates, the first of which is a 16-bit immediate (specified by @@ -577,6 +579,8 @@ namespace X86II { case X86II::MRM_F0: case X86II::MRM_F8: case X86II::MRM_F9: + case X86II::MRM_D0: + case X86II::MRM_D1: return -1; } } diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 87dc4bece742..f832a7c85a8a 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1296,6 +1296,9 @@ def : MnemonicAlias<"lret", "lretl">; def : MnemonicAlias<"leavel", "leave">, Requires<[In32BitMode]>; def : MnemonicAlias<"leaveq", "leave">, Requires<[In64BitMode]>; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; + def : MnemonicAlias<"pop", "popl">, Requires<[In32BitMode]>; def : MnemonicAlias<"pop", "popq">, Requires<[In64BitMode]>; def : MnemonicAlias<"popf", "popfl">, Requires<[In32BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 1a58ba0f96ef..6a24d145c696 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -388,3 +388,8 @@ def CPUID : I<0xA2, RawFrm, (outs), (ins), "cpuid", []>, TB; def INVD : I<0x08, RawFrm, (outs), (ins), "invd", []>, TB; def WBINVD : I<0x09, RawFrm, (outs), (ins), "wbinvd", []>, TB; +let Defs = [RDX, RAX], Uses = [RCX] in + def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB; + +let Uses = [RDX, RAX, RCX] in + def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index e6dc74e65d79..0e3b5711f2b5 100644 --- 
a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -979,6 +979,14 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, EmitByte(BaseOpcode, CurByte, OS); EmitByte(0xF9, CurByte, OS); break; + case X86II::MRM_D0: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD0, CurByte, OS); + break; + case X86II::MRM_D1: + EmitByte(BaseOpcode, CurByte, OS); + EmitByte(0xD1, CurByte, OS); + break; } // If there is a remaining operand, it must be a trailing immediate. Emit it diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index de768561f111..1ee73123bbc6 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -342,9 +342,10 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, assert((!Is64Bit || HasX86_64) && "64-bit code requested on a subtarget that doesn't support it!"); - // Stack alignment is 16 bytes on Darwin and Linux (both 32 and 64 bit) and - // for all 64-bit targets. - if (isTargetDarwin() || isTargetLinux() || Is64Bit) + // Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both + // 32 and 64 bit) and for all 64-bit targets. + if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() || + isTargetSolaris() || Is64Bit) stackAlignment = 16; if (StackAlignment) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 8a119b43cd91..0a62a029554c 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -166,6 +166,8 @@ class X86Subtarget : public TargetSubtarget { bool hasVectorUAMem() const { return HasVectorUAMem; } bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; } + bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". 
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 828d6f92caf4..4817787d7515 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -42,9 +42,9 @@ using namespace llvm; const char *XCoreTargetLowering:: -getTargetNodeName(unsigned Opcode) const +getTargetNodeName(unsigned Opcode) const { - switch (Opcode) + switch (Opcode) { case XCoreISD::BL : return "XCoreISD::BL"; case XCoreISD::PCRelativeWrapper : return "XCoreISD::PCRelativeWrapper"; @@ -77,7 +77,6 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Division is expensive setIntDivIsCheap(false); - setShiftAmountType(MVT::i32); setStackPointerRegisterToSaveRestore(XCore::SP); setSchedulingPreference(Sched::RegPressure); @@ -95,7 +94,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Stop the combiner recombining select and set_cc setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - + // 64bit setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SUB, MVT::i64, Custom); @@ -106,14 +105,14 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - + // Bit Manipulation setOperationAction(ISD::CTPOP, MVT::i32, Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); - + setOperationAction(ISD::TRAP, MVT::Other, Legal); - + // Jump tables. 
setOperationAction(ISD::BR_JT, MVT::Other, Custom); @@ -122,7 +121,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // Thread Local Storage setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); - + // Conversion of i64 -> double produces constantpool nodes setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -143,7 +142,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAARG, MVT::Other, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); - + // Dynamic stack setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -163,7 +162,7 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) SDValue XCoreTargetLowering:: LowerOperation(SDValue Op, SelectionDAG &DAG) const { - switch (Op.getOpcode()) + switch (Op.getOpcode()) { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); @@ -414,7 +413,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LD->getChain(); SDValue BasePtr = LD->getBasePtr(); DebugLoc DL = Op.getDebugLoc(); - + SDValue Base; int64_t Offset; if (!LD->isVolatile() && @@ -437,10 +436,10 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32); SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32); SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32); - + SDValue LowAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, LowOffset); SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); - + SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, LowAddr, MachinePointerInfo(), false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, @@ -453,7 +452,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Result, Chain }; 
return DAG.getMergeValues(Ops, 2, DL); } - + if (LD->getAlignment() == 2) { SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32, Chain, BasePtr, LD->getPointerInfo(), MVT::i16, @@ -473,16 +472,16 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ops[] = { Result, Chain }; return DAG.getMergeValues(Ops, 2, DL); } - + // Lower to a call to __misaligned_load(BasePtr). const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + std::pair CallResult = LowerCallTo(Chain, IntPtrTy, false, false, false, false, 0, CallingConv::C, false, @@ -515,7 +514,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const SDValue BasePtr = ST->getBasePtr(); SDValue Value = ST->getValue(); DebugLoc dl = Op.getDebugLoc(); - + if (ST->getAlignment() == 2) { SDValue Low = Value; SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value, @@ -532,19 +531,19 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const ST->isNonTemporal(), 2); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh); } - + // Lower to a call to __misaligned_store(BasePtr, Value). 
const Type *IntPtrTy = getTargetData()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - + Entry.Ty = IntPtrTy; Entry.Node = BasePtr; Args.push_back(Entry); - + Entry.Node = Value; Args.push_back(Entry); - + std::pair CallResult = LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()), false, false, false, false, 0, CallingConv::C, false, @@ -722,7 +721,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const } DebugLoc dl = N->getDebugLoc(); - + // Extract components SDValue LHSL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), DAG.getConstant(0, MVT::i32)); @@ -732,7 +731,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const N->getOperand(1), DAG.getConstant(0, MVT::i32)); SDValue RHSH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(1), DAG.getConstant(1, MVT::i32)); - + // Expand unsigned Opcode = (N->getOpcode() == ISD::ADD) ? XCoreISD::LADD : XCoreISD::LSUB; @@ -740,7 +739,7 @@ ExpandADDSUB(SDNode *N, SelectionDAG &DAG) const SDValue Carry = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSL, RHSL, Zero); SDValue Lo(Carry.getNode(), 1); - + SDValue Ignored = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32), LHSH, RHSH, Carry); SDValue Hi(Ignored.getNode(), 1); @@ -761,8 +760,8 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const Node->getOperand(1), MachinePointerInfo(V), false, false, 0); // Increment the pointer, VAList, to the next vararg - SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, - DAG.getConstant(VT.getSizeInBits(), + SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, + DAG.getConstant(VT.getSizeInBits(), getPointerTy())); // Store the incremented VAList to the legalized pointer Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Node->getOperand(1), @@ -781,20 +780,20 @@ LowerVASTART(SDValue Op, SelectionDAG &DAG) const MachineFunction &MF = DAG.getMachineFunction(); XCoreFunctionInfo *XFI = MF.getInfo(); SDValue 
Addr = DAG.getFrameIndex(XFI->getVarArgsFrameIndex(), MVT::i32); - return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), + return DAG.getStore(Op.getOperand(0), dl, Addr, Op.getOperand(1), MachinePointerInfo(), false, false, 0); } SDValue XCoreTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); - // Depths > 0 not supported yet! + // Depths > 0 not supported yet! if (cast(Op.getOperand(0))->getZExtValue() > 0) return SDValue(); - + MachineFunction &MF = DAG.getMachineFunction(); const TargetRegisterInfo *RegInfo = getTargetMachine().getRegisterInfo(); - return DAG.getCopyFromReg(DAG.getEntryNode(), dl, + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, RegInfo->getFrameRegister(MF), MVT::i32); } @@ -919,7 +918,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); - Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, + Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy(), true)); SmallVector, 4> RegsToPass; @@ -944,8 +943,8 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); break; } - - // Arguments that can be passed on register must be kept at + + // Arguments that can be passed on register must be kept at // RegsToPass vector if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); @@ -954,7 +953,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, int Offset = VA.getLocMemOffset(); - MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, + MemOpChains.push_back(DAG.getNode(XCoreISD::STWSP, dl, MVT::Other, Chain, Arg, DAG.getConstant(Offset/4, MVT::i32))); } @@ -963,16 +962,16 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Transform all store nodes into one single node because // all 
store nodes are independent of each other. if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &MemOpChains[0], MemOpChains.size()); - // Build a sequence of copy-to-reg nodes chained together with token + // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. // The InFlag in necessary since all emited instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, + Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, RegsToPass[i].second, InFlag); InFlag = Chain.getValue(1); } @@ -986,7 +985,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); // XCoreBranchLink = #chain, #target_address, #opt_in_flags... - // = Chain, Callee, Reg#1, Reg#2, ... + // = Chain, Callee, Reg#1, Reg#2, ... // // Returns a chain & a flag for retval copy to use. SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -994,7 +993,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, Ops.push_back(Chain); Ops.push_back(Callee); - // Add argument registers to the end of the list so that they are + // Add argument registers to the end of the list so that they are // known live into the call. 
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) Ops.push_back(DAG.getRegister(RegsToPass[i].first, @@ -1098,11 +1097,11 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, unsigned StackSlotSize = XCoreFrameLowering::stackSlotSize(); unsigned LRSaveSize = StackSlotSize; - + for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; - + if (VA.isRegLoc()) { // Arguments passed in registers EVT RegVT = VA.getLocVT(); @@ -1139,12 +1138,12 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Create the SelectionDAG nodes corresponding to a load //from this parameter SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, + InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), false, false, 0)); } } - + if (isVarArg) { /* Argument registers */ static const unsigned ArgRegs[] = { @@ -1186,7 +1185,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, true)); } } - + return Chain; } @@ -1222,7 +1221,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, // Analize return values. CCInfo.AnalyzeReturn(Outs, RetCC_XCore); - // If this is the first return lowered for this function, add + // If this is the first return lowered for this function, add // the regs to the liveout set for the function. 
if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { for (unsigned i = 0; i != RVLocs.size(); ++i) @@ -1237,7 +1236,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CCValAssign &VA = RVLocs[i]; assert(VA.isRegLoc() && "Can only return in registers!"); - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), + Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag); // guarantee that all emitted copies are @@ -1265,7 +1264,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); assert((MI->getOpcode() == XCore::SELECT_CC) && "Unexpected instr type to insert"); - + // To "insert" a SELECT_CC instruction, we actually have to insert the diamond // control-flow pattern. The incoming instruction knows the destination vreg // to set, the condition code register to branch on, the true/false values to @@ -1273,7 +1272,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction::iterator It = BB; ++It; - + // thisMBB: // ... // TrueVal = ... @@ -1296,7 +1295,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Next, add the true and fallthrough blocks as its successors. BB->addSuccessor(copy0MBB); BB->addSuccessor(sinkMBB); - + BuildMI(BB, dl, TII.get(XCore::BRFT_lru6)) .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); @@ -1304,10 +1303,10 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // %FalseValue = ... // # fallthrough to sinkMBB BB = copy0MBB; - + // Update machine-CFG edges BB->addSuccessor(sinkMBB); - + // sinkMBB: // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] // ... 
@@ -1316,7 +1315,7 @@ XCoreTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, TII.get(XCore::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); - + MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; } @@ -1354,7 +1353,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (ladd x, 0, y) -> 0, add x, y iff carry is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1377,7 +1376,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, EVT VT = N0.getValueType(); // fold (lsub 0, 0, x) -> x, -x iff x has only the low bit set - if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { + if (N0C && N0C->isNullValue() && N1C && N1C->isNullValue()) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1393,7 +1392,7 @@ SDValue XCoreTargetLowering::PerformDAGCombine(SDNode *N, // fold (lsub x, 0, y) -> 0, sub x, y iff borrow is unused and y has only the // low bit set - if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { + if (N1C && N1C->isNullValue() && N->hasNUsesOfValue(0, 0)) { APInt KnownZero, KnownOne; APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), VT.getSizeInBits() - 1); @@ -1557,7 +1556,7 @@ static inline bool isImmUs4(int64_t val) /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. 
bool -XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, +XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const { if (Ty->getTypeID() == Type::VoidTyID) return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs); @@ -1568,7 +1567,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, return Size >= 4 && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs%4 == 0; } - + switch (Size) { case 1: // reg + imm @@ -1593,7 +1592,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, // reg + reg<<2 return AM.Scale == 4 && AM.BaseOffs == 0; } - + return false; } @@ -1603,7 +1602,7 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM, std::vector XCoreTargetLowering:: getRegClassForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const + EVT VT) const { if (Constraint.size() != 1) return std::vector(); @@ -1611,9 +1610,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint, switch (Constraint[0]) { default : break; case 'r': - return make_vector(XCore::R0, XCore::R1, XCore::R2, - XCore::R3, XCore::R4, XCore::R5, - XCore::R6, XCore::R7, XCore::R8, + return make_vector(XCore::R0, XCore::R1, XCore::R2, + XCore::R3, XCore::R4, XCore::R5, + XCore::R6, XCore::R7, XCore::R8, XCore::R9, XCore::R10, XCore::R11, 0); break; } diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 7e5dd2e8e512..bb3f2cc038e7 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -20,11 +20,11 @@ #include "XCore.h" namespace llvm { - + // Forward delcarations class XCoreSubtarget; class XCoreTargetMachine; - + namespace XCoreISD { enum NodeType { // Start the numbering where the builtin ops and target ops leave off. 
@@ -38,16 +38,16 @@ namespace llvm { // dp relative address DPRelativeWrapper, - + // cp relative address CPRelativeWrapper, - + // Store word to stack STWSP, // Corresponds to retsp instruction RETSP, - + // Corresponds to LADD instruction LADD, @@ -74,13 +74,14 @@ namespace llvm { //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// - class XCoreTargetLowering : public TargetLowering + class XCoreTargetLowering : public TargetLowering { public: explicit XCoreTargetLowering(XCoreTargetMachine &TM); virtual unsigned getJumpTableEncoding() const; + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; @@ -91,10 +92,10 @@ namespace llvm { virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, SelectionDAG &DAG) const; - /// getTargetNodeName - This method returns the name of a target specific + /// getTargetNodeName - This method returns the name of a target specific // DAG node. 
virtual const char *getTargetNodeName(unsigned Opcode) const; - + virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -108,7 +109,7 @@ namespace llvm { private: const XCoreTargetMachine &TM; const XCoreSubtarget &Subtarget; - + // Lower Operand helpers SDValue LowerCCCArguments(SDValue Chain, CallingConv::ID CallConv, @@ -148,12 +149,12 @@ namespace llvm { SDValue LowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const; - + // Inline asm support std::vector getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - + // Expand specifics SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const; SDValue ExpandADDSUB(SDNode *Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 38cc734ce7c3..ecdd4cb63000 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -727,7 +727,7 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; -// TODO setd, eet, eef, getts, setpt, outshr, inshr, testwct, tinitpc, tinitdp, +// TODO setd, eet, eef, testwct, tinitpc, tinitdp, // tinitsp, tinitcp, tsetmr, sext (reg), zext (reg) let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in @@ -758,6 +758,14 @@ def GETR_rus : _FRUS<(outs GRRegs:$dst), (ins i32imm:$type), "getr $dst, $type", [(set GRRegs:$dst, (int_xcore_getr immUs:$type))]>; +def GETTS_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "getts $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_getts GRRegs:$r))]>; + +def SETPT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "setpt res[$r], $val", + [(int_xcore_setpt GRRegs:$r, GRRegs:$val)]>; + def OUTCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "outct res[$r], $val", [(int_xcore_outct GRRegs:$r, 
GRRegs:$val)]>; @@ -774,6 +782,11 @@ def OUT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "out res[$r], $val", [(int_xcore_out GRRegs:$r, GRRegs:$val)]>; +let Constraints = "$src = $dst" in +def OUTSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "outshr res[$r], $src", + [(set GRRegs:$dst, (int_xcore_outshr GRRegs:$r, GRRegs:$src))]>; + def INCT_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "inct $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_inct GRRegs:$r))]>; @@ -786,6 +799,11 @@ def IN_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), "in $dst, res[$r]", [(set GRRegs:$dst, (int_xcore_in GRRegs:$r))]>; +let Constraints = "$src = $dst" in +def INSHR_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r, GRRegs:$src), + "inshr $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_inshr GRRegs:$r, GRRegs:$src))]>; + def CHKCT_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "chkct res[$r], $val", [(int_xcore_chkct GRRegs:$r, GRRegs:$val)]>; @@ -799,7 +817,7 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; // Two operand long -// TODO settw, setclk, setrdy, setpsc, endin, peek, +// TODO setclk, setrdy, setpsc, endin, peek, // getd, testlcl, tinitlr, getps, setps def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", @@ -813,13 +831,17 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "clz $dst, $src", [(set GRRegs:$dst, (ctlz GRRegs:$src))]>; -def SETC_l2r : _FRU6<(outs), (ins GRRegs:$r, GRRegs:$val), +def SETC_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), "setc res[$r], $val", [(int_xcore_setc GRRegs:$r, GRRegs:$val)]>; +def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, GRRegs:$val), + "settw res[$r], $val", + [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; + // One operand short -// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, syncr, clrtp -// setdp, setcp, setv, setev, kcall +// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp +// setdp, setcp, setev, 
kcall // dgetreg let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), @@ -859,20 +881,41 @@ def BLA_1r : _F1R<(outs), (ins GRRegs:$addr, variable_ops), [(XCoreBranchLink GRRegs:$addr)]>; } +def SYNCR_1r : _F1R<(outs), (ins GRRegs:$r), + "syncr res[$r]", + [(int_xcore_syncr GRRegs:$r)]>; + def FREER_1r : _F1R<(outs), (ins GRRegs:$r), "freer res[$r]", [(int_xcore_freer GRRegs:$r)]>; +let Uses=[R11] in +def SETV_1r : _F1R<(outs), (ins GRRegs:$r), + "setv res[$r], r11", + [(int_xcore_setv GRRegs:$r, R11)]>; + +def EEU_1r : _F1R<(outs), (ins GRRegs:$r), + "eeu res[$r]", + [(int_xcore_eeu GRRegs:$r)]>; + // Zero operand short -// TODO waiteu, clre, ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, +// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp +def CLRE_0R : _F0R<(outs), (ins), "clre", [(int_xcore_clre)]>; + let Defs = [R11] in def GETID_0R : _F0R<(outs), (ins), "get r11, id", [(set R11, (int_xcore_getid))]>; +let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1, + hasSideEffects = 1 in +def WAITEU_0R : _F0R<(outs), (ins), + "waiteu", + [(brind (int_xcore_waitevent))]>; + //===----------------------------------------------------------------------===// // Non-Instruction Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index b6b6b84d9647..7986d1aca762 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1897,6 +1897,39 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { return BinaryOperator::CreateNot(And); } + // Canonicalize xor to the RHS. 
+ if (match(Op0, m_Xor(m_Value(), m_Value()))) + std::swap(Op0, Op1); + + // A | ( A ^ B) -> A | B + // A | (~A ^ B) -> A | ~B + if (match(Op1, m_Xor(m_Value(A), m_Value(B)))) { + if (Op0 == A || Op0 == B) + return BinaryOperator::CreateOr(A, B); + + if (Op1->hasOneUse() && match(A, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(B, B->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (Op1->hasOneUse() && match(B, m_Not(m_Specific(Op0)))) { + Value *Not = Builder->CreateNot(A, A->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + } + + // A | ~(A | B) -> A | ~B + // A | ~(A ^ B) -> A | ~B + if (match(Op1, m_Not(m_Value(A)))) + if (BinaryOperator *B = dyn_cast(A)) + if ((Op0 == B->getOperand(0) || Op0 == B->getOperand(1)) && + Op1->hasOneUse() && (B->getOpcode() == Instruction::Or || + B->getOpcode() == Instruction::Xor)) { + Value *NotOp = Op0 == B->getOperand(0) ? B->getOperand(1) : + B->getOperand(0); + Value *Not = Builder->CreateNot(NotOp, NotOp->getName()+".not"); + return BinaryOperator::CreateOr(Not, Op0); + } + if (ICmpInst *RHS = dyn_cast(I.getOperand(1))) if (ICmpInst *LHS = dyn_cast(I.getOperand(0))) if (Value *Res = FoldOrOfICmps(LHS, RHS)) diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 8449f7b7982c..0e464507a7e4 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -953,10 +953,19 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { if (Callee->isDeclaration() && !isConvertible) return false; } - if (FT->getNumParams() < NumActualArgs && !FT->isVarArg() && - Callee->isDeclaration()) - return false; // Do not delete arguments unless we have a function body. + if (Callee->isDeclaration()) { + // Do not delete arguments unless we have a function body. 
+ if (FT->getNumParams() < NumActualArgs && !FT->isVarArg()) + return false; + // If the callee is just a declaration, don't change the varargsness of the + // call. We don't want to introduce a varargs call where one doesn't + // already exist. + const PointerType *APTy = cast(CS.getCalledValue()->getType()); + if (FT->isVarArg()!=cast(APTy->getElementType())->isVarArg()) + return false; + } + if (FT->getNumParams() < NumActualArgs && FT->isVarArg() && !CallerPAL.isEmpty()) // In this case we have more arguments than the new function type, but we @@ -970,8 +979,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) { return false; } + // Okay, we decided that this is a safe thing to do: go ahead and start - // inserting cast instructions as necessary... + // inserting cast instructions as necessary. std::vector Args; Args.reserve(NumActualArgs); SmallVector attrVec; diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp index 6d1d344a9296..753a558cfe83 100644 --- a/lib/Transforms/Scalar/LoopDeletion.cpp +++ b/lib/Transforms/Scalar/LoopDeletion.cpp @@ -78,7 +78,6 @@ bool LoopDeletion::IsLoopDead(Loop* L, SmallVector& exitingBlocks, SmallVector& exitBlocks, bool &Changed, BasicBlock *Preheader) { - BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock* exitBlock = exitBlocks[0]; // Make sure that all PHI entries coming from the loop are loop invariant. @@ -88,11 +87,21 @@ bool LoopDeletion::IsLoopDead(Loop* L, // of the loop. BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast(BI)) { - Value* incoming = P->getIncomingValueForBlock(exitingBlock); + Value* incoming = P->getIncomingValueForBlock(exitingBlocks[0]); + + // Make sure all exiting blocks produce the same incoming value for the exit + // block. If there are different incoming values for different exiting + // blocks, then it is impossible to statically determine which value should + // be used. 
+ for (unsigned i = 1; i < exitingBlocks.size(); ++i) { + if (incoming != P->getIncomingValueForBlock(exitingBlocks[i])) + return false; + } + if (Instruction* I = dyn_cast(incoming)) if (!L->makeLoopInvariant(I, Changed, Preheader->getTerminator())) return false; - + ++BI; } @@ -147,10 +156,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { if (exitBlocks.size() != 1) return false; - // Loops with multiple exits are too complicated to handle correctly. - if (exitingBlocks.size() != 1) - return false; - // Finally, we have to check that the loop really is dead. bool Changed = false; if (!IsLoopDead(L, exitingBlocks, exitBlocks, Changed, preheader)) @@ -166,7 +171,6 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Now that we know the removal is safe, remove the loop by changing the // branch from the preheader to go to the single exit block. BasicBlock* exitBlock = exitBlocks[0]; - BasicBlock* exitingBlock = exitingBlocks[0]; // Because we're deleting a large chunk of code at once, the sequence in which // we remove things is very important to avoid invalidation issues. Don't @@ -183,9 +187,12 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) { // Rewrite phis in the exit block to get their inputs from // the preheader instead of the exiting block. 
+ BasicBlock* exitingBlock = exitingBlocks[0]; BasicBlock::iterator BI = exitBlock->begin(); while (PHINode* P = dyn_cast(BI)) { P->replaceUsesOfWith(exitingBlock, preheader); + for (unsigned i = 1; i < exitingBlocks.size(); ++i) + P->removeIncomingValue(exitingBlocks[i]); ++BI; } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index d7fa149492bd..f8ce214750ac 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -31,6 +31,11 @@ // void foo(_Complex float *P) // for (i) { __real__(*P) = 0; __imag__(*P) = 0; } // +// We should enhance this to handle negative strides through memory. +// Alternatively (and perhaps better) we could rely on an earlier pass to force +// forward iteration through memory, which is generally better for cache +// behavior. Negative strides *do* happen for memset/memcpy loops. +// // This could recognize common matrix multiplies and dot product idioms and // replace them with calls to BLAS (if linked in??). // @@ -272,10 +277,17 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast(StoreEv->getOperand(1)); - // TODO: Could also handle negative stride here someday, that will require the - // validity check in mayLoopAccessLocation to be updated though. - if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { + // TODO: Could also handle negative stride here someday, that will require + // the validity check in mayLoopAccessLocation to be updated though. + // Enable this to print exact negative strides. + if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) { + dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; + dbgs() << "BB: " << *SI->getParent(); + } + return false; + } // See if we can optimize just this store in isolation. 
if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(), diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index ec45b71dd368..9f136d4e3077 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -25,13 +25,14 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Config/config.h" +#include "llvm/Config/config.h" // FIXME: Shouldn't depend on host! using namespace llvm; STATISTIC(NumSimplified, "Number of library calls simplified"); @@ -1369,6 +1370,8 @@ namespace { /// This pass optimizes well known library functions from libc and libm. /// class SimplifyLibCalls : public FunctionPass { + TargetLibraryInfo *TLI; + StringMap Optimizations; // String and Memory LibCall Optimizations StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrRChrOpt StrRChr; @@ -1385,7 +1388,7 @@ namespace { SPrintFOpt SPrintF; PrintFOpt PrintF; FWriteOpt FWrite; FPutsOpt FPuts; FPrintFOpt FPrintF; PutsOpt Puts; - + bool Modified; // This is only used by doInitialization. public: static char ID; // Pass identification @@ -1402,14 +1405,20 @@ namespace { void setDoesNotAlias(Function &F, unsigned n); bool doInitialization(Module &M); + void inferPrototypeAttributes(Function &F); virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); } }; - char SimplifyLibCalls::ID = 0; } // end anonymous namespace. 
-INITIALIZE_PASS(SimplifyLibCalls, "simplify-libcalls", - "Simplify well-known library calls", false, false) +char SimplifyLibCalls::ID = 0; + +INITIALIZE_PASS_BEGIN(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) +INITIALIZE_PASS_END(SimplifyLibCalls, "simplify-libcalls", + "Simplify well-known library calls", false, false) // Public interface to the Simplify LibCalls pass. FunctionPass *llvm::createSimplifyLibCallsPass() { @@ -1441,9 +1450,9 @@ void SimplifyLibCalls::InitOptimizations() { Optimizations["strcspn"] = &StrCSpn; Optimizations["strstr"] = &StrStr; Optimizations["memcmp"] = &MemCmp; - Optimizations["memcpy"] = &MemCpy; + if (TLI->has(LibFunc::memcpy)) Optimizations["memcpy"] = &MemCpy; Optimizations["memmove"] = &MemMove; - Optimizations["memset"] = &MemSet; + if (TLI->has(LibFunc::memset)) Optimizations["memset"] = &MemSet; // _chk variants of String and Memory LibCall Optimizations. Optimizations["__strcpy_chk"] = &StrCpyChk; @@ -1506,6 +1515,8 @@ void SimplifyLibCalls::InitOptimizations() { /// runOnFunction - Top level algorithm. 
///
 bool SimplifyLibCalls::runOnFunction(Function &F) {
+  TLI = &getAnalysis<TargetLibraryInfo>();
+
   if (Optimizations.empty())
     InitOptimizations();
 
@@ -1597,688 +1608,665 @@ void SimplifyLibCalls::setDoesNotAlias(Function &F, unsigned n) {
   }
 }
 
+
+/// inferPrototypeAttributes - Annotate the declaration of a well-known
+/// library function.  F is recognized purely by name (dispatching on the
+/// first character, so the caller must only pass named functions) and its
+/// prototype is sanity-checked before any attribute is added through the
+/// setDoesNotThrow / setOnlyReadsMemory / setDoesNotAccessMemory /
+/// setDoesNotCapture / setDoesNotAlias helpers; on a mismatch F is left
+/// untouched.  Judging by the type checks paired with each call, the index
+/// given to setDoesNotCapture is 1-based (parameter 0 is index 1) and index 0
+/// given to setDoesNotAlias denotes the return value.
+void SimplifyLibCalls::inferPrototypeAttributes(Function &F) {
+  const FunctionType *FTy = F.getFunctionType();
+
+  StringRef Name = F.getName();
+  switch (Name[0]) {
+  case 's':
+    if (Name == "strlen") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "strchr" ||
+               Name == "strrchr") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isIntegerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+    } else if (Name == "strcpy" ||
+               Name == "stpcpy" ||
+               Name == "strcat" ||
+               Name == "strtol" ||
+               Name == "strtod" ||
+               Name == "strtof" ||
+               Name == "strtoul" ||
+               Name == "strtoll" ||
+               Name == "strtold" ||
+               Name == "strncat" ||
+               Name == "strncpy" ||
+               Name == "strtoull") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strxfrm") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strcmp" ||
+               Name == "strspn" ||
+               Name == "strncmp" ||
+               Name == "strcspn" ||
+               Name == "strcoll" ||
+               Name == "strcasecmp" ||
+               Name == "strncasecmp") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strstr" ||
+               Name == "strpbrk") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "strtok" ||
+               Name == "strtok_r") {
+      if (FTy->getNumParams() < 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "scanf" ||
+               Name == "setbuf" ||
+               Name == "setvbuf") {
+      if (FTy->getNumParams() < 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "strdup" ||
+               Name == "strndup") {
+      if (FTy->getNumParams() < 1 || !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "stat" ||
+               Name == "sscanf" ||
+               Name == "sprintf" ||
+               Name == "statvfs") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "snprintf") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 3);
+    } else if (Name == "setitimer") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+      setDoesNotCapture(F, 3);
+    } else if (Name == "system") {
+      if (FTy->getNumParams() != 1 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      // May throw; "system" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'm':
+    if (Name == "malloc") {
+      if (FTy->getNumParams() != 1 ||
+          !FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "memcmp") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "memchr" ||
+               Name == "memrchr") {
+      if (FTy->getNumParams() != 3)
+        return;
+      setOnlyReadsMemory(F);
+      setDoesNotThrow(F);
+    } else if (Name == "modf" ||
+               Name == "modff" ||
+               Name == "modfl" ||
+               Name == "memcpy" ||
+               Name == "memccpy" ||
+               Name == "memmove") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "memalign") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotAlias(F, 0);
+    } else if (Name == "mkdir" ||
+               Name == "mktime") {
+      if (FTy->getNumParams() == 0 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'r':
+    if (Name == "realloc") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "read") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      // May throw; "read" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 2);
+    } else if (Name == "rmdir" ||
+               Name == "rewind" ||
+               Name == "remove" ||
+               Name == "realpath") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "rename" ||
+               Name == "readlink") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'w':
+    if (Name == "write") {
+      if (FTy->getNumParams() != 3 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      // May throw; "write" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'b':
+    if (Name == "bcopy") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "bcmp") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setOnlyReadsMemory(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "bzero") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'c':
+    if (Name == "calloc") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "chmod" ||
+               Name == "chown" ||
+               Name == "ctermid" ||
+               Name == "clearerr" ||
+               Name == "closedir") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'a':
+    if (Name == "atoi" ||
+        Name == "atol" ||
+        Name == "atof" ||
+        Name == "atoll") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setOnlyReadsMemory(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "access") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'f':
+    if (Name == "fopen") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "fdopen") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "feof" ||
+               Name == "free" ||
+               Name == "fseek" ||
+               Name == "ftell" ||
+               Name == "fgetc" ||
+               Name == "fseeko" ||
+               Name == "ftello" ||
+               Name == "fileno" ||
+               Name == "fflush" ||
+               Name == "fclose" ||
+               Name == "fsetpos" ||
+               Name == "flockfile" ||
+               Name == "funlockfile" ||
+               Name == "ftrylockfile") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "ferror") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setOnlyReadsMemory(F);
+    } else if (Name == "fputc" ||
+               Name == "fstat" ||
+               Name == "frexp" ||
+               Name == "frexpf" ||
+               Name == "frexpl" ||
+               Name == "fstatvfs") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "fgets") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 3);
+    } else if (Name == "fread" ||
+               Name == "fwrite") {
+      if (FTy->getNumParams() != 4 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(3)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 4);
+    } else if (Name == "fputs" ||
+               Name == "fscanf" ||
+               Name == "fprintf" ||
+               Name == "fgetpos") {
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'g':
+    if (Name == "getc" ||
+        Name == "getlogin_r" ||
+        Name == "getc_unlocked") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "getenv") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setOnlyReadsMemory(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "gets" ||
+               Name == "getchar") {
+      setDoesNotThrow(F);
+    } else if (Name == "getitimer") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "getpwnam") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'u':
+    if (Name == "ungetc") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "uname" ||
+               Name == "unlink" ||
+               Name == "unsetenv") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "utime" ||
+               Name == "utimes") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 'p':
+    if (Name == "putc") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "puts" ||
+               Name == "printf" ||
+               Name == "perror") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "pread" ||
+               Name == "pwrite") {
+      if (FTy->getNumParams() != 4 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      // May throw; these are valid pthread cancellation points.
+      setDoesNotCapture(F, 2);
+    } else if (Name == "putchar") {
+      setDoesNotThrow(F);
+    } else if (Name == "popen") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "pclose") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'v':
+    if (Name == "vscanf") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "vsscanf" ||
+               Name == "vfscanf") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "valloc") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "vprintf") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "vfprintf" ||
+               Name == "vsprintf") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "vsnprintf") {
+      if (FTy->getNumParams() != 4 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(2)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 3);
+    }
+    break;
+  case 'o':
+    if (Name == "open") {
+      if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      // May throw; "open" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    } else if (Name == "opendir") {
+      if (FTy->getNumParams() != 1 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 't':
+    if (Name == "tmpfile") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "times") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'h':
+    if (Name == "htonl" ||
+        Name == "htons") {
+      setDoesNotThrow(F);
+      setDoesNotAccessMemory(F);
+    }
+    break;
+  case 'n':
+    if (Name == "ntohl" ||
+        Name == "ntohs") {
+      setDoesNotThrow(F);
+      setDoesNotAccessMemory(F);
+    }
+    break;
+  case 'l':
+    if (Name == "lstat") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "lchown") {
+      if (FTy->getNumParams() != 3 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  case 'q':
+    if (Name == "qsort") {
+      if (FTy->getNumParams() != 4 || !FTy->getParamType(3)->isPointerTy())
+        return;
+      // May throw; places call through function pointer.
+      setDoesNotCapture(F, 4);
+    }
+    break;
+  case '_':
+    if (Name == "__strdup" ||
+        Name == "__strndup") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "__strtok_r") {
+      if (FTy->getNumParams() != 3 ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "_IO_getc") {
+      if (FTy->getNumParams() != 1 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "_IO_putc") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    }
+    break;
+  case 1:
+    if (Name == "\1__isoc99_scanf") {
+      if (FTy->getNumParams() < 1 ||
+          !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "\1stat64" ||
+               Name == "\1lstat64" ||
+               Name == "\1statvfs64" ||
+               Name == "\1__isoc99_sscanf") {
+      // Parameters 0 and 1 are both inspected and annotated below, so at
+      // least two parameters must exist before getParamType(1) is queried.
+      if (FTy->getNumParams() < 2 ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1fopen64") {
+      if (FTy->getNumParams() != 2 ||
+          !FTy->getReturnType()->isPointerTy() ||
+          !FTy->getParamType(0)->isPointerTy() ||
+          !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+      setDoesNotCapture(F, 1);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1fseeko64" ||
+               Name == "\1ftello64") {
+      if (FTy->getNumParams() == 0 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 1);
+    } else if (Name == "\1tmpfile64") {
+      if (!FTy->getReturnType()->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotAlias(F, 0);
+    } else if (Name == "\1fstat64" ||
+               Name == "\1fstatvfs64") {
+      if (FTy->getNumParams() != 2 || !FTy->getParamType(1)->isPointerTy())
+        return;
+      setDoesNotThrow(F);
+      setDoesNotCapture(F, 2);
+    } else if (Name == "\1open64") {
+      if (FTy->getNumParams() < 2 || !FTy->getParamType(0)->isPointerTy())
+        return;
+      // May throw; "open" is a valid pthread cancellation point.
+      setDoesNotCapture(F, 1);
+    }
+    break;
+  }
+}
+
 /// doInitialization - Add attributes to well-known functions.
 ///
 bool SimplifyLibCalls::doInitialization(Module &M) {
   Modified = false;
   for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
     Function &F = *I;
-    if (!F.isDeclaration())
-      continue;
-
-    if (!F.hasName())
-      continue;
-
-    const FunctionType *FTy = F.getFunctionType();
-
-    StringRef Name = F.getName();
-    switch (Name[0]) {
-      case 's':
-        if (Name == "strlen") {
-          if (FTy->getNumParams() != 1 ||
-              !FTy->getParamType(0)->isPointerTy())
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-        } else if (Name == "strchr" ||
-                   Name == "strrchr") {
-          if (FTy->getNumParams() != 2 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isIntegerTy())
-            continue;
-          setOnlyReadsMemory(F);
-          setDoesNotThrow(F);
-        } else if (Name == "strcpy" ||
-                   Name == "stpcpy" ||
-                   Name == "strcat" ||
-                   Name == "strtol" ||
-                   Name == "strtod" ||
-                   Name == "strtof" ||
-                   Name == "strtoul" ||
-                   Name == "strtoll" ||
-                   Name == "strtold" ||
-                   Name == "strncat" ||
-                   Name == "strncpy" ||
-                   Name == "strtoull") {
-          if (FTy->getNumParams() < 2 ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "strxfrm") {
-          if (FTy->getNumParams() != 3 ||
-              !FTy->getParamType(0)->isPointerTy() ||
-              !FTy->getParamType(1)->isPointerTy())
-            continue;
-          setDoesNotThrow(F);
-          setDoesNotCapture(F, 1);
-          setDoesNotCapture(F, 2);
-        } else if (Name == "strcmp" ||
-                   Name == "strspn" ||
-                   Name == "strncmp" ||
-                   Name == "strcspn" ||
-                   Name ==
"strcoll" || - Name == "strcasecmp" || - Name == "strncasecmp") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "strstr" || - Name == "strpbrk") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "strtok" || - Name == "strtok_r") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "scanf" || - Name == "setbuf" || - Name == "setvbuf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "strdup" || - Name == "strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "stat" || - Name == "sscanf" || - Name == "sprintf" || - Name == "statvfs") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "snprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } else if (Name == "setitimer") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - setDoesNotCapture(F, 3); - } else if (Name == "system") { - if 
(FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - // May throw; "system" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - case 'm': - if (Name == "malloc") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "memcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "memchr" || - Name == "memrchr") { - if (FTy->getNumParams() != 3) - continue; - setOnlyReadsMemory(F); - setDoesNotThrow(F); - } else if (Name == "modf" || - Name == "modff" || - Name == "modfl" || - Name == "memcpy" || - Name == "memccpy" || - Name == "memmove") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "memalign") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotAlias(F, 0); - } else if (Name == "mkdir" || - Name == "mktime") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'r': - if (Name == "realloc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "read") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - continue; - // May throw; "read" is a valid pthread cancellation point. 
- setDoesNotCapture(F, 2); - } else if (Name == "rmdir" || - Name == "rewind" || - Name == "remove" || - Name == "realpath") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "rename" || - Name == "readlink") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'w': - if (Name == "write") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - continue; - // May throw; "write" is a valid pthread cancellation point. - setDoesNotCapture(F, 2); - } - break; - case 'b': - if (Name == "bcopy") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bcmp") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "bzero") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'c': - if (Name == "calloc") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "chmod" || - Name == "chown" || - Name == "ctermid" || - Name == "clearerr" || - Name == "closedir") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'a': - if (Name == "atoi" || - Name == "atol" || - Name == "atof" || - Name == "atoll") { - if 
(FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "access") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'f': - if (Name == "fopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "fdopen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 2); - } else if (Name == "feof" || - Name == "free" || - Name == "fseek" || - Name == "ftell" || - Name == "fgetc" || - Name == "fseeko" || - Name == "ftello" || - Name == "fileno" || - Name == "fflush" || - Name == "fclose" || - Name == "fsetpos" || - Name == "flockfile" || - Name == "funlockfile" || - Name == "ftrylockfile") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "ferror") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setOnlyReadsMemory(F); - } else if (Name == "fputc" || - Name == "fstat" || - Name == "frexp" || - Name == "frexpf" || - Name == "frexpl" || - Name == "fstatvfs") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "fgets") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - 
setDoesNotCapture(F, 3); - } else if (Name == "fread" || - Name == "fwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(3)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 4); - } else if (Name == "fputs" || - Name == "fscanf" || - Name == "fprintf" || - Name == "fgetpos") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'g': - if (Name == "getc" || - Name == "getlogin_r" || - Name == "getc_unlocked") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "getenv") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setOnlyReadsMemory(F); - setDoesNotCapture(F, 1); - } else if (Name == "gets" || - Name == "getchar") { - setDoesNotThrow(F); - } else if (Name == "getitimer") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "getpwnam") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'u': - if (Name == "ungetc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "uname" || - Name == "unlink" || - Name == "unsetenv") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "utime" || - Name == "utimes") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - 
!FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } - break; - case 'p': - if (Name == "putc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "puts" || - Name == "printf" || - Name == "perror") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "pread" || - Name == "pwrite") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(1)->isPointerTy()) - continue; - // May throw; these are valid pthread cancellation points. - setDoesNotCapture(F, 2); - } else if (Name == "putchar") { - setDoesNotThrow(F); - } else if (Name == "popen") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "pclose") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'v': - if (Name == "vscanf") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "vsscanf" || - Name == "vfscanf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "valloc") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "vprintf") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); 
- setDoesNotCapture(F, 1); - } else if (Name == "vfprintf" || - Name == "vsprintf") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "vsnprintf") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(2)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 3); - } - break; - case 'o': - if (Name == "open") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } else if (Name == "opendir") { - if (FTy->getNumParams() != 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } - break; - case 't': - if (Name == "tmpfile") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "times") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 'h': - if (Name == "htonl" || - Name == "htons") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'n': - if (Name == "ntohl" || - Name == "ntohs") { - setDoesNotThrow(F); - setDoesNotAccessMemory(F); - } - break; - case 'l': - if (Name == "lstat") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "lchown") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } - break; - case 
'q': - if (Name == "qsort") { - if (FTy->getNumParams() != 4 || - !FTy->getParamType(3)->isPointerTy()) - continue; - // May throw; places call through function pointer. - setDoesNotCapture(F, 4); - } - break; - case '_': - if (Name == "__strdup" || - Name == "__strndup") { - if (FTy->getNumParams() < 1 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - } else if (Name == "__strtok_r") { - if (FTy->getNumParams() != 3 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "_IO_getc") { - if (FTy->getNumParams() != 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "_IO_putc") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } - break; - case 1: - if (Name == "\1__isoc99_scanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - } else if (Name == "\1stat64" || - Name == "\1lstat64" || - Name == "\1statvfs64" || - Name == "\1__isoc99_sscanf") { - if (FTy->getNumParams() < 1 || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fopen64") { - if (FTy->getNumParams() != 2 || - !FTy->getReturnType()->isPointerTy() || - !FTy->getParamType(0)->isPointerTy() || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - setDoesNotCapture(F, 1); - setDoesNotCapture(F, 2); - } else if (Name == "\1fseeko64" || - Name == "\1ftello64") { - if (FTy->getNumParams() == 0 || - !FTy->getParamType(0)->isPointerTy()) - continue; - setDoesNotThrow(F); - 
setDoesNotCapture(F, 1); - } else if (Name == "\1tmpfile64") { - if (!FTy->getReturnType()->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotAlias(F, 0); - } else if (Name == "\1fstat64" || - Name == "\1fstatvfs64") { - if (FTy->getNumParams() != 2 || - !FTy->getParamType(1)->isPointerTy()) - continue; - setDoesNotThrow(F); - setDoesNotCapture(F, 2); - } else if (Name == "\1open64") { - if (FTy->getNumParams() < 2 || - !FTy->getParamType(0)->isPointerTy()) - continue; - // May throw; "open" is a valid pthread cancellation point. - setDoesNotCapture(F, 1); - } - break; - } + if (F.isDeclaration() && F.hasName()) + inferPrototypeAttributes(F); } return Modified; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 063c76e9522c..3f789fa86589 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -262,12 +262,13 @@ bool llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V) { /// areAllUsesEqual - Check whether the uses of a value are all the same. /// This is similar to Instruction::hasOneUse() except this will also return -/// true when there are multiple uses that all refer to the same value. +/// true when there are no uses or multiple uses that all refer to the same +/// value. static bool areAllUsesEqual(Instruction *I) { Value::use_iterator UI = I->use_begin(); Value::use_iterator UE = I->use_end(); if (UI == UE) - return false; + return true; User *TheUse = *UI; for (++UI; UI != UE; ++UI) { @@ -281,31 +282,24 @@ static bool areAllUsesEqual(Instruction *I) { /// dead PHI node, due to being a def-use chain of single-use nodes that /// either forms a cycle or is terminated by a trivially dead instruction, /// delete it. If that makes any of its operands trivially dead, delete them -/// too, recursively. Return true if the PHI node is actually deleted. +/// too, recursively. Return true if a change was made. 
bool llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) { - // We can remove a PHI if it is on a cycle in the def-use graph - // where each node in the cycle has degree one, i.e. only one use, - // and is an instruction with no side effects. - if (!areAllUsesEqual(PN)) - return false; + SmallPtrSet Visited; + for (Instruction *I = PN; areAllUsesEqual(I) && !I->mayHaveSideEffects(); + I = cast(*I->use_begin())) { + if (I->use_empty()) + return RecursivelyDeleteTriviallyDeadInstructions(I); - bool Changed = false; - SmallPtrSet PHIs; - PHIs.insert(PN); - for (Instruction *J = cast(*PN->use_begin()); - areAllUsesEqual(J) && !J->mayHaveSideEffects(); - J = cast(*J->use_begin())) - // If we find a PHI more than once, we're on a cycle that + // If we find an instruction more than once, we're on a cycle that // won't prove fruitful. - if (PHINode *JP = dyn_cast(J)) - if (!PHIs.insert(JP)) { - // Break the cycle and delete the PHI and its operands. - JP->replaceAllUsesWith(UndefValue::get(JP->getType())); - (void)RecursivelyDeleteTriviallyDeadInstructions(JP); - Changed = true; - break; - } - return Changed; + if (!Visited.insert(I)) { + // Break the cycle and delete the instruction and its operands. 
+ I->replaceAllUsesWith(UndefValue::get(I->getType())); + (void)RecursivelyDeleteTriviallyDeadInstructions(I); + return true; + } + } + return false; } /// SimplifyInstructionsInBlock - Scan the specified basic block and try to diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index e6a4373c495b..778885723e66 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -35,6 +35,7 @@ #include "llvm/Metadata.h" #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/ADT/DenseMap.h" @@ -190,7 +191,7 @@ namespace { /// std::vector Allocas; DominatorTree &DT; - DIFactory *DIF; + DIBuilder *DIB; /// AST - An AliasSetTracker object to update. If null, don't update it. /// @@ -235,9 +236,9 @@ namespace { public: PromoteMem2Reg(const std::vector &A, DominatorTree &dt, AliasSetTracker *ast) - : Allocas(A), DT(dt), DIF(0), AST(ast) {} + : Allocas(A), DT(dt), DIB(0), AST(ast) {} ~PromoteMem2Reg() { - delete DIF; + delete DIB; } void run(); @@ -951,9 +952,9 @@ void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, if (!DIVar.Verify()) return; - if (!DIF) - DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); - Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, SI); // Propagate any debug metadata from the store onto the dbg.value. 
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index fb660dbfac10..c6708857cb56 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -247,6 +247,11 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, if (PBB->getFirstNonPHIOrDbg() != I) return false; break; + case Instruction::GetElementPtr: + // GEPs are cheap if all indices are constant. + if (!cast(I)->hasAllConstantIndices()) + return false; + break; case Instruction::Add: case Instruction::Sub: case Instruction::And: diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 302e141a3ed1..b696682c13fa 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -82,6 +82,7 @@ if(PYTHONINTERP_FOUND) ${LIT_ARGS} ${CMAKE_CURRENT_BINARY_DIR} COMMENT "Running LLVM regression tests") + set_target_properties(check PROPERTIES FOLDER "Tests") add_custom_target(check.deps) add_dependencies(check check.deps) @@ -91,5 +92,6 @@ if(PYTHONINTERP_FOUND) llc lli llvm-ar llvm-as llvm-dis llvm-extract llvm-ld llvm-link llvm-mc llvm-nm macho-dump opt FileCheck count not) + set_target_properties(check.deps PROPERTIES FOLDER "Tests") endif() diff --git a/test/CodeGen/ARM/2009-10-16-Scope.ll b/test/CodeGen/ARM/2009-10-16-Scope.ll new file mode 100644 index 000000000000..ce440e986de0 --- /dev/null +++ b/test/CodeGen/ARM/2009-10-16-Scope.ll @@ -0,0 +1,32 @@ +; RUN: llc %s -O0 -o /dev/null -mtriple=arm-apple-darwin +; PR 5197 +; There is not any llvm instruction assocated with !5. The code generator +; should be able to handle this. 
+ +define void @bar() nounwind ssp { +entry: + %count_ = alloca i32, align 4 ; [#uses=2] + br label %do.body, !dbg !0 + +do.body: ; preds = %entry + call void @llvm.dbg.declare(metadata !{i32* %count_}, metadata !4) + %conv = ptrtoint i32* %count_ to i32, !dbg !0 ; [#uses=1] + %call = call i32 @foo(i32 %conv) ssp, !dbg !0 ; [#uses=0] + br label %do.end, !dbg !0 + +do.end: ; preds = %do.body + ret void, !dbg !7 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +declare i32 @foo(i32) ssp + +!0 = metadata !{i32 5, i32 2, metadata !1, null} +!1 = metadata !{i32 458763, metadata !2}; [DW_TAG_lexical_block ] +!2 = metadata !{i32 458798, i32 0, metadata !3, metadata !"bar", metadata !"bar", metadata !"bar", metadata !3, i32 4, null, i1 false, i1 true}; [DW_TAG_subprogram ] +!3 = metadata !{i32 458769, i32 0, i32 12, metadata !"genmodes.i", metadata !"/Users/yash/Downloads", metadata !"clang 1.1", i1 true, i1 false, metadata !"", i32 0}; [DW_TAG_compile_unit ] +!4 = metadata !{i32 459008, metadata !5, metadata !"count_", metadata !3, i32 5, metadata !6}; [ DW_TAG_auto_variable ] +!5 = metadata !{i32 458763, metadata !1}; [DW_TAG_lexical_block ] +!6 = metadata !{i32 458788, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ] +!7 = metadata !{i32 6, i32 1, metadata !2, null} diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll new file mode 100644 index 000000000000..f077d04803bd --- /dev/null +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -0,0 +1,124 @@ +; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_fbreg +; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot. 
+ +%struct.SVal = type { i8*, i32 } + +define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) nounwind ssp { +entry: + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %i}, i64 0, metadata !23), !dbg !24 + call void @llvm.dbg.value(metadata !{%struct.SVal* %location}, i64 0, metadata !25), !dbg !24 + %0 = icmp ne i32 %i, 0, !dbg !27 ; [#uses=1] + br i1 %0, label %bb, label %bb1, !dbg !27 + +bb: ; preds = %entry + %1 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !29 ; [#uses=1] + %2 = load i32* %1, align 8, !dbg !29 ; [#uses=1] + %3 = add i32 %2, %i, !dbg !29 ; [#uses=1] + br label %bb2, !dbg !29 + +bb1: ; preds = %entry + %4 = getelementptr inbounds %struct.SVal* %location, i32 0, i32 1, !dbg !30 ; [#uses=1] + %5 = load i32* %4, align 8, !dbg !30 ; [#uses=1] + %6 = sub i32 %5, 1, !dbg !30 ; [#uses=1] + br label %bb2, !dbg !30 + +bb2: ; preds = %bb1, %bb + %.0 = phi i32 [ %3, %bb ], [ %6, %bb1 ] ; [#uses=1] + br label %return, !dbg !29 + +return: ; preds = %bb2 + ret i32 %.0, !dbg !29 +} + +define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 { +entry: + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34 + %0 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 0, !dbg !34 ; [#uses=1] + store i8* null, i8** %0, align 8, !dbg !34 + %1 = getelementptr inbounds %struct.SVal* %this, i32 0, i32 1, !dbg !34 ; [#uses=1] + store i32 0, i32* %1, align 8, !dbg !34 + br label %return, !dbg !34 + +return: ; preds = %entry + ret void, !dbg !35 +} + +declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone + +define i32 @main() nounwind ssp { +entry: + %0 = alloca %struct.SVal ; <%struct.SVal*> [#uses=3] + %v = alloca %struct.SVal ; <%struct.SVal*> [#uses=4] + %"alloca point" = bitcast i32 0 to i32 ; [#uses=0] + call void @llvm.dbg.declare(metadata !{%struct.SVal* %v}, 
metadata !38), !dbg !41 + call void @_ZN4SValC1Ev(%struct.SVal* %v) nounwind, !dbg !41 + %1 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !42 ; [#uses=1] + store i32 1, i32* %1, align 8, !dbg !42 + %2 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 0, !dbg !43 ; [#uses=1] + %3 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 0, !dbg !43 ; [#uses=1] + %4 = load i8** %3, align 8, !dbg !43 ; [#uses=1] + store i8* %4, i8** %2, align 8, !dbg !43 + %5 = getelementptr inbounds %struct.SVal* %0, i32 0, i32 1, !dbg !43 ; [#uses=1] + %6 = getelementptr inbounds %struct.SVal* %v, i32 0, i32 1, !dbg !43 ; [#uses=1] + %7 = load i32* %6, align 8, !dbg !43 ; [#uses=1] + store i32 %7, i32* %5, align 8, !dbg !43 + %8 = call i32 @_Z3fooi4SVal(i32 2, %struct.SVal* noalias %0) nounwind, !dbg !43 ; [#uses=0] + call void @llvm.dbg.value(metadata !{i32 %8}, i64 0, metadata !44), !dbg !43 + br label %return, !dbg !45 + +return: ; preds = %entry + ret i32 0, !dbg !45 +} + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !9, !16, !17, !20} + +!0 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"", metadata !2, i32 11, metadata !14, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 524307, metadata !2, metadata !"SVal", metadata !2, i32 1, i64 128, i64 64, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_structure_type ] +!2 = metadata !{i32 524329, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !3} ; [ DW_TAG_file_type ] +!3 = metadata !{i32 524305, i32 0, i32 4, metadata !"small.cc", metadata !"/Users/manav/R8248330", metadata !"4.2.1 (Based on Apple Inc. 
build 5658) (LLVM build)", i1 true, i1 false, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!4 = metadata !{metadata !5, metadata !7, metadata !0, metadata !9} +!5 = metadata !{i32 524301, metadata !1, metadata !"Data", metadata !2, i32 7, i64 64, i64 64, i64 0, i32 0, metadata !6} ; [ DW_TAG_member ] +!6 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 524301, metadata !1, metadata !"Kind", metadata !2, i32 8, i64 32, i64 32, i64 64, i32 0, metadata !8} ; [ DW_TAG_member ] +!8 = metadata !{i32 524324, metadata !2, metadata !"unsigned int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 524334, i32 0, metadata !1, metadata !"~SVal", metadata !"~SVal", metadata !"", metadata !2, i32 12, metadata !10, i1 false, i1 false, i32 0, i32 0, null, i1 false, i1 false, null} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !11, i32 0, null} ; [ DW_TAG_subroutine_type ] +!11 = metadata !{null, metadata !12, metadata !13} +!12 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !1} ; [ DW_TAG_pointer_type ] +!13 = metadata !{i32 524324, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_subroutine_type ] +!15 = metadata !{null, metadata !12} +!16 = metadata !{i32 524334, i32 0, metadata !1, metadata !"SVal", metadata !"SVal", metadata !"_ZN4SValC1Ev", metadata !2, i32 11, metadata !14, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, void (%struct.SVal*)* @_ZN4SValC1Ev} ; [ DW_TAG_subprogram ] +!17 = metadata !{i32 524334, i32 0, metadata !2, metadata 
!"foo", metadata !"foo", metadata !"_Z3fooi4SVal", metadata !2, i32 16, metadata !18, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 (i32, %struct.SVal*)* @_Z3fooi4SVal} ; [ DW_TAG_subprogram ] +!18 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !19, i32 0, null} ; [ DW_TAG_subroutine_type ] +!19 = metadata !{metadata !13, metadata !13, metadata !1} +!20 = metadata !{i32 524334, i32 0, metadata !2, metadata !"main", metadata !"main", metadata !"main", metadata !2, i32 23, metadata !21, i1 false, i1 true, i32 0, i32 0, null, i1 false, i1 false, i32 ()* @main} ; [ DW_TAG_subprogram ] +!21 = metadata !{i32 524309, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !22, i32 0, null} ; [ DW_TAG_subroutine_type ] +!22 = metadata !{metadata !13} +!23 = metadata !{i32 524545, metadata !17, metadata !"i", metadata !2, i32 16, metadata !13} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 16, i32 0, metadata !17, null} +!25 = metadata !{i32 524545, metadata !17, metadata !"location", metadata !2, i32 16, metadata !26} ; [ DW_TAG_arg_variable ] +!26 = metadata !{i32 524304, metadata !2, metadata !"SVal", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ DW_TAG_reference_type ] +!27 = metadata !{i32 17, i32 0, metadata !28, null} +!28 = metadata !{i32 524299, metadata !17, i32 16, i32 0, metadata !2, i32 2} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 18, i32 0, metadata !28, null} +!30 = metadata !{i32 20, i32 0, metadata !28, null} +!31 = metadata !{i32 524545, metadata !16, metadata !"this", metadata !2, i32 11, metadata !32} ; [ DW_TAG_arg_variable ] +!32 = metadata !{i32 524326, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 64, metadata !33} ; [ DW_TAG_const_type ] +!33 = metadata !{i32 524303, metadata !2, metadata !"", metadata !2, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !1} ; [ 
DW_TAG_pointer_type ] +!34 = metadata !{i32 11, i32 0, metadata !16, null} +!35 = metadata !{i32 11, i32 0, metadata !36, null} +!36 = metadata !{i32 524299, metadata !37, i32 11, i32 0, metadata !2, i32 1} ; [ DW_TAG_lexical_block ] +!37 = metadata !{i32 524299, metadata !16, i32 11, i32 0, metadata !2, i32 0} ; [ DW_TAG_lexical_block ] +!38 = metadata !{i32 524544, metadata !39, metadata !"v", metadata !2, i32 24, metadata !1} ; [ DW_TAG_auto_variable ] +!39 = metadata !{i32 524299, metadata !40, i32 23, i32 0, metadata !2, i32 4} ; [ DW_TAG_lexical_block ] +!40 = metadata !{i32 524299, metadata !20, i32 23, i32 0, metadata !2, i32 3} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 24, i32 0, metadata !39, null} +!42 = metadata !{i32 25, i32 0, metadata !39, null} +!43 = metadata !{i32 26, i32 0, metadata !39, null} +!44 = metadata !{i32 524544, metadata !39, metadata !"k", metadata !2, i32 26, metadata !13} ; [ DW_TAG_auto_variable ] +!45 = metadata !{i32 27, i32 0, metadata !39, null} diff --git a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll index 99baad2d38d1..94842124fb08 100644 --- a/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll +++ b/test/CodeGen/ARM/2011-01-19-MergedGlobalDbg.ll @@ -17,13 +17,12 @@ target triple = "thumbv7-apple-darwin10" ; DW_OP_constu ; offset -;CHECK: .byte 7 @ Abbrev [7] 0x1a5:0x13 DW_TAG_variable -;CHECK-NEXT: .ascii "x2" @ DW_AT_name +;CHECK: .ascii "x2" @ DW_AT_name ;CHECK-NEXT: .byte 0 -;CHECK-NEXT: .long 93 @ DW_AT_type -;CHECK-NEXT: .byte 1 @ DW_AT_decl_file -;CHECK-NEXT: .byte 6 @ DW_AT_decl_line -;CHECK-NEXT: .byte 8 @ DW_AT_location +;CHECK-NEXT: @ DW_AT_type +;CHECK-NEXT: @ DW_AT_decl_file +;CHECK-NEXT: @ DW_AT_decl_line +;CHECK-NEXT: @ DW_AT_location ;CHECK-NEXT: .byte 3 ;CHECK-NEXT: .long __MergedGlobals ;CHECK-NEXT: .byte 16 diff --git a/test/CodeGen/ARM/available_externally.ll b/test/CodeGen/ARM/available_externally.ll new file mode 100644 index 
000000000000..0f646d582e71 --- /dev/null +++ b/test/CodeGen/ARM/available_externally.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | FileCheck %s +; rdar://9027648 + +@A = available_externally hidden constant i32 1 +@B = external hidden constant i32 + +define i32 @t1() { + %tmp = load i32* @A + store i32 %tmp, i32* @B + ret i32 %tmp +} + +; CHECK: L_A$non_lazy_ptr: +; CHECK-NEXT: .long _A +; CHECK: L_B$non_lazy_ptr: +; CHECK-NEXT: .long _B diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index 1050cd265998..d30e3ebf50a5 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -9,9 +9,8 @@ entry: ; SOFT: bfi r0, r1, #31, #1 ; HARD: test1: -; HARD: vabs.f32 d0, d0 -; HARD: cmp r0, #0 -; HARD: vneglt.f32 s0, s0 +; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000 +; HARD: vbsl [[REG1]], d2, d0 %0 = tail call float @copysignf(float %x, float %y) nounwind ret float %0 } @@ -23,9 +22,9 @@ entry: ; SOFT: bfi r1, r2, #31, #1 ; HARD: test2: -; HARD: vabs.f64 d0, d0 -; HARD: cmp r1, #0 -; HARD: vneglt.f64 d0, d0 +; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000 +; HARD: vshl.i64 [[REG2]], [[REG2]], #32 +; HARD: vbsl [[REG2]], d1, d0 %0 = tail call double @copysign(double %x, double %y) nounwind ret double %0 } @@ -33,9 +32,9 @@ entry: define double @test3(double %x, double %y, double %z) nounwind { entry: ; SOFT: test3: -; SOFT: vabs.f64 -; SOFT: cmp {{.*}}, #0 -; SOFT: vneglt.f64 +; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000 +; SOFT: vshl.i64 [[REG3]], [[REG3]], #32 +; SOFT: vbsl [[REG3]], %0 = fmul double %x, %y %1 = tail call double @copysign(double %0, double %z) nounwind ret double %1 diff --git a/test/CodeGen/ARM/vstlane.ll b/test/CodeGen/ARM/vstlane.ll index 6cc052bbeb1c..d1bc15ad576d 100644 --- a/test/CodeGen/ARM/vstlane.ll +++ b/test/CodeGen/ARM/vstlane.ll @@ -10,6 +10,19 @@ define void @vst1lanei8(i8* %A, <8 x i8>* %B) nounwind { ret void } +;Check for a post-increment 
updating store. +define void @vst1lanei8_update(i8** %ptr, <8 x i8>* %B) nounwind { +;CHECK: vst1lanei8_update: +;CHECK: vst1.8 {d16[3]}, [r2]! + %A = load i8** %ptr + %tmp1 = load <8 x i8>* %B + %tmp2 = extractelement <8 x i8> %tmp1, i32 3 + store i8 %tmp2, i8* %A, align 8 + %tmp3 = getelementptr i8* %A, i32 1 + store i8* %tmp3, i8** %ptr + ret void +} + define void @vst1lanei16(i16* %A, <4 x i16>* %B) nounwind { ;CHECK: vst1lanei16: ;Check the alignment value. Max for this instruction is 16 bits: @@ -66,6 +79,19 @@ define void @vst1laneQi32(i32* %A, <4 x i32>* %B) nounwind { ret void } +;Check for a post-increment updating store. +define void @vst1laneQi32_update(i32** %ptr, <4 x i32>* %B) nounwind { +;CHECK: vst1laneQi32_update: +;CHECK: vst1.32 {d17[1]}, [r1, :32]! + %A = load i32** %ptr + %tmp1 = load <4 x i32>* %B + %tmp2 = extractelement <4 x i32> %tmp1, i32 3 + store i32 %tmp2, i32* %A, align 8 + %tmp3 = getelementptr i32* %A, i32 1 + store i32* %tmp3, i32** %ptr + ret void +} + define void @vst1laneQf(float* %A, <4 x float>* %B) nounwind { ;CHECK: vst1laneQf: ;CHECK: vst1.32 {d17[1]}, [r0] diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll index bc27e987a179..71fdb4e0d60f 100644 --- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll +++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll @@ -7,7 +7,7 @@ entry: ; CHECK: test ; CHECK: call bar ; CHECK-NOT: nop -; CHECK: ret +; CHECK: jmp ; CHECK-NEXT: restore %0 = tail call i32 @bar(i32 %a) nounwind ret i32 %0 @@ -18,7 +18,7 @@ entry: ; CHECK: test_jmpl ; CHECK: call ; CHECK-NOT: nop -; CHECK: ret +; CHECK: jmp ; CHECK-NEXT: restore %0 = tail call i32 %f(i32 %a, i32 %b) nounwind ret i32 %0 @@ -47,7 +47,7 @@ bb: ; preds = %entry, %bb bb5: ; preds = %bb, %entry %a_addr.1.lcssa = phi i32 [ %a, %entry ], [ %a_addr.0, %bb ] -;CHECK: ret +;CHECK: jmp ;CHECK-NEXT: restore ret i32 %a_addr.1.lcssa } diff --git a/test/CodeGen/SPARC/2011-01-22-SRet.ll 
b/test/CodeGen/SPARC/2011-01-22-SRet.ll index 2f684b009c96..506d3a8f87af 100644 --- a/test/CodeGen/SPARC/2011-01-22-SRet.ll +++ b/test/CodeGen/SPARC/2011-01-22-SRet.ll @@ -7,7 +7,7 @@ entry: ;CHECK: make_foo ;CHECK: ld [%fp+64], {{.+}} ;CHECK: or {{.+}}, {{.+}}, %i0 -;CHECK: ret +;CHECK: jmp %i7+12 %0 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 0 store i32 %a, i32* %0, align 4 %1 = getelementptr inbounds %struct.foo_t* %agg.result, i32 0, i32 1 @@ -22,6 +22,7 @@ entry: ;CHECK: test ;CHECK: st {{.+}}, [%sp+64] ;CHECK: make_foo +;CHECK: unimp 12 %f = alloca %struct.foo_t, align 8 call void @make_foo(%struct.foo_t* noalias sret %f, i32 10, i32 20, i32 30) nounwind %0 = getelementptr inbounds %struct.foo_t* %f, i32 0, i32 0 diff --git a/test/DebugInfo/2009-10-16-Scope.ll b/test/CodeGen/X86/2009-10-16-Scope.ll similarity index 96% rename from test/DebugInfo/2009-10-16-Scope.ll rename to test/CodeGen/X86/2009-10-16-Scope.ll index 037294fc63a4..86c20243c874 100644 --- a/test/DebugInfo/2009-10-16-Scope.ll +++ b/test/CodeGen/X86/2009-10-16-Scope.ll @@ -1,5 +1,4 @@ ; RUN: llc %s -O0 -o /dev/null -mtriple=x86_64-apple-darwin -; RUN: llc %s -O0 -o /dev/null -mtriple=arm-apple-darwin ; PR 5197 ; There is not any llvm instruction assocated with !5. The code generator ; should be able to handle this. 
diff --git a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll index 9b9d63609ab4..2ba12dfc5680 100644 --- a/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll +++ b/test/CodeGen/X86/2010-06-28-DbgEntryPC.ll @@ -7,7 +7,7 @@ ; CHECK-NEXT: .byte 37 ## DW_AT_producer ; CHECK-NEXT: .byte 8 ## DW_FORM_string ; CHECK-NEXT: .byte 19 ## DW_AT_language -; CHECK-NEXT: .byte 11 ## DW_FORM_data1 +; CHECK-NEXT: .byte 5 ## DW_FORM_data2 ; CHECK-NEXT: .byte 3 ## DW_AT_name ; CHECK-NEXT: .byte 8 ## DW_FORM_string ; CHECK-NEXT: .byte 82 ## DW_AT_entry_pc diff --git a/test/DebugInfo/2010-08-04-StackVariable.ll b/test/CodeGen/X86/2010-08-04-StackVariable.ll similarity index 99% rename from test/DebugInfo/2010-08-04-StackVariable.ll rename to test/CodeGen/X86/2010-08-04-StackVariable.ll index c35c3d36d261..edfd1b868737 100644 --- a/test/DebugInfo/2010-08-04-StackVariable.ll +++ b/test/CodeGen/X86/2010-08-04-StackVariable.ll @@ -1,4 +1,3 @@ -; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_fbreg ; RUN: llc -O0 -mtriple=x86_64-apple-darwin < %s | grep DW_OP_fbreg ; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot. 
diff --git a/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll b/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll new file mode 100644 index 000000000000..f982723781ea --- /dev/null +++ b/test/CodeGen/X86/2011-02-21-VirtRegRewriter-KillSubReg.ll @@ -0,0 +1,50 @@ +; RUN: llc < %s -O2 -march=x86 -mtriple=i386-pc-linux-gnu -relocation-model=pic | FileCheck %s +; PR9237: Assertion in VirtRegRewriter.cpp, ResurrectConfirmedKill +; `KillOps[*SR] == KillOp && "invalid subreg kill flags"' + +%t = type { i32 } + +define i32 @foo(%t* %s) nounwind { +entry: + br label %if.then735 + +if.then735: + %call747 = call i32 undef(%t* %s, i8* null, i8* undef, i32 128, i8* undef, i32 516) nounwind + br i1 undef, label %if.then751, label %if.then758 + +if.then751: + unreachable + +if.then758: + %add761 = add i32 %call747, 4 + %add763 = add i32 %add761, %call747 + %add.ptr768 = getelementptr inbounds [516 x i8]* null, i32 0, i32 %add761 + br i1 undef, label %cond.false783, label %cond.true771 + +cond.true771: + %call782 = call i8* @__memmove_chk(i8* %add.ptr768, i8* undef, i32 %call747, i32 undef) + br label %cond.end791 + +; CHECK: calll __memmove_chk +cond.false783: + %call.i1035 = call i8* @__memmove_chk(i8* %add.ptr768, i8* undef, i32 %call747, i32 undef) nounwind + br label %cond.end791 + +cond.end791: + %conv801 = trunc i32 %call747 to i8 + %add.ptr822.sum = add i32 %call747, 3 + %arrayidx833 = getelementptr inbounds [516 x i8]* null, i32 0, i32 %add.ptr822.sum + store i8 %conv801, i8* %arrayidx833, align 1 + %cmp841 = icmp eq i8* undef, null + br i1 %cmp841, label %if.end849, label %if.then843 + +if.then843: + unreachable + +if.end849: + %call921 = call i32 undef(%t* %s, i8* undef, i8* undef, i32 %add763) nounwind + unreachable + +} + +declare i8* @__memmove_chk(i8*, i8*, i32, i32) nounwind diff --git a/test/CodeGen/X86/2011-02-23-UnfoldBug.ll b/test/CodeGen/X86/2011-02-23-UnfoldBug.ll new file mode 100644 index 000000000000..900106aac351 --- /dev/null +++ 
b/test/CodeGen/X86/2011-02-23-UnfoldBug.ll @@ -0,0 +1,42 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin10 +; rdar://9045024 +; PR9305 + +define void @calc_gb_rad_still_sse2_double() nounwind ssp { +entry: + br label %for.cond.outer + +for.cond.outer: ; preds = %if.end71, %entry + %theta.0.ph = phi <2 x double> [ undef, %entry ], [ %theta.1, %if.end71 ] + %mul.i97 = fmul <2 x double> %theta.0.ph, undef + %mul.i96 = fmul <2 x double> %mul.i97, fmul (<2 x double> , <2 x double> undef) + br i1 undef, label %for.body, label %for.end82 + +for.body: ; preds = %for.cond.outer + br i1 undef, label %for.body33.lr.ph, label %for.end + +for.body33.lr.ph: ; preds = %for.body + %dccf.2 = select i1 undef, <2 x double> %mul.i96, <2 x double> undef + unreachable + +for.end: ; preds = %for.body + %vecins.i94 = insertelement <2 x double> undef, double 0.000000e+00, i32 0 + %cmpsd.i = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %vecins.i94, <2 x double> , i8 2) nounwind + tail call void (...)* @_mm_movemask_pd(<2 x double> %cmpsd.i) nounwind + br i1 undef, label %if.then67, label %if.end71 + +if.then67: ; preds = %for.end + %vecins.i91 = insertelement <2 x double> %vecins.i94, double undef, i32 0 + br label %if.end71 + +if.end71: ; preds = %if.then67, %for.end + %theta.1 = phi <2 x double> [ %vecins.i91, %if.then67 ], [ %theta.0.ph, %for.end ] + br label %for.cond.outer + +for.end82: ; preds = %for.cond.outer + ret void +} + +declare void @_mm_movemask_pd(...) 
+ +declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounwind readnone diff --git a/test/CodeGen/X86/add.ll b/test/CodeGen/X86/add.ll index 3ec5358affb3..62c898025c80 100644 --- a/test/CodeGen/X86/add.ll +++ b/test/CodeGen/X86/add.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64 ; The immediate can be encoded in a smaller way if the ; instruction is a sub instead of an add. @@ -43,7 +44,7 @@ overflow: ; X32-NEXT: jo ; X64: test4: -; X64: addl %esi, %edi +; X64: addl %e[[A1:si|dx]], %e[[A0:di|cx]] ; X64-NEXT: jo } @@ -66,7 +67,7 @@ carry: ; X32-NEXT: jb ; X64: test5: -; X64: addl %esi, %edi +; X64: addl %e[[A1]], %e[[A0]] ; X64-NEXT: jb } @@ -87,8 +88,8 @@ define i64 @test6(i64 %A, i32 %B) nounwind { ; X32-NEXT: ret ; X64: test6: -; X64: shlq $32, %rsi -; X64: leaq (%rsi,%rdi), %rax +; X64: shlq $32, %r[[A1]] +; X64: leaq (%r[[A1]],%r[[A0]]), %rax ; X64: ret } @@ -98,7 +99,7 @@ define {i32, i1} @test7(i32 %v1, i32 %v2) nounwind { } ; X64: test7: -; X64: addl %esi, %eax +; X64: addl %e[[A1]], %eax ; X64-NEXT: setb %dl ; X64-NEXT: ret diff --git a/test/CodeGen/X86/break-sse-dep.ll b/test/CodeGen/X86/break-sse-dep.ll index 094cbc7bdefc..2dee5754256a 100644 --- a/test/CodeGen/X86/break-sse-dep.ll +++ b/test/CodeGen/X86/break-sse-dep.ll @@ -1,9 +1,10 @@ -; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse2 | FileCheck %s define double @t1(float* nocapture %x) nounwind readonly ssp { entry: ; CHECK: t1: -; CHECK: movss (%rdi), %xmm0 +; CHECK: movss ([[A0:%rdi|%rcx]]), %xmm0 ; CHECK: cvtss2sd %xmm0, %xmm0 %0 = load float* %x, align 4 @@ -14,7 +15,7 @@ entry: define float @t2(double* nocapture 
%x) nounwind readonly ssp optsize { entry: ; CHECK: t2: -; CHECK: cvtsd2ss (%rdi), %xmm0 +; CHECK: cvtsd2ss ([[A0]]), %xmm0 %0 = load double* %x, align 8 %1 = fptrunc double %0 to float ret float %1 @@ -23,7 +24,7 @@ entry: define float @squirtf(float* %x) nounwind { entry: ; CHECK: squirtf: -; CHECK: movss (%rdi), %xmm0 +; CHECK: movss ([[A0]]), %xmm0 ; CHECK: sqrtss %xmm0, %xmm0 %z = load float* %x %t = call float @llvm.sqrt.f32(float %z) @@ -33,7 +34,7 @@ entry: define double @squirt(double* %x) nounwind { entry: ; CHECK: squirt: -; CHECK: movsd (%rdi), %xmm0 +; CHECK: movsd ([[A0]]), %xmm0 ; CHECK: sqrtsd %xmm0, %xmm0 %z = load double* %x %t = call double @llvm.sqrt.f64(double %z) @@ -43,7 +44,7 @@ entry: define float @squirtf_size(float* %x) nounwind optsize { entry: ; CHECK: squirtf_size: -; CHECK: sqrtss (%rdi), %xmm0 +; CHECK: sqrtss ([[A0]]), %xmm0 %z = load float* %x %t = call float @llvm.sqrt.f32(float %z) ret float %t @@ -52,7 +53,7 @@ entry: define double @squirt_size(double* %x) nounwind optsize { entry: ; CHECK: squirt_size: -; CHECK: sqrtsd (%rdi), %xmm0 +; CHECK: sqrtsd ([[A0]]), %xmm0 %z = load double* %x %t = call double @llvm.sqrt.f64(double %z) ret double %t diff --git a/test/CodeGen/X86/codegen-dce.ll b/test/CodeGen/X86/codegen-dce.ll deleted file mode 100644 index d83efaf57766..000000000000 --- a/test/CodeGen/X86/codegen-dce.ll +++ /dev/null @@ -1,43 +0,0 @@ -; RUN: llc < %s -march=x86 -stats |& grep {codegen-dce} | grep {Number of dead instructions deleted} - - %struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] } - %struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* } - %struct.node = type { i16, double, [3 x double], i32, i32 } - -define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { -entry: - %0 = malloc %struct.anon ; <%struct.anon*> [#uses=2] - %1 = getelementptr %struct.anon* %0, i32 0, i32 
2 ; <%struct.node**> [#uses=1] - br label %bb14.i - -bb14.i: ; preds = %bb14.i, %entry - %i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %2, %bb14.i ] ; [#uses=1] - %2 = add i32 %i8.0.reg2mem.0.i, 1 ; [#uses=2] - %exitcond74.i = icmp eq i32 %2, 32 ; [#uses=1] - br i1 %exitcond74.i, label %bb32.i, label %bb14.i - -bb32.i: ; preds = %bb32.i, %bb14.i - %tmp.0.reg2mem.0.i = phi i32 [ %indvar.next63.i, %bb32.i ], [ 0, %bb14.i ] ; [#uses=1] - %indvar.next63.i = add i32 %tmp.0.reg2mem.0.i, 1 ; [#uses=2] - %exitcond64.i = icmp eq i32 %indvar.next63.i, 64 ; [#uses=1] - br i1 %exitcond64.i, label %bb47.loopexit.i, label %bb32.i - -bb.i.i: ; preds = %bb47.loopexit.i - unreachable - -stepsystem.exit.i: ; preds = %bb47.loopexit.i - store %struct.node* null, %struct.node** %1, align 4 - br label %bb.i6.i - -bb.i6.i: ; preds = %bb.i6.i, %stepsystem.exit.i - br i1 false, label %bb107.i.i, label %bb.i6.i - -bb107.i.i: ; preds = %bb107.i.i, %bb.i6.i - %q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %3, %bb.i6.i ] ; <%struct.bnode**> [#uses=0] - br label %bb107.i.i - -bb47.loopexit.i: ; preds = %bb32.i - %3 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 0 ; <%struct.bnode**> [#uses=1] - %4 = icmp eq %struct.node* null, null ; [#uses=1] - br i1 %4, label %stepsystem.exit.i, label %bb.i.i -} diff --git a/test/CodeGen/X86/codegen-prepare-extload.ll b/test/CodeGen/X86/codegen-prepare-extload.ll index 9f57d53178f3..14df815663e3 100644 --- a/test/CodeGen/X86/codegen-prepare-extload.ll +++ b/test/CodeGen/X86/codegen-prepare-extload.ll @@ -1,10 +1,11 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win64 | FileCheck %s ; rdar://7304838 ; CodeGenPrepare should move the zext into the block with the load ; so that SelectionDAG can select it with the load. 
-; CHECK: movzbl (%rdi), %eax +; CHECK: movzbl ({{%rdi|%rcx}}), %eax define void @foo(i8* %p, i32* %q) { entry: diff --git a/test/CodeGen/X86/constant-pool-sharing.ll b/test/CodeGen/X86/constant-pool-sharing.ll index 33de5767ad65..f979945835ff 100644 --- a/test/CodeGen/X86/constant-pool-sharing.ll +++ b/test/CodeGen/X86/constant-pool-sharing.ll @@ -1,11 +1,12 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; llc should share constant pool entries between this integer vector ; and this floating-point vector since they have the same encoding. ; CHECK: LCPI0_0(%rip), %xmm0 -; CHECK: movaps %xmm0, (%rdi) -; CHECK: movaps %xmm0, (%rsi) +; CHECK: movaps %xmm0, ({{%rdi|%rcx}}) +; CHECK: movaps %xmm0, ({{%rsi|%rdx}}) define void @foo(<4 x i32>* %p, <4 x float>* %q, i1 %t) nounwind { entry: diff --git a/test/CodeGen/X86/ctpop-combine.ll b/test/CodeGen/X86/ctpop-combine.ll index c957d385a24a..6406cc73e412 100644 --- a/test/CodeGen/X86/ctpop-combine.ll +++ b/test/CodeGen/X86/ctpop-combine.ll @@ -9,7 +9,7 @@ define i32 @test1(i64 %x) nounwind readnone { %conv = zext i1 %cmp to i32 ret i32 %conv ; CHECK: test1: -; CHECK: leaq -1(%rdi) +; CHECK: leaq -1([[A0:%rdi|%rcx]]) ; CHECK-NEXT: testq ; CHECK-NEXT: setne ; CHECK: ret @@ -22,7 +22,7 @@ define i32 @test2(i64 %x) nounwind readnone { %conv = zext i1 %cmp to i32 ret i32 %conv ; CHECK: test2: -; CHECK: leaq -1(%rdi) +; CHECK: leaq -1([[A0]]) ; CHECK-NEXT: testq ; CHECK-NEXT: sete ; CHECK: ret diff --git a/test/CodeGen/X86/dbg-live-in-location.ll b/test/CodeGen/X86/dbg-live-in-location.ll deleted file mode 100644 index 9b1464d415f9..000000000000 --- a/test/CodeGen/X86/dbg-live-in-location.ll +++ /dev/null @@ -1,84 +0,0 @@ -; RUN: llc < %s | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target 
triple = "x86_64-apple-darwin10.0.0" - -@str = internal constant [3 x i8] c"Hi\00" - -define void @foo() nounwind ssp { -entry: - %puts = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0)) - ret void, !dbg !17 -} - -; CHECK: arg.c:5:14 - -define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { -entry: - tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !9), !dbg !19 - tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !10), !dbg !20 - %cmp = icmp sgt i32 %argc, 1, !dbg !21 - br i1 %cmp, label %cond.end, label %for.body.lr.ph, !dbg !21 - -cond.end: ; preds = %entry - %arrayidx = getelementptr inbounds i8** %argv, i64 1, !dbg !21 - %tmp2 = load i8** %arrayidx, align 8, !dbg !21, !tbaa !22 - %call = tail call i32 (...)* @atoi(i8* %tmp2) nounwind, !dbg !21 - tail call void @llvm.dbg.value(metadata !{i32 %call}, i64 0, metadata !16), !dbg !21 - tail call void @llvm.dbg.value(metadata !25, i64 0, metadata !14), !dbg !26 - %cmp57 = icmp sgt i32 %call, 0, !dbg !26 - br i1 %cmp57, label %for.body.lr.ph, label %for.end, !dbg !26 - -for.body.lr.ph: ; preds = %entry, %cond.end - %cond10 = phi i32 [ %call, %cond.end ], [ 300, %entry ] - br label %for.body - -for.body: ; preds = %for.body, %for.body.lr.ph - %i.08 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] - %puts.i = tail call i32 @puts(i8* getelementptr inbounds ([3 x i8]* @str, i64 0, i64 0)) nounwind - %inc = add nsw i32 %i.08, 1, !dbg !27 - %exitcond = icmp eq i32 %inc, %cond10 - br i1 %exitcond, label %for.end, label %for.body, !dbg !26 - -for.end: ; preds = %for.body, %cond.end - ret i32 0, !dbg !29 -} - -declare i32 @atoi(...) 
- -declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone - -declare i32 @puts(i8* nocapture) nounwind - -!llvm.dbg.sp = !{!0, !5} -!llvm.dbg.lv.main = !{!9, !10, !14, !16} - -!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"foo", metadata !"foo", metadata !"", metadata !1, i32 2, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 0, i1 true, void ()* @foo} ; [ DW_TAG_subprogram ] -!1 = metadata !{i32 589865, metadata !"arg.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] -!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"arg.c", metadata !"/private/tmp", metadata !"clang version 2.9 (trunk 124504)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] -!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] -!4 = metadata !{null} -!5 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 6, metadata !6, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ] -!6 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] -!7 = metadata !{metadata !8} -!8 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] -!9 = metadata !{i32 590081, metadata !5, metadata !"argc", metadata !1, i32 5, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] -!10 = metadata !{i32 590081, metadata !5, metadata !"argv", metadata !1, i32 5, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] -!11 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ] -!12 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 64, i64 64, i64 0, i32 0, metadata !13} ; [ 
DW_TAG_pointer_type ] -!13 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] -!14 = metadata !{i32 590080, metadata !15, metadata !"i", metadata !1, i32 7, metadata !8, i32 0} ; [ DW_TAG_auto_variable ] -!15 = metadata !{i32 589835, metadata !5, i32 6, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] -!16 = metadata !{i32 590080, metadata !15, metadata !"iterations", metadata !1, i32 8, metadata !8, i32 0} ; [ DW_TAG_auto_variable ] -!17 = metadata !{i32 4, i32 1, metadata !18, null} -!18 = metadata !{i32 589835, metadata !0, i32 2, i32 12, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] -!19 = metadata !{i32 5, i32 14, metadata !5, null} -!20 = metadata !{i32 5, i32 26, metadata !5, null} -!21 = metadata !{i32 8, i32 51, metadata !15, null} -!22 = metadata !{metadata !"any pointer", metadata !23} -!23 = metadata !{metadata !"omnipotent char", metadata !24} -!24 = metadata !{metadata !"Simple C/C++ TBAA", null} -!25 = metadata !{i32 0} -!26 = metadata !{i32 9, i32 2, metadata !15, null} -!27 = metadata !{i32 9, i32 30, metadata !28, null} -!28 = metadata !{i32 589835, metadata !15, i32 9, i32 2, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] -!29 = metadata !{i32 12, i32 9, metadata !15, null} diff --git a/test/CodeGen/X86/dbg-value-location.ll b/test/CodeGen/X86/dbg-value-location.ll index 2449046c65fb..87d7e910c339 100644 --- a/test/CodeGen/X86/dbg-value-location.ll +++ b/test/CodeGen/X86/dbg-value-location.ll @@ -5,10 +5,10 @@ target triple = "x86_64-apple-darwin10.0.0" ;CHECK: .ascii "var" ## DW_AT_name ;CHECK-NEXT: .byte 0 -;CHECK-NEXT: .byte 2 ## DW_AT_decl_file -;CHECK-NEXT: .short 19509 ## DW_AT_decl_line -;CHECK-NEXT: .long 68 ## DW_AT_type -;CHECK-NEXT: .byte 1 ## DW_AT_location +;CHECK-NEXT: ## DW_AT_decl_file +;CHECK-NEXT: ## DW_AT_decl_line +;CHECK-NEXT: ## DW_AT_type +;CHECK-NEXT: ## DW_AT_location @dfm = external global i32, align 4 diff --git 
a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll index 7ceb972f61bb..fe335b9369cb 100644 --- a/test/CodeGen/X86/divide-by-constant.ll +++ b/test/CodeGen/X86/divide-by-constant.ll @@ -40,7 +40,7 @@ entry: %div = sdiv i16 %x, 33 ; [#uses=1] ret i16 %div ; CHECK: test4: -; CHECK: imull $-1985, %ecx, %ecx +; CHECK: imull $1986, %eax, %eax } define i32 @test5(i32 %A) nounwind { diff --git a/test/CodeGen/X86/dll-linkage.ll b/test/CodeGen/X86/dll-linkage.ll index 913617585206..a0c2a54a99a4 100644 --- a/test/CodeGen/X86/dll-linkage.ll +++ b/test/CodeGen/X86/dll-linkage.ll @@ -1,9 +1,14 @@ ; RUN: llc < %s -mtriple=i386-pc-mingw32 | FileCheck %s +; RUN: llc < %s -mtriple=i386-pc-mingw32 -O0 | FileCheck %s -check-prefix=FAST +; PR6275 + declare dllimport void @foo() define void @bar() nounwind { ; CHECK: calll *__imp__foo +; FAST: movl __imp__foo, [[R:%[a-z]{3}]] +; FAST: calll *[[R]] call void @foo() ret void } diff --git a/test/CodeGen/X86/fast-isel-cmp-branch.ll b/test/CodeGen/X86/fast-isel-cmp-branch.ll index 4ab1bc61c7e2..12312e8a581c 100644 --- a/test/CodeGen/X86/fast-isel-cmp-branch.ll +++ b/test/CodeGen/X86/fast-isel-cmp-branch.ll @@ -1,13 +1,14 @@ -; RUN: llc -O0 -march=x86-64 -asm-verbose=false < %s | FileCheck %s +; RUN: llc -O0 -mtriple=x86_64-linux -asm-verbose=false < %s | FileCheck %s +; RUN: llc -O0 -mtriple=x86_64-win32 -asm-verbose=false < %s | FileCheck %s ; rdar://8337108 ; Fast-isel shouldn't try to look through the compare because it's in a ; different basic block, so its operands aren't necessarily exported ; for cross-block usage. 
-; CHECK: movb %al, 7(%rsp) +; CHECK: movb %al, [[OFS:[0-9]*]](%rsp) ; CHECK: callq {{_?}}bar -; CHECK: movb 7(%rsp), %al +; CHECK: movb [[OFS]](%rsp), %al declare void @bar() diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll index 622a1ff831d0..fbe0243716bd 100644 --- a/test/CodeGen/X86/fast-isel-gep.ll +++ b/test/CodeGen/X86/fast-isel-gep.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86-64 -O0 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -O0 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -O0 | FileCheck %s --check-prefix=X64 ; RUN: llc < %s -march=x86 -O0 | FileCheck %s --check-prefix=X32 ; GEP indices are interpreted as signed integers, so they @@ -13,8 +14,8 @@ define i32 @test1(i32 %t3, i32* %t1) nounwind { ; X32: ret ; X64: test1: -; X64: movslq %edi, %rax -; X64: movl (%rsi,%rax,4), %eax +; X64: movslq %e[[A0:di|cx]], %rax +; X64: movl (%r[[A1:si|dx]],%rax,4), %eax ; X64: ret } @@ -27,7 +28,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind { ; X32: ret ; X64: test2: -; X64: movl (%rsi,%rdi,4), %eax +; X64: movl (%r[[A1]],%r[[A0]],4), %eax ; X64: ret } @@ -47,7 +48,7 @@ entry: ; X32: ret ; X64: test3: -; X64: movb -2(%rdi), %al +; X64: movb -2(%r[[A0]]), %al ; X64: ret } @@ -80,9 +81,9 @@ define i64 @test5(i8* %A, i32 %I, i64 %B) nounwind { %v11 = add i64 %B, %v10 ret i64 %v11 ; X64: test5: -; X64: movslq %esi, %rax -; X64-NEXT: movq (%rdi,%rax), %rax -; X64-NEXT: addq %rdx, %rax +; X64: movslq %e[[A1]], %rax +; X64-NEXT: movq (%r[[A0]],%rax), %rax +; X64-NEXT: addq %{{rdx|r8}}, %rax ; X64-NEXT: ret } diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index 134ee28df6c8..4a6927f6a269 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -1,20 +1,21 @@ -; RUN: llc -march=x86-64 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-win32 < %s | 
FileCheck %s ; rdar://7398554 ; When doing vector gather-scatter index calculation with 32-bit indices, ; bounce the vector off of cache rather than shuffling each individual ; element out of the index vector. -; CHECK: andps (%rdx), %xmm0 -; CHECK: movaps %xmm0, -24(%rsp) -; CHECK: movslq -24(%rsp), %rax -; CHECK: movsd (%rdi,%rax,8), %xmm0 -; CHECK: movslq -20(%rsp), %rax -; CHECK: movhpd (%rdi,%rax,8), %xmm0 -; CHECK: movslq -16(%rsp), %rax -; CHECK: movsd (%rdi,%rax,8), %xmm1 -; CHECK: movslq -12(%rsp), %rax -; CHECK: movhpd (%rdi,%rax,8), %xmm1 +; CHECK: andps ([[H:%rdx|%r8]]), %xmm0 +; CHECK: movaps %xmm0, {{(-24)?}}(%rsp) +; CHECK: movslq {{(-24)?}}(%rsp), %rax +; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0 +; CHECK: movslq {{-20|4}}(%rsp), %rax +; CHECK: movhpd ([[P]],%rax,8), %xmm0 +; CHECK: movslq {{-16|8}}(%rsp), %rax +; CHECK: movsd ([[P]],%rax,8), %xmm1 +; CHECK: movslq {{-12|12}}(%rsp), %rax +; CHECK: movhpd ([[P]],%rax,8), %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>* %i diff --git a/test/CodeGen/X86/i128-ret.ll b/test/CodeGen/X86/i128-ret.ll index 277f4283328b..264f07ceb4c8 100644 --- a/test/CodeGen/X86/i128-ret.ll +++ b/test/CodeGen/X86/i128-ret.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -march=x86-64 | grep {movq 8(%rdi), %rdx} -; RUN: llc < %s -march=x86-64 | grep {movq (%rdi), %rax} +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s +; CHECK: movq ([[A0:%rdi|%rcx]]), %rax +; CHECK: movq 8([[A0]]), %rdx define i128 @test(i128 *%P) { %A = load i128* %P diff --git a/test/CodeGen/X86/lea.ll b/test/CodeGen/X86/lea.ll index 22a96448f029..542135529f1d 100644 --- a/test/CodeGen/X86/lea.ll +++ b/test/CodeGen/X86/lea.ll @@ -1,11 +1,12 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s define i32 @test1(i32 %x) nounwind { %tmp1 = shl i32 %x, 3 
%tmp2 = add i32 %tmp1, 7 ret i32 %tmp2 ; CHECK: test1: -; CHECK: leal 7(,%rdi,8), %eax +; CHECK: leal 7(,[[A0:%rdi|%rcx]],8), %eax } @@ -27,8 +28,8 @@ bb.nph: bb2: ret i32 %x_offs ; CHECK: test2: -; CHECK: leal -5(%rdi), %eax +; CHECK: leal -5([[A0]]), %eax ; CHECK: andl $-4, %eax ; CHECK: negl %eax -; CHECK: leal -4(%rdi,%rax), %eax +; CHECK: leal -4([[A0]],%rax), %eax } diff --git a/test/CodeGen/X86/lsr-overflow.ll b/test/CodeGen/X86/lsr-overflow.ll index 0b0214c6d9f8..5bc4f7e96a0b 100644 --- a/test/CodeGen/X86/lsr-overflow.ll +++ b/test/CodeGen/X86/lsr-overflow.ll @@ -1,10 +1,11 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; The comparison uses the pre-inc value, which could lead LSR to ; try to compute -INT64_MIN. ; CHECK: movabsq $-9223372036854775808, %rax -; CHECK: cmpq %rax, %rbx +; CHECK: cmpq %rax, ; CHECK: sete %al declare i64 @bar() diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll index d1d714491faa..29f03d68dade 100644 --- a/test/CodeGen/X86/lsr-reuse-trunc.ll +++ b/test/CodeGen/X86/lsr-reuse-trunc.ll @@ -1,12 +1,13 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; Full strength reduction wouldn't reduce register pressure, so LSR should ; stick with indexing here. 
-; CHECK: movaps (%rsi,%rax,4), %xmm3 -; CHECK: movaps %xmm3, (%rdi,%rax,4) +; CHECK: movaps (%{{rsi|rdx}},%rax,4), %xmm3 +; CHECK: movaps %xmm3, (%{{rdi|rcx}},%rax,4) ; CHECK: addq $4, %rax -; CHECK: cmpl %eax, (%rdx) +; CHECK: cmpl %eax, (%{{rdx|r8}}) ; CHECK-NEXT: jg define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind { diff --git a/test/CodeGen/X86/memcmp.ll b/test/CodeGen/X86/memcmp.ll index 36be1f308ccd..f4bc1bb7015a 100644 --- a/test/CodeGen/X86/memcmp.ll +++ b/test/CodeGen/X86/memcmp.ll @@ -1,4 +1,5 @@ -; RUN: llc %s -o - -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; This tests codegen time inlining/optimization of memcmp ; rdar://6480398 @@ -20,8 +21,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK: memcmp2: -; CHECK: movw (%rdi), %ax -; CHECK: cmpw (%rsi), %ax +; CHECK: movw ([[A0:%rdi|%rcx]]), %ax +; CHECK: cmpw ([[A1:%rsi|%rdx]]), %ax } define void @memcmp2a(i8* %X, i32* nocapture %P) nounwind { @@ -37,7 +38,7 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK: memcmp2a: -; CHECK: cmpw $28527, (%rdi) +; CHECK: cmpw $28527, ([[A0]]) } @@ -54,8 +55,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK: memcmp4: -; CHECK: movl (%rdi), %eax -; CHECK: cmpl (%rsi), %eax +; CHECK: movl ([[A0]]), %eax +; CHECK: cmpl ([[A1]]), %eax } define void @memcmp4a(i8* %X, i32* nocapture %P) nounwind { @@ -71,7 +72,7 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK: memcmp4a: -; CHECK: cmpl $1869573999, (%rdi) +; CHECK: cmpl $1869573999, ([[A0]]) } define void @memcmp8(i8* %X, i8* %Y, i32* nocapture %P) nounwind { @@ -87,8 +88,8 @@ bb: ; preds = %entry return: ; preds = %entry ret void ; CHECK: memcmp8: -; CHECK: movq (%rdi), %rax -; CHECK: cmpq (%rsi), %rax +; CHECK: movq ([[A0]]), %rax +; CHECK: cmpq ([[A1]]), %rax } define void @memcmp8a(i8* %X, i32* nocapture %P) nounwind { 
@@ -105,6 +106,6 @@ return: ; preds = %entry ret void ; CHECK: memcmp8a: ; CHECK: movabsq $8029759185026510694, %rax -; CHECK: cmpq %rax, (%rdi) +; CHECK: cmpq %rax, ([[A0]]) } diff --git a/test/CodeGen/X86/movgs.ll b/test/CodeGen/X86/movgs.ll index 00190e802fc9..97b7fe70d858 100644 --- a/test/CodeGen/X86/movgs.ll +++ b/test/CodeGen/X86/movgs.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=sse41 | FileCheck %s --check-prefix=X32 -; RUN: llc < %s -march=x86-64 -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux -mattr=sse41 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=sse41 | FileCheck %s --check-prefix=X64 define i32 @test1() nounwind readonly { entry: @@ -30,7 +31,7 @@ entry: ; X32: calll *%gs:(%eax) ; X64: test2: -; X64: callq *%gs:(%rdi) +; X64: callq *%gs:([[A0:%rdi|%rcx]]) @@ -50,7 +51,7 @@ entry: ; X32: ret ; X64: pmovsxwd_1: -; X64: pmovsxwd %gs:(%rdi), %xmm0 +; X64: pmovsxwd %gs:([[A0]]), %xmm0 ; X64: ret } diff --git a/test/CodeGen/X86/non-globl-eh-frame.ll b/test/CodeGen/X86/non-globl-eh-frame.ll deleted file mode 100644 index 71349ecafeb6..000000000000 --- a/test/CodeGen/X86/non-globl-eh-frame.ll +++ /dev/null @@ -1,24 +0,0 @@ -; RUN: llc < %s -mtriple x86_64-apple-darwin10 -march x86 | not grep {{.globl\[\[:space:\]\]*__Z4funcv.eh}} -; RUN: llc < %s -mtriple x86_64-apple-darwin9 -march x86 | FileCheck %s -check-prefix=DARWIN9 - -%struct.__pointer_type_info_pseudo = type { %struct.__type_info_pseudo, i32, %"struct.std::type_info"* } -%struct.__type_info_pseudo = type { i8*, i8* } -%"struct.std::type_info" = type opaque - -@.str = private constant [12 x i8] c"hello world\00", align 1 -@_ZTIPc = external constant %struct.__pointer_type_info_pseudo - -define void @_Z4funcv() noreturn optsize ssp { -entry: - %0 = tail call i8* @__cxa_allocate_exception(i64 8) nounwind - %1 = bitcast i8* %0 to i8** - store i8* getelementptr inbounds ([12 x i8]* @.str, i64 0, i64 0), i8** %1, align 8 - 
tail call void @__cxa_throw(i8* %0, i8* bitcast (%struct.__pointer_type_info_pseudo* @_ZTIPc to i8*), void (i8*)* null) noreturn - unreachable -} - -; DARWIN9: .globl __Z4funcv.eh - -declare i8* @__cxa_allocate_exception(i64) nounwind - -declare void @__cxa_throw(i8*, i8*, void (i8*)*) noreturn diff --git a/test/CodeGen/X86/optimize-max-3.ll b/test/CodeGen/X86/optimize-max-3.ll index f1e3c2772ac9..b90413d40a0f 100644 --- a/test/CodeGen/X86/optimize-max-3.ll +++ b/test/CodeGen/X86/optimize-max-3.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86-64 -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux -asm-verbose=false | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 -asm-verbose=false | FileCheck %s ; LSR's OptimizeMax should eliminate the select (max). @@ -40,13 +41,13 @@ for.end: ; preds = %for.body, %entry ; CHECK: jle ; CHECK-NOT: cmov -; CHECK: xorl %edi, %edi +; CHECK: xorl {{%edi, %edi|%ecx, %ecx}} ; CHECK-NEXT: align ; CHECK-NEXT: BB1_2: ; CHECK-NEXT: callq -; CHECK-NEXT: incl %ebx -; CHECK-NEXT: cmpl %r14d, %ebx -; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: incl [[BX:%ebx|%esi]] +; CHECK-NEXT: cmpl [[R14:%r14d|%edi]], [[BX]] +; CHECK-NEXT: movq %rax, %r{{di|cx}} ; CHECK-NEXT: jl define void @_Z18GenerateStatusPagei(i32 %jobs_to_display) nounwind { diff --git a/test/CodeGen/X86/phi-constants.ll b/test/CodeGen/X86/phi-constants.ll new file mode 100644 index 000000000000..da9652f73404 --- /dev/null +++ b/test/CodeGen/X86/phi-constants.ll @@ -0,0 +1,35 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +%"class.std::bitset" = type { [8 x i8] } + +define zeroext i1 @_Z3fooPjmS_mRSt6bitsetILm32EE(i32* nocapture %a, i64 %asize, i32* nocapture %b, i64 %bsize, %"class.std::bitset"* %bits) nounwind readonly ssp noredzone { +entry: + %tmp.i.i.i.i = bitcast %"class.std::bitset"* %bits to i64* + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %conv = zext i32 %0 to i64 + %cmp = icmp eq 
i64 %conv, %bsize + br i1 %cmp, label %return, label %for.body + +for.body: ; preds = %for.cond + %arrayidx = getelementptr inbounds i32* %b, i64 %conv + %tmp5 = load i32* %arrayidx, align 4 + %conv6 = zext i32 %tmp5 to i64 + %rem.i.i.i.i = and i64 %conv6, 63 + %tmp3.i = load i64* %tmp.i.i.i.i, align 8 + %shl.i.i = shl i64 1, %rem.i.i.i.i + %and.i = and i64 %shl.i.i, %tmp3.i + %cmp.i = icmp eq i64 %and.i, 0 + br i1 %cmp.i, label %for.inc, label %return + +for.inc: ; preds = %for.body + %inc = add i32 %0, 1 + br label %for.cond + +return: ; preds = %for.body, %for.cond +; CHECK-NOT: and + %retval.0 = phi i1 [ true, %for.body ], [ false, %for.cond ] + ret i1 %retval.0 +} diff --git a/test/CodeGen/X86/pr9127.ll b/test/CodeGen/X86/pr9127.ll index 45b0c6c78706..9b251f57e0e3 100644 --- a/test/CodeGen/X86/pr9127.ll +++ b/test/CodeGen/X86/pr9127.ll @@ -1,4 +1,5 @@ -; RUN: llc -march=x86-64 < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-linux < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-win32 < %s | FileCheck %s define i8 @foobar(double %d, double* %x) { entry: @@ -9,4 +10,4 @@ entry: } ; test that the load is folded. -; CHECK: ucomisd (%rdi), %xmm0 +; CHECK: ucomisd (%{{rdi|rdx}}), %xmm0 diff --git a/test/CodeGen/X86/red-zone.ll b/test/CodeGen/X86/red-zone.ll index 1ffb4e3c78f6..d93697123596 100644 --- a/test/CodeGen/X86/red-zone.ll +++ b/test/CodeGen/X86/red-zone.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; First without noredzone. ; CHECK: f0: diff --git a/test/CodeGen/X86/remat-mov-0.ll b/test/CodeGen/X86/remat-mov-0.ll index 5fb445c9357c..f89cd330803d 100644 --- a/test/CodeGen/X86/remat-mov-0.ll +++ b/test/CodeGen/X86/remat-mov-0.ll @@ -1,12 +1,13 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; CodeGen should remat the zero instead of spilling it. 
declare void @foo(i64 %p) ; CHECK: bar: -; CHECK: xorl %edi, %edi -; CHECK: xorl %edi, %edi +; CHECK: xorl %e[[A0:di|cx]], %e +; CHECK: xorl %e[[A0]], %e[[A0]] define void @bar() nounwind { call void @foo(i64 0) call void @foo(i64 0) @@ -14,8 +15,8 @@ define void @bar() nounwind { } ; CHECK: bat: -; CHECK: movq $-1, %rdi -; CHECK: movq $-1, %rdi +; CHECK: movq $-1, %r[[A0]] +; CHECK: movq $-1, %r[[A0]] define void @bat() nounwind { call void @foo(i64 -1) call void @foo(i64 -1) @@ -23,8 +24,8 @@ define void @bat() nounwind { } ; CHECK: bau: -; CHECK: movl $1, %edi -; CHECK: movl $1, %edi +; CHECK: movl $1, %e[[A0]] +; CHECK: movl $1, %e[[A0]] define void @bau() nounwind { call void @foo(i64 1) call void @foo(i64 1) diff --git a/test/CodeGen/X86/test-shrink.ll b/test/CodeGen/X86/test-shrink.ll index 1d636930641f..5bc28ecbc48c 100644 --- a/test/CodeGen/X86/test-shrink.ll +++ b/test/CodeGen/X86/test-shrink.ll @@ -1,8 +1,9 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=CHECK-64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s --check-prefix=CHECK-64 ; RUN: llc < %s -march=x86 | FileCheck %s --check-prefix=CHECK-32 ; CHECK-64: g64xh: -; CHECK-64: testb $8, %ah +; CHECK-64: testb $8, {{%ah|%ch}} ; CHECK-64: ret ; CHECK-32: g64xh: ; CHECK-32: testb $8, %ah @@ -19,7 +20,7 @@ no: ret void } ; CHECK-64: g64xl: -; CHECK-64: testb $8, %dil +; CHECK-64: testb $8, [[A0L:%dil|%cl]] ; CHECK-64: ret ; CHECK-32: g64xl: ; CHECK-32: testb $8, %al @@ -36,7 +37,7 @@ no: ret void } ; CHECK-64: g32xh: -; CHECK-64: testb $8, %ah +; CHECK-64: testb $8, {{%ah|%ch}} ; CHECK-64: ret ; CHECK-32: g32xh: ; CHECK-32: testb $8, %ah @@ -53,7 +54,7 @@ no: ret void } ; CHECK-64: g32xl: -; CHECK-64: testb $8, %dil +; CHECK-64: testb $8, [[A0L]] ; CHECK-64: ret ; CHECK-32: g32xl: ; CHECK-32: testb $8, %al @@ -70,7 +71,7 @@ no: ret void } ; CHECK-64: g16xh: -; CHECK-64: testb $8, %ah +; CHECK-64: testb $8, 
{{%ah|%ch}} ; CHECK-64: ret ; CHECK-32: g16xh: ; CHECK-32: testb $8, %ah @@ -87,7 +88,7 @@ no: ret void } ; CHECK-64: g16xl: -; CHECK-64: testb $8, %dil +; CHECK-64: testb $8, [[A0L]] ; CHECK-64: ret ; CHECK-32: g16xl: ; CHECK-32: testb $8, %al @@ -104,7 +105,7 @@ no: ret void } ; CHECK-64: g64x16: -; CHECK-64: testw $-32640, %di +; CHECK-64: testw $-32640, %[[A0W:di|cx]] ; CHECK-64: ret ; CHECK-32: g64x16: ; CHECK-32: testw $-32640, %ax @@ -121,7 +122,7 @@ no: ret void } ; CHECK-64: g32x16: -; CHECK-64: testw $-32640, %di +; CHECK-64: testw $-32640, %[[A0W]] ; CHECK-64: ret ; CHECK-32: g32x16: ; CHECK-32: testw $-32640, %ax @@ -138,7 +139,7 @@ no: ret void } ; CHECK-64: g64x32: -; CHECK-64: testl $268468352, %edi +; CHECK-64: testl $268468352, %e[[A0W]] ; CHECK-64: ret ; CHECK-32: g64x32: ; CHECK-32: testl $268468352, %eax diff --git a/test/CodeGen/X86/use-add-flags.ll b/test/CodeGen/X86/use-add-flags.ll index c2f0c23fe1d3..8fbbd397b8af 100644 --- a/test/CodeGen/X86/use-add-flags.ll +++ b/test/CodeGen/X86/use-add-flags.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86-64 -o - | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; Reuse the flags value from the add instructions instead of emitting separate ; testl instructions. @@ -6,9 +7,9 @@ ; Use the flags on the add. ; CHECK: test1: -; CHECK: addl (%rdi), %esi -; CHECK-NEXT: movl %edx, %eax -; CHECK-NEXT: cmovnsl %ecx, %eax +; CHECK: addl (%r[[A0:di|cx]]), {{%esi|%edx}} +; CHECK-NEXT: movl {{%edx|%r8d}}, %eax +; CHECK-NEXT: cmovnsl {{%ecx|%r9d}}, %eax ; CHECK-NEXT: ret define i32 @test1(i32* %x, i32 %y, i32 %a, i32 %b) nounwind { @@ -25,7 +26,7 @@ declare void @foo(i32) ; other use. A simple test is better. ; CHECK: test2: -; CHECK: testb $16, %dil +; CHECK: testb $16, {{%dil|%cl}} define void @test2(i32 %x) nounwind { %y = and i32 %x, 16 @@ -41,7 +42,7 @@ false: ; Do use the flags result of the and here, since the and has another use. 
; CHECK: test3: -; CHECK: andl $16, %edi +; CHECK: andl $16, %e[[A0]] ; CHECK-NEXT: jne define void @test3(i32 %x) nounwind { diff --git a/test/CodeGen/X86/vec_anyext.ll b/test/CodeGen/X86/vec_anyext.ll new file mode 100644 index 000000000000..d2a4c7f60dd7 --- /dev/null +++ b/test/CodeGen/X86/vec_anyext.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i16> @func_16_32() { + %F = load <4 x i32>* undef + %G = trunc <4 x i32> %F to <4 x i16> + %H = load <4 x i32>* undef + %Y = trunc <4 x i32> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + store <4 x i16>%T , <4 x i16>* undef + ret <4 x i16> %T +} + +define<4 x i16> @func_16_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i16> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i16> + %T = xor <4 x i16> %Y, %G + store <4 x i16>%T , <4 x i16>* undef + ret <4 x i16> %T +} + +define<4 x i32> @func_32_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i32> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i32> + %T = or <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i8> @func_8_16() { + %F = load <4 x i16>* undef + %G = trunc <4 x i16> %F to <4 x i8> + %H = load <4 x i16>* undef + %Y = trunc <4 x i16> %H to <4 x i8> + %T = add <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i8> @func_8_32() { + %F = load <4 x i32>* undef + %G = trunc <4 x i32> %F to <4 x i8> + %H = load <4 x i32>* undef + %Y = trunc <4 x i32> %H to <4 x i8> + %T = sub <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i8> @func_8_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i8> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i8> + %T = add <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i16> @const_16_32() { + %G = trunc <4 x i32> to <4 x i16> + ret <4 x i16> %G +} + +define<4 x i16> @const_16_64() { + %G = trunc <4 x i64> to <4 x i16> + ret <4 x i16> %G +} + +define void @bugOnTruncBitwidthReduce() nounwind { +meh: 
+ %0 = xor <4 x i64> zeroinitializer, zeroinitializer + %1 = trunc <4 x i64> %0 to <4 x i32> + %2 = lshr <4 x i32> %1, + %3 = xor <4 x i32> %2, %1 + ret void +} diff --git a/test/CodeGen/X86/vec_sext.ll b/test/CodeGen/X86/vec_sext.ll new file mode 100644 index 000000000000..776ddec2e63b --- /dev/null +++ b/test/CodeGen/X86/vec_sext.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i32> @func_16_32() { + %F = load <4 x i16>* undef + %G = sext <4 x i16> %F to <4 x i32> + %H = load <4 x i16>* undef + %Y = sext <4 x i16> %H to <4 x i32> + %T = add <4 x i32> %Y, %G + store <4 x i32>%T , <4 x i32>* undef + ret <4 x i32> %T +} + +define<4 x i64> @func_16_64() { + %F = load <4 x i16>* undef + %G = sext <4 x i16> %F to <4 x i64> + %H = load <4 x i16>* undef + %Y = sext <4 x i16> %H to <4 x i64> + %T = xor <4 x i64> %Y, %G + store <4 x i64>%T , <4 x i64>* undef + ret <4 x i64> %T +} + +define<4 x i64> @func_32_64() { + %F = load <4 x i32>* undef + %G = sext <4 x i32> %F to <4 x i64> + %H = load <4 x i32>* undef + %Y = sext <4 x i32> %H to <4 x i64> + %T = or <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i16> @func_8_16() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i16> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + ret <4 x i16> %T +} + +define<4 x i32> @func_8_32() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i32> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i32> + %T = sub <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i64> @func_8_64() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i64> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i64> + %T = add <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i32> @const_16_32() { + %G = sext <4 x i16> to <4 x i32> + ret <4 x i32> %G +} + +define<4 x i64> @const_16_64() { + %G = sext <4 x i16> to <4 x i64> + ret <4 x i64> %G +} + diff --git 
a/test/CodeGen/X86/vec_shuffle-37.ll b/test/CodeGen/X86/vec_shuffle-37.ll index b09093089c5a..2efdb14b4044 100644 --- a/test/CodeGen/X86/vec_shuffle-37.ll +++ b/test/CodeGen/X86/vec_shuffle-37.ll @@ -1,9 +1,10 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s ; RUN: llc -O0 < %s -march=x86 -mcpu=core2 | FileCheck %s --check-prefix=CHECK_O0 define <4 x i32> @t00(<4 x i32>* %a0) nounwind ssp { entry: -; CHECK: movaps (%rdi), %xmm0 +; CHECK: movaps ({{%rdi|%rcx}}), %xmm0 ; CHECK-NEXT: movaps %xmm0, %xmm1 ; CHECK-NEXT: movlps (%rax), %xmm1 ; CHECK-NEXT: shufps $36, %xmm1, %xmm0 diff --git a/test/CodeGen/X86/vec_zext.ll b/test/CodeGen/X86/vec_zext.ll new file mode 100644 index 000000000000..615a50b7afc3 --- /dev/null +++ b/test/CodeGen/X86/vec_zext.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i32> @func_16_32() { + %F = load <4 x i16>* undef + %G = zext <4 x i16> %F to <4 x i32> + %H = load <4 x i16>* undef + %Y = zext <4 x i16> %H to <4 x i32> + %T = add <4 x i32> %Y, %G + store <4 x i32>%T , <4 x i32>* undef + ret <4 x i32> %T +} + +define<4 x i64> @func_16_64() { + %F = load <4 x i16>* undef + %G = zext <4 x i16> %F to <4 x i64> + %H = load <4 x i16>* undef + %Y = zext <4 x i16> %H to <4 x i64> + %T = xor <4 x i64> %Y, %G + store <4 x i64>%T , <4 x i64>* undef + ret <4 x i64> %T +} + +define<4 x i64> @func_32_64() { + %F = load <4 x i32>* undef + %G = zext <4 x i32> %F to <4 x i64> + %H = load <4 x i32>* undef + %Y = zext <4 x i32> %H to <4 x i64> + %T = or <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i16> @func_8_16() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i16> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + ret <4 x i16> %T +} + +define<4 x i32> @func_8_32() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i32> + %H = load <4 x i8>* undef + 
%Y = zext <4 x i8> %H to <4 x i32> + %T = sub <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i64> @func_8_64() { + %F = load <4 x i8>* undef + %G = zext <4 x i8> %F to <4 x i64> + %H = load <4 x i8>* undef + %Y = zext <4 x i8> %H to <4 x i64> + %T = add <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i32> @const_16_32() { + %G = zext <4 x i16> to <4 x i32> + ret <4 x i32> %G +} + +define<4 x i64> @const_16_64() { + %G = zext <4 x i16> to <4 x i64> + ret <4 x i64> %G +} + diff --git a/test/CodeGen/X86/xor.ll b/test/CodeGen/X86/xor.ll index 6c623cb15538..b90d81ac9b18 100644 --- a/test/CodeGen/X86/xor.ll +++ b/test/CodeGen/X86/xor.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=X32 -; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64 ; Though it is undefined, we want xor undef,undef to produce zero. 
define <4 x i32> @test1() nounwind { @@ -28,9 +29,9 @@ entry: ret i32 %tmp4 ; X64: test3: -; X64: notl %esi -; X64: andl %edi, %esi -; X64: movl %esi, %eax +; X64: notl [[A1:%esi|%edx]] +; X64: andl [[A0:%edi|%ecx]], [[A1]] +; X64: movl [[A1]], %eax ; X64: shrl %eax ; X64: ret diff --git a/test/CodeGen/XCore/events.ll b/test/CodeGen/XCore/events.ll new file mode 100644 index 000000000000..4fc2f26d1b6b --- /dev/null +++ b/test/CodeGen/XCore/events.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=xcore | FileCheck %s + +declare void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p) +declare i8* @llvm.xcore.waitevent() +declare void @llvm.xcore.clre() + +define i32 @f(i8 addrspace(1)* %r) nounwind { +; CHECK: f: +entry: +; CHECK: clre + call void @llvm.xcore.clre() + call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* blockaddress(@f, %L1)) + call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* blockaddress(@f, %L2)) + %goto_addr = call i8* @llvm.xcore.waitevent() +; CHECK: waiteu + indirectbr i8* %goto_addr, [label %L1, label %L2] +L1: + br label %ret +L2: + br label %ret +ret: + %retval = phi i32 [1, %L1], [2, %L2] + ret i32 %retval +} diff --git a/test/CodeGen/XCore/resources.ll b/test/CodeGen/XCore/resources.ll index 3114bdcd1777..3389912b8c0b 100644 --- a/test/CodeGen/XCore/resources.ll +++ b/test/CodeGen/XCore/resources.ll @@ -11,6 +11,14 @@ declare void @llvm.xcore.outct.p1i8(i8 addrspace(1)* %r, i32 %value) declare void @llvm.xcore.chkct.p1i8(i8 addrspace(1)* %r, i32 %value) declare void @llvm.xcore.setd.p1i8(i8 addrspace(1)* %r, i32 %value) declare void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.inshr.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.outshr.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.setpt.p1i8(i8 addrspace(1)* %r, i32 %value) +declare i32 @llvm.xcore.getts.p1i8(i8 addrspace(1)* %r) +declare void @llvm.xcore.syncr.p1i8(i8 addrspace(1)* %r) +declare void 
@llvm.xcore.settw.p1i8(i8 addrspace(1)* %r, i32 %value) +declare void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p) +declare void @llvm.xcore.eeu.p1i8(i8 addrspace(1)* %r) define i8 addrspace(1)* @getr() { ; CHECK: getr: @@ -109,3 +117,60 @@ define void @setci(i8 addrspace(1)* %r) { call void @llvm.xcore.setc.p1i8(i8 addrspace(1)* %r, i32 2) ret void } + +define i32 @inshr(i32 %value, i8 addrspace(1)* %r) { +; CHECK: inshr: +; CHECK: inshr r0, res[r1] + %result = call i32 @llvm.xcore.inshr.p1i8(i8 addrspace(1)* %r, i32 %value) + ret i32 %result +} + +define i32 @outshr(i32 %value, i8 addrspace(1)* %r) { +; CHECK: outshr: +; CHECK: outshr res[r1], r0 + %result = call i32 @llvm.xcore.outshr.p1i8(i8 addrspace(1)* %r, i32 %value) + ret i32 %result +} + +define void @setpt(i8 addrspace(1)* %r, i32 %value) { +; CHECK: setpt: +; CHECK: setpt res[r0], r1 + call void @llvm.xcore.setpt.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define i32 @getts(i8 addrspace(1)* %r) { +; CHECK: getts: +; CHECK: getts r0, res[r0] + %result = call i32 @llvm.xcore.getts.p1i8(i8 addrspace(1)* %r) + ret i32 %result +} + +define void @syncr(i8 addrspace(1)* %r) { +; CHECK: syncr: +; CHECK: syncr res[r0] + call void @llvm.xcore.syncr.p1i8(i8 addrspace(1)* %r) + ret void +} + +define void @settw(i8 addrspace(1)* %r, i32 %value) { +; CHECK: settw: +; CHECK: settw res[r0], r1 + call void @llvm.xcore.settw.p1i8(i8 addrspace(1)* %r, i32 %value) + ret void +} + +define void @setv(i8 addrspace(1)* %r, i8* %p) { +; CHECK: setv: +; CHECK: mov r11, r1 +; CHECK-NEXT: setv res[r0], r11 + call void @llvm.xcore.setv.p1i8(i8 addrspace(1)* %r, i8* %p) + ret void +} + +define void @eeu(i8 addrspace(1)* %r) { +; CHECK: eeu: +; CHECK: eeu res[r0] + call void @llvm.xcore.eeu.p1i8(i8 addrspace(1)* %r) + ret void +} diff --git a/test/DebugInfo/2009-03-03-deadstore.ll b/test/DebugInfo/2009-03-03-deadstore.ll deleted file mode 100644 index 0705c155f9f7..000000000000 --- 
a/test/DebugInfo/2009-03-03-deadstore.ll +++ /dev/null @@ -1,108 +0,0 @@ -; RUN: opt < %s -instcombine -S | not grep alloca -; ModuleID = '' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = "i386-apple-darwin9.6" - - type { } ; type %0 - type <{ i8 }> ; type %1 - type { i32 (...)**, %3 } ; type %2 - type { %4, %2*, i8, i8, %10*, %11*, %12*, %12* } ; type %3 - type { i32 (...)**, i32, i32, i32, i32, i32, %5*, %6, [8 x %6], i32, %6*, %7 } ; type %4 - type { %5*, void (i32, %4*, i32)*, i32, i32 } ; type %5 - type { i8*, i32 } ; type %6 - type { %8* } ; type %7 - type { i32, %9**, i32, %9**, i8** } ; type %8 - type { i32 (...)**, i32 } ; type %9 - type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %7 } ; type %10 - type { %9, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 } ; type %11 - type { %9 } ; type %12 - type { i32, void ()* } ; type %13 - type { %15 } ; type %14 - type { %16 } ; type %15 - type { %17 } ; type %16 - type { i32*, i32*, i32* } ; type %17 - type { %19 } ; type %18 - type { %20 } ; type %19 - type { %21 } ; type %20 - type { %14*, %14*, %14* } ; type %21 - type { i32 } ; type %22 - type { i8 } ; type %23 - type { i32* } ; type %24 - type { %14* } ; type %25 - type { %27 } ; type %26 - type { i8* } ; type %27 - type { %29, %30, %3 } ; type %28 - type { i32 (...)** } ; type %29 - type { %10, i32, %26 } ; type %30 - %llvm.dbg.anchor.type = type { i32, i32 } - %llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 } - %llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 } - %llvm.dbg.composite.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0*, %0*, i32 } - %llvm.dbg.derivedtype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0* } - %llvm.dbg.enumerator.type = type { i32, i8*, i64 } - %llvm.dbg.global_variable.type = type { i32, %0*, %0*, i8*, i8*, 
i8*, %0*, i32, %0*, i1, i1, %0* } - %llvm.dbg.subprogram.type = type { i32, %0*, %0*, i8*, i8*, i8*, %0*, i32, %0*, i1, i1 } - %llvm.dbg.subrange.type = type { i32, i64, i64 } - %llvm.dbg.variable.type = type { i32, %0*, i8*, %0*, i32, %0* } - -@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -internal constant [11 x i8] c"bigfib.cpp\00", section "llvm.metadata" ; <[11 x i8]*>:0 [#uses=1] -internal constant [84 x i8] c"/Volumes/Nanpura/mainline/llvm/projects/llvm-test/SingleSource/Benchmarks/Misc-C++/\00", section "llvm.metadata" ; <[84 x i8]*>:1 [#uses=1] -internal constant [57 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 2099)\00", section "llvm.metadata" ; <[57 x i8]*>:2 [#uses=1] -@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([11 x i8]* @0, i32 0, i32 0), i8* getelementptr ([84 x i8]* @1, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [23 x i8] c"/usr/include/c++/4.0.0\00", section "llvm.metadata" ; <[23 x i8]*>:3 [#uses=1] - - -internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*>:4 [#uses=1] -@llvm.dbg.basictype103 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([4 x i8]* @4, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1] -internal constant [8 x i8] 
c"iomanip\00", section "llvm.metadata" ; <[8 x i8]*>:5 [#uses=1] -@llvm.dbg.compile_unit1548 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([8 x i8]* @5, i32 0, i32 0), i8* getelementptr ([23 x i8]* @3, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [6 x i8] c"_Setw\00", section "llvm.metadata" ; <[6 x i8]*>:6 [#uses=1] -internal constant [5 x i8] c"_M_n\00", section "llvm.metadata" ; <[5 x i8]*>:7 [#uses=1] -@llvm.dbg.derivedtype1552 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @7, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1548 to %0*), i32 232, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype103 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.array1553 = internal constant [1 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1552 to %0*)], section "llvm.metadata" ; <[1 x %0*]*> [#uses=1] -@llvm.dbg.composite1554 = internal constant %llvm.dbg.composite.type { i32 458771, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([6 x i8]* @6, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1548 to %0*), i32 232, i64 32, i64 32, i64 0, i32 0, %0* null, %0* bitcast ([1 x %0*]* @llvm.dbg.array1553 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@llvm.dbg.array1555 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1554 to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype103 to %0*)], section "llvm.metadata" ; <[2 x 
%0*]*> [#uses=1] -@llvm.dbg.composite1556 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array1555 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [5 x i8] c"setw\00", section "llvm.metadata" ; <[5 x i8]*>:8 [#uses=2] -internal constant [11 x i8] c"_ZSt4setwi\00", section "llvm.metadata" ; <[11 x i8]*>:9 [#uses=1] -@llvm.dbg.subprogram1559 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @8, i32 0, i32 0), i8* getelementptr ([5 x i8]* @8, i32 0, i32 0), i8* getelementptr ([11 x i8]* @9, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1548 to %0*), i32 242, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1556 to %0*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -internal constant [4 x i8] c"__x\00", section "llvm.metadata" ; <[4 x i8]*>:10 [#uses=1] -@llvm.dbg.variable1563 = internal constant %llvm.dbg.variable.type { i32 459008, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1559 to %0*), i8* getelementptr ([4 x i8]* @10, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1548 to %0*), i32 244, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1554 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] - -define linkonce i32 @_ZSt4setwi(i32) nounwind { - %2 = alloca %22 ; <%22*> [#uses=2] - %3 = alloca %22 ; <%22*> [#uses=3] - %4 = alloca %22 ; <%22*> [#uses=2] - %5 = bitcast i32 0 to i32 ; [#uses=0] - call void @llvm.dbg.func.start(%0* 
bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1559 to %0*)) - %6 = bitcast %22* %3 to %0* ; <%0*> [#uses=1] - call void @llvm.dbg.declare(%0* %6, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable1563 to %0*)) - call void @llvm.dbg.stoppoint(i32 245, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1548 to %0*)) - %7 = getelementptr %22* %3, i32 0, i32 0 ; [#uses=1] - store i32 %0, i32* %7, align 4 - call void @llvm.dbg.stoppoint(i32 246, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1548 to %0*)) - %8 = getelementptr %22* %4, i32 0, i32 0 ; [#uses=1] - %9 = getelementptr %22* %3, i32 0, i32 0 ; [#uses=1] - %10 = load i32* %9, align 4 ; [#uses=1] - store i32 %10, i32* %8, align 4 - %11 = getelementptr %22* %2, i32 0, i32 0 ; [#uses=1] - %12 = getelementptr %22* %4, i32 0, i32 0 ; [#uses=1] - %13 = load i32* %12, align 4 ; [#uses=1] - store i32 %13, i32* %11, align 4 - %14 = bitcast %22* %2 to i32* ; [#uses=1] - %15 = load i32* %14 ; [#uses=1] - call void @llvm.dbg.stoppoint(i32 246, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1548 to %0*)) - call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1559 to %0*)) - ret i32 %15 -} - -declare void @llvm.dbg.func.start(%0*) nounwind - -declare void @llvm.dbg.declare(%0*, %0*) nounwind - -declare void @llvm.dbg.stoppoint(i32, i32, %0*) nounwind - -declare void @llvm.dbg.region.end(%0*) nounwind - diff --git a/test/DebugInfo/2009-03-03-store-to-load-forward.ll b/test/DebugInfo/2009-03-03-store-to-load-forward.ll deleted file mode 100644 index 75d3a6943393..000000000000 --- a/test/DebugInfo/2009-03-03-store-to-load-forward.ll +++ /dev/null @@ -1,260 +0,0 @@ -; RUN: opt < %s -instcombine -S | not grep alloca -; ModuleID = '' -target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -target triple = 
"i386-apple-darwin9.6" - type { } ; type %0 - type <{ i8 }> ; type %1 - type { i32* } ; type %2 - %llvm.dbg.anchor.type = type { i32, i32 } - %llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 } - %llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 } - %llvm.dbg.composite.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0*, %0*, i32 } - %llvm.dbg.derivedtype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, %0* } - %llvm.dbg.subprogram.type = type { i32, %0*, %0*, i8*, i8*, i8*, %0*, i32, %0*, i1, i1 } - %llvm.dbg.variable.type = type { i32, %0*, i8*, %0*, i32, %0* } -@llvm.dbg.compile_units = internal constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -internal constant [11 x i8] c"bigfib.cpp\00", section "llvm.metadata" ; <[11 x i8]*>:0 [#uses=1] -internal constant [84 x i8] c"/Volumes/Nanpura/mainline/llvm/projects/llvm-test/SingleSource/Benchmarks/Misc-C++/\00", section "llvm.metadata" ; <[84 x i8]*>:1 [#uses=1] -internal constant [57 x i8] c"4.2.1 (Based on Apple Inc. 
build 5636) (LLVM build 2099)\00", section "llvm.metadata" ; <[57 x i8]*>:2 [#uses=1] -@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([11 x i8]* @0, i32 0, i32 0), i8* getelementptr ([84 x i8]* @1, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [18 x i8] c"long unsigned int\00", section "llvm.metadata" ; <[18 x i8]*>:3 [#uses=1] -@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([18 x i8]* @3, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 7 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1] -internal constant [69 x i8] c"/Developer/usr/llvm-gcc-4.2/lib/gcc/i686-apple-darwin9/4.2.1/include\00", section "llvm.metadata" ; <[69 x i8]*>:4 [#uses=1] -@llvm.dbg.subprograms = internal constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata" ; <%llvm.dbg.anchor.type*> [#uses=1] -internal constant [12 x i8] c"unnamed_arg\00", section "llvm.metadata" ; <[12 x i8]*>:5 [#uses=1] -internal constant [28 x i8] c"/usr/include/c++/4.0.0/bits\00", section "llvm.metadata" ; <[28 x i8]*>:6 [#uses=1] -internal constant [4 x i8] c"int\00", section "llvm.metadata" ; <[4 x i8]*>:7 [#uses=1] -@llvm.dbg.basictype103 = internal constant %llvm.dbg.basictype.type { i32 458788, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([4 x i8]* @7, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata" ; <%llvm.dbg.basictype.type*> [#uses=1] 
-@llvm.dbg.derivedtype110 = internal constant %llvm.dbg.derivedtype.type { i32 458790, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype103 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [11 x i8] c"\00", section "llvm.metadata" ; <[11 x i8]*>:8 [#uses=1] -@llvm.dbg.compile_unit112 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([11 x i8]* @8, i32 0, i32 0), i8* getelementptr ([84 x i8]* @1, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [10 x i8] c"ptrdiff_t\00", section "llvm.metadata" ; <[10 x i8]*>:9 [#uses=1] -@llvm.dbg.derivedtype114 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([10 x i8]* @9, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit112 to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype110 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [9 x i8] c"_types.h\00", section "llvm.metadata" ; <[9 x i8]*>:10 [#uses=1] -internal constant [18 x i8] c"/usr/include/i386\00", section "llvm.metadata" ; <[18 x i8]*>:11 [#uses=1] -@llvm.dbg.compile_unit117 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([9 x i8]* @10, i32 0, i32 0), i8* getelementptr ([18 x i8]* @11, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 
0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [10 x i8] c"__int32_t\00", section "llvm.metadata" ; <[10 x i8]*>:12 [#uses=1] -@llvm.dbg.derivedtype119 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([10 x i8]* @12, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit117 to %0*), i32 43, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype114 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [19 x i8] c"__darwin_ct_rune_t\00", section "llvm.metadata" ; <[19 x i8]*>:13 [#uses=1] -@llvm.dbg.derivedtype121 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([19 x i8]* @13, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit117 to %0*), i32 50, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype119 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [19 x i8] c"__darwin_ptrdiff_t\00", section "llvm.metadata" ; <[19 x i8]*>:14 [#uses=1] -@llvm.dbg.derivedtype123 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([19 x i8]* @14, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit117 to %0*), i32 81, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype121 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [17 x i8] c"__darwin_wchar_t\00", section "llvm.metadata" ; <[17 x i8]*>:15 [#uses=1] -@llvm.dbg.derivedtype125 = internal constant 
%llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([17 x i8]* @15, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit117 to %0*), i32 96, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype123 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [16 x i8] c"__darwin_rune_t\00", section "llvm.metadata" ; <[16 x i8]*>:16 [#uses=1] -@llvm.dbg.derivedtype127 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([16 x i8]* @16, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit117 to %0*), i32 102, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype125 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [16 x i8] c"__darwin_wint_t\00", section "llvm.metadata" ; <[16 x i8]*>:17 [#uses=1] -@llvm.dbg.derivedtype129 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([16 x i8]* @17, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit117 to %0*), i32 107, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype127 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [17 x i8] c"/usr/include/sys\00", section "llvm.metadata" ; <[17 x i8]*>:18 [#uses=1] -@llvm.dbg.compile_unit131 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([9 x i8]* @10, i32 0, i32 0), i8* getelementptr ([17 x i8]* @18, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 
false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [19 x i8] c"__darwin_blksize_t\00", section "llvm.metadata" ; <[19 x i8]*>:19 [#uses=1] -@llvm.dbg.derivedtype133 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([19 x i8]* @19, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit131 to %0*), i32 94, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype129 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [15 x i8] c"__darwin_dev_t\00", section "llvm.metadata" ; <[15 x i8]*>:20 [#uses=1] -@llvm.dbg.derivedtype135 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([15 x i8]* @20, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit131 to %0*), i32 95, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype133 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [15 x i8] c"__darwin_pid_t\00", section "llvm.metadata" ; <[15 x i8]*>:21 [#uses=1] -@llvm.dbg.derivedtype137 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([15 x i8]* @21, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit131 to %0*), i32 110, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype135 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [21 x i8] c"__darwin_suseconds_t\00", section "llvm.metadata" ; <[21 x i8]*>:22 [#uses=1] -@llvm.dbg.derivedtype139 = internal constant %llvm.dbg.derivedtype.type { i32 
458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([21 x i8]* @22, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit131 to %0*), i32 131, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype137 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [17 x i8] c"__darwin_nl_item\00", section "llvm.metadata" ; <[17 x i8]*>:23 [#uses=1] -@llvm.dbg.derivedtype141 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([17 x i8]* @23, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit131 to %0*), i32 135, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype139 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [13 x i8] c"/usr/include\00", section "llvm.metadata" ; <[13 x i8]*>:24 [#uses=1] -@llvm.dbg.compile_unit143 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([9 x i8]* @10, i32 0, i32 0), i8* getelementptr ([13 x i8]* @24, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [19 x i8] c"__darwin_wctrans_t\00", section "llvm.metadata" ; <[19 x i8]*>:25 [#uses=1] -@llvm.dbg.derivedtype145 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([19 x i8]* @25, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit143 to %0*), i32 29, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype141 to %0*) }, section 
"llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [7 x i8] c"wait.h\00", section "llvm.metadata" ; <[7 x i8]*>:26 [#uses=1] -@llvm.dbg.compile_unit147 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([7 x i8]* @26, i32 0, i32 0), i8* getelementptr ([17 x i8]* @18, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [6 x i8] c"pid_t\00", section "llvm.metadata" ; <[6 x i8]*>:27 [#uses=1] -@llvm.dbg.derivedtype149 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([6 x i8]* @27, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit147 to %0*), i32 83, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype145 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [13 x i8] c"sig_atomic_t\00", section "llvm.metadata" ; <[13 x i8]*>:28 [#uses=1] -@llvm.dbg.derivedtype151 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([13 x i8]* @28, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit147 to %0*), i32 95, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype149 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [10 x i8] c"ct_rune_t\00", section "llvm.metadata" ; <[10 x i8]*>:29 [#uses=1] -@llvm.dbg.derivedtype153 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([10 x i8]* @29, i32 0, 
i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit147 to %0*), i32 262, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype151 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [9 x i8] c"stdlib.h\00", section "llvm.metadata" ; <[9 x i8]*>:30 [#uses=1] -@llvm.dbg.compile_unit155 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([9 x i8]* @30, i32 0, i32 0), i8* getelementptr ([13 x i8]* @24, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [7 x i8] c"rune_t\00", section "llvm.metadata" ; <[7 x i8]*>:31 [#uses=1] -@llvm.dbg.derivedtype157 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([7 x i8]* @31, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit155 to %0*), i32 81, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype153 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [8 x i8] c"types.h\00", section "llvm.metadata" ; <[8 x i8]*>:32 [#uses=1] -@llvm.dbg.compile_unit159 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([8 x i8]* @32, i32 0, i32 0), i8* getelementptr ([18 x i8]* @11, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [8 x i8] c"int32_t\00", section "llvm.metadata" ; <[8 x i8]*>:33 [#uses=1] -@llvm.dbg.derivedtype161 = internal constant 
%llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([8 x i8]* @33, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit159 to %0*), i32 85, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype157 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [11 x i8] c"register_t\00", section "llvm.metadata" ; <[11 x i8]*>:34 [#uses=1] -@llvm.dbg.derivedtype163 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @34, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit159 to %0*), i32 95, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype161 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [6 x i8] c"dev_t\00", section "llvm.metadata" ; <[6 x i8]*>:35 [#uses=1] -@llvm.dbg.derivedtype165 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([6 x i8]* @35, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit159 to %0*), i32 125, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype163 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [11 x i8] c"_structs.h\00", section "llvm.metadata" ; <[11 x i8]*>:36 [#uses=1] -@llvm.dbg.compile_unit167 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([11 x i8]* @36, i32 0, i32 0), i8* getelementptr ([17 x i8]* @18, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, 
section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [12 x i8] c"suseconds_t\00", section "llvm.metadata" ; <[12 x i8]*>:37 [#uses=1] -@llvm.dbg.derivedtype169 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([12 x i8]* @37, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit167 to %0*), i32 191, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype165 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [15 x i8] c"gthr-default.h\00", section "llvm.metadata" ; <[15 x i8]*>:38 [#uses=1] -internal constant [47 x i8] c"/usr/include/c++/4.0.0/i686-apple-darwin9/bits\00", section "llvm.metadata" ; <[47 x i8]*>:39 [#uses=1] -@llvm.dbg.compile_unit172 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([15 x i8]* @38, i32 0, i32 0), i8* getelementptr ([47 x i8]* @39, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [7 x i8] c"wint_t\00", section "llvm.metadata" ; <[7 x i8]*>:40 [#uses=1] -@llvm.dbg.derivedtype174 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([7 x i8]* @40, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit172 to %0*), i32 567, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype169 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [9 x i8] c"stdint.h\00", section "llvm.metadata" ; <[9 x i8]*>:41 [#uses=1] -@llvm.dbg.compile_unit176 = internal constant 
%llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([9 x i8]* @41, i32 0, i32 0), i8* getelementptr ([69 x i8]* @4, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [14 x i8] c"int_least32_t\00", section "llvm.metadata" ; <[14 x i8]*>:42 [#uses=1] -@llvm.dbg.derivedtype178 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([14 x i8]* @42, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit176 to %0*), i32 60, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype174 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [13 x i8] c"int_fast32_t\00", section "llvm.metadata" ; <[13 x i8]*>:43 [#uses=1] -@llvm.dbg.derivedtype180 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([13 x i8]* @43, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit176 to %0*), i32 71, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype178 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [11 x i8] c"postypes.h\00", section "llvm.metadata" ; <[11 x i8]*>:44 [#uses=1] -@llvm.dbg.compile_unit182 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([11 x i8]* @44, i32 0, i32 0), i8* getelementptr ([28 x i8]* @6, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; 
<%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [11 x i8] c"streamsize\00", section "llvm.metadata" ; <[11 x i8]*>:45 [#uses=1] -@llvm.dbg.derivedtype184 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @45, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit182 to %0*), i32 72, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype180 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.derivedtype230 = internal constant %llvm.dbg.derivedtype.type { i32 458774, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([10 x i8]* @9, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit112 to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype184 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [15 x i8] c"stl_iterator.h\00", section "llvm.metadata" ; <[15 x i8]*>:46 [#uses=1] -@llvm.dbg.compile_unit709 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([15 x i8]* @46, i32 0, i32 0), i8* getelementptr ([28 x i8]* @6, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [11 x i8] c"_M_current\00", section "llvm.metadata" ; <[11 x i8]*>:47 [#uses=1] -internal constant [18 x i8] c"__normal_iterator\00", section "llvm.metadata" ; <[18 x i8]*>:48 [#uses=1] -internal constant [10 x i8] c"operator*\00", section "llvm.metadata" ; <[10 x i8]*>:49 [#uses=1] -internal constant [11 x i8] c"operator->\00", section "llvm.metadata" ; <[11 x i8]*>:50 [#uses=1] 
-internal constant [11 x i8] c"operator++\00", section "llvm.metadata" ; <[11 x i8]*>:51 [#uses=1] -internal constant [11 x i8] c"operator--\00", section "llvm.metadata" ; <[11 x i8]*>:52 [#uses=1] -@llvm.dbg.derivedtype759 = internal constant %llvm.dbg.derivedtype.type { i32 458768, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype230 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [11 x i8] c"operator[]\00", section "llvm.metadata" ; <[11 x i8]*>:53 [#uses=1] -internal constant [11 x i8] c"operator+=\00", section "llvm.metadata" ; <[11 x i8]*>:54 [#uses=1] -internal constant [10 x i8] c"operator+\00", section "llvm.metadata" ; <[10 x i8]*>:55 [#uses=1] -internal constant [11 x i8] c"operator-=\00", section "llvm.metadata" ; <[11 x i8]*>:56 [#uses=1] -internal constant [10 x i8] c"operator-\00", section "llvm.metadata" ; <[10 x i8]*>:57 [#uses=1] -internal constant [5 x i8] c"base\00", section "llvm.metadata" ; <[5 x i8]*>:58 [#uses=1] -internal constant [18 x i8] c"cpp_type_traits.h\00", section "llvm.metadata" ; <[18 x i8]*>:59 [#uses=1] -@llvm.dbg.compile_unit1192 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([18 x i8]* @59, i32 0, i32 0), i8* getelementptr ([28 x i8]* @6, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -internal constant [12 x i8] c"__true_type\00", section "llvm.metadata" ; <[12 x i8]*>:60 [#uses=1] -@llvm.dbg.array1195 = internal constant [0 x %0*] zeroinitializer, section "llvm.metadata" ; <[0 x %0*]*> [#uses=1] -@llvm.dbg.composite1196 = internal constant 
%llvm.dbg.composite.type { i32 458771, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([12 x i8]* @60, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit1192 to %0*), i32 93, i64 8, i64 8, i64 0, i32 0, %0* null, %0* bitcast ([0 x %0*]* @llvm.dbg.array1195 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@llvm.dbg.derivedtype1631 = internal constant %llvm.dbg.derivedtype.type { i32 458767, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.derivedtype1633 = internal constant %llvm.dbg.derivedtype.type { i32 458768, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -internal constant [106 x i8] c"__normal_iterator > >\00", section "llvm.metadata" ; <[106 x i8]*>:61 [#uses=1] -@llvm.dbg.derivedtype1768 = internal constant %llvm.dbg.derivedtype.type { i32 458765, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @47, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 589, i64 32, i64 32, i64 0, i32 2, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1631 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.derivedtype1769 = internal constant %llvm.dbg.derivedtype.type { i32 458767, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), 
i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.array1770 = internal constant [2 x %0*] [%0* null, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1769 to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1] -@llvm.dbg.composite1771 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array1770 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@llvm.dbg.subprogram1772 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([18 x i8]* @48, i32 0, i32 0), i8* getelementptr ([18 x i8]* @48, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 600, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1771 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.derivedtype1773 = internal constant %llvm.dbg.derivedtype.type { i32 458790, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1631 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.derivedtype1774 = internal constant %llvm.dbg.derivedtype.type { i32 458768, %0* bitcast 
(%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1773 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.array1775 = internal constant [3 x %0*] [%0* null, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1769 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1774 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1] -@llvm.dbg.composite1776 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array1775 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@llvm.dbg.subprogram1777 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([18 x i8]* @48, i32 0, i32 0), i8* getelementptr ([18 x i8]* @48, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 603, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1776 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.composite1778 = internal constant %llvm.dbg.composite.type { i32 458771, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 587, i64 0, i64 0, i64 0, i32 4, %0* null, %0* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@llvm.dbg.derivedtype1779 = internal 
constant %llvm.dbg.derivedtype.type { i32 458790, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 8, i64 0, i32 0, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1778 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.derivedtype1780 = internal constant %llvm.dbg.derivedtype.type { i32 458768, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1779 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.array1781 = internal constant [3 x %0*] [%0* null, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1769 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1780 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1] -@llvm.dbg.composite1782 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array1781 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -@llvm.dbg.subprogram1783 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([18 x i8]* @48, i32 0, i32 0), i8* getelementptr ([18 x i8]* @48, i32 0, i32 0), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 608, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1782 to %0*), i1 false, i1 
false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.derivedtype1784 = internal constant %llvm.dbg.derivedtype.type { i32 458790, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.derivedtype1785 = internal constant %llvm.dbg.derivedtype.type { i32 458767, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1784 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.array1786 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1633 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1785 to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1] -@llvm.dbg.composite1787 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array1786 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [59 x i8] c"_ZNK9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEdeEv\00", section "llvm.metadata" ; <[59 x i8]*>:62 [#uses=1] -@llvm.dbg.subprogram1789 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([10 x i8]* @49, 
i32 0, i32 0), i8* getelementptr ([10 x i8]* @49, i32 0, i32 0), i8* getelementptr ([59 x i8]* @62, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 613, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1787 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.array1790 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1631 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1785 to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1] -@llvm.dbg.composite1791 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array1790 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [59 x i8] c"_ZNK9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEptEv\00", section "llvm.metadata" ; <[59 x i8]*>:63 [#uses=1] -@llvm.dbg.subprogram1793 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @50, i32 0, i32 0), i8* getelementptr ([11 x i8]* @50, i32 0, i32 0), i8* getelementptr ([59 x i8]* @63, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 617, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1791 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.derivedtype1794 = internal constant %llvm.dbg.derivedtype.type { i32 458768, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast 
(%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 32, i64 32, i64 0, i32 0, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.derivedtype.type*> [#uses=1] -@llvm.dbg.array1795 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1794 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1769 to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1] -@llvm.dbg.composite1796 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array1795 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [58 x i8] c"_ZN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEppEv\00", section "llvm.metadata" ; <[58 x i8]*>:64 [#uses=1] -@llvm.dbg.subprogram1798 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @51, i32 0, i32 0), i8* getelementptr ([11 x i8]* @51, i32 0, i32 0), i8* getelementptr ([58 x i8]* @64, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 621, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1796 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.array1799 = internal constant [3 x %0*] [%0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1769 to %0*), %0* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype103 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1] 
-@llvm.dbg.composite1800 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array1799 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [58 x i8] c"_ZN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEppEi\00", section "llvm.metadata" ; <[58 x i8]*>:65 [#uses=1] -@llvm.dbg.subprogram1802 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @51, i32 0, i32 0), i8* getelementptr ([11 x i8]* @51, i32 0, i32 0), i8* getelementptr ([58 x i8]* @65, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 628, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1800 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -internal constant [58 x i8] c"_ZN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEmmEv\00", section "llvm.metadata" ; <[58 x i8]*>:66 [#uses=1] -@llvm.dbg.subprogram1804 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @52, i32 0, i32 0), i8* getelementptr ([11 x i8]* @52, i32 0, i32 0), i8* getelementptr ([58 x i8]* @66, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 633, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1796 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -internal constant [58 x i8] 
c"_ZN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEmmEi\00", section "llvm.metadata" ; <[58 x i8]*>:67 [#uses=1] -@llvm.dbg.subprogram1806 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @52, i32 0, i32 0), i8* getelementptr ([11 x i8]* @52, i32 0, i32 0), i8* getelementptr ([58 x i8]* @67, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 640, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1800 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.array1807 = internal constant [3 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1633 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1785 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype759 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1] -@llvm.dbg.composite1808 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array1807 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [61 x i8] c"_ZNK9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEixERKi\00", section "llvm.metadata" ; <[61 x i8]*>:68 [#uses=1] -@llvm.dbg.subprogram1810 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @53, i32 0, i32 0), i8* getelementptr ([11 x i8]* @53, i32 0, i32 0), i8* getelementptr ([61 x i8]* @68, i32 0, 
i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 645, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1808 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.array1811 = internal constant [3 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1794 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1769 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype759 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1] -@llvm.dbg.composite1812 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array1811 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [60 x i8] c"_ZN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEpLERKi\00", section "llvm.metadata" ; <[60 x i8]*>:69 [#uses=1] -@llvm.dbg.subprogram1814 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @54, i32 0, i32 0), i8* getelementptr ([11 x i8]* @54, i32 0, i32 0), i8* getelementptr ([60 x i8]* @69, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 649, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1812 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.array1815 = internal constant [3 x %0*] [%0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1785 to %0*), %0* bitcast 
(%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype759 to %0*)], section "llvm.metadata" ; <[3 x %0*]*> [#uses=1] -@llvm.dbg.composite1816 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([3 x %0*]* @llvm.dbg.array1815 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [61 x i8] c"_ZNK9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEplERKi\00", section "llvm.metadata" ; <[61 x i8]*>:70 [#uses=1] -@llvm.dbg.subprogram1818 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([10 x i8]* @55, i32 0, i32 0), i8* getelementptr ([10 x i8]* @55, i32 0, i32 0), i8* getelementptr ([61 x i8]* @70, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 653, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1816 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -internal constant [60 x i8] c"_ZN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEmIERKi\00", section "llvm.metadata" ; <[60 x i8]*>:71 [#uses=1] -@llvm.dbg.subprogram1820 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([11 x i8]* @56, i32 0, i32 0), i8* getelementptr ([11 x i8]* @56, i32 0, i32 0), i8* getelementptr ([60 x i8]* @71, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 657, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1812 to %0*), i1 false, i1 false 
}, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -internal constant [61 x i8] c"_ZNK9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEmiERKi\00", section "llvm.metadata" ; <[61 x i8]*>:72 [#uses=1] -@llvm.dbg.subprogram1822 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([10 x i8]* @57, i32 0, i32 0), i8* getelementptr ([10 x i8]* @57, i32 0, i32 0), i8* getelementptr ([61 x i8]* @72, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 661, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1816 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.array1823 = internal constant [2 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1774 to %0*), %0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1785 to %0*)], section "llvm.metadata" ; <[2 x %0*]*> [#uses=1] -@llvm.dbg.composite1824 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([2 x %0*]* @llvm.dbg.array1823 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [62 x i8] c"_ZNK9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEE4baseEv\00", section "llvm.metadata" ; <[62 x i8]*>:73 [#uses=1] -@llvm.dbg.subprogram1826 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([5 x i8]* @58, i32 0, i32 0), i8* getelementptr ([5 x i8]* @58, i32 0, i32 0), i8* getelementptr 
([62 x i8]* @73, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 665, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1824 to %0*), i1 false, i1 false }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.array1827 = internal constant [16 x %0*] [%0* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype1768 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1772 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1777 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1783 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1789 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1793 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1798 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1802 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1804 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1806 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1810 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1814 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1818 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1820 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1822 to %0*), %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram1826 to %0*)], section "llvm.metadata" ; <[16 x %0*]*> [#uses=1] -@llvm.dbg.composite1828 = internal constant %llvm.dbg.composite.type { i32 458771, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([106 x i8]* @61, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit709 to %0*), i32 587, i64 32, i64 32, i64 0, i32 0, %0* null, %0* bitcast ([16 x %0*]* @llvm.dbg.array1827 to %0*), i32 0 }, section "llvm.metadata" ; 
<%llvm.dbg.composite.type*> [#uses=1] -internal constant [8 x i8] c"__first\00", section "llvm.metadata" ; <[8 x i8]*>:74 [#uses=1] -internal constant [7 x i8] c"__last\00", section "llvm.metadata" ; <[7 x i8]*>:75 [#uses=1] -internal constant [9 x i8] c"__result\00", section "llvm.metadata" ; <[9 x i8]*>:76 [#uses=1] -internal constant [20 x i8] c"stl_uninitialized.h\00", section "llvm.metadata" ; <[20 x i8]*>:77 [#uses=1] -@llvm.dbg.compile_unit2900 = internal constant %llvm.dbg.compile_unit.type { i32 458769, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to %0*), i32 4, i8* getelementptr ([20 x i8]* @77, i32 0, i32 0), i8* getelementptr ([28 x i8]* @6, i32 0, i32 0), i8* getelementptr ([57 x i8]* @2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1] -@llvm.dbg.array4285 = internal constant [5 x %0*] [%0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*), %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*), %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*), %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*), %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1196 to %0*)], section "llvm.metadata" ; <[5 x %0*]*> [#uses=1] -@llvm.dbg.composite4286 = internal constant %llvm.dbg.composite.type { i32 458773, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* null, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i32 0, i64 0, i64 0, i64 0, i32 0, %0* null, %0* bitcast ([5 x %0*]* @llvm.dbg.array4285 to %0*), i32 0 }, section "llvm.metadata" ; <%llvm.dbg.composite.type*> [#uses=1] -internal constant [264 x i8] c"__uninitialized_copy_aux<__gnu_cxx::__normal_iterator > >, __gnu_cxx::__normal_iterator > > >\00", section "llvm.metadata" ; <[264 x i8]*>:78 [#uses=1] -internal constant [112 x i8] 
c"_ZSt24__uninitialized_copy_auxIN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEES6_ET0_T_S8_S7_11__true_type\00", section "llvm.metadata" ; <[112 x i8]*>:79 [#uses=1] -@llvm.dbg.subprogram4289 = internal constant %llvm.dbg.subprogram.type { i32 458798, %0* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to %0*), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to %0*), i8* getelementptr ([264 x i8]* @78, i32 0, i32 0), i8* getelementptr ([264 x i8]* @78, i32 0, i32 0), i8* getelementptr ([112 x i8]* @79, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit2900 to %0*), i32 73, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite4286 to %0*), i1 false, i1 true }, section "llvm.metadata" ; <%llvm.dbg.subprogram.type*> [#uses=1] -@llvm.dbg.variable4290 = internal constant %llvm.dbg.variable.type { i32 459009, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram4289 to %0*), i8* getelementptr ([8 x i8]* @74, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit2900 to %0*), i32 73, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] -@llvm.dbg.variable4291 = internal constant %llvm.dbg.variable.type { i32 459009, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram4289 to %0*), i8* getelementptr ([7 x i8]* @75, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit2900 to %0*), i32 73, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1828 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] -@llvm.dbg.variable4292 = internal constant %llvm.dbg.variable.type { i32 459009, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram4289 to %0*), i8* getelementptr ([9 x i8]* @76, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit2900 to %0*), i32 73, %0* bitcast (%llvm.dbg.composite.type* 
@llvm.dbg.composite1828 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] -@llvm.dbg.variable4293 = internal constant %llvm.dbg.variable.type { i32 459009, %0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram4289 to %0*), i8* getelementptr ([12 x i8]* @5, i32 0, i32 0), %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit2900 to %0*), i32 73, %0* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite1196 to %0*) }, section "llvm.metadata" ; <%llvm.dbg.variable.type*> [#uses=1] -@llvm.used = appending global [1 x i8*] [i8* bitcast (i32* (i32*, i32*, i32*, %1*)* @_ZSt24__uninitialized_copy_auxIN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEES6_ET0_T_S8_S7_11__true_type to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0] - -define i32* @_ZSt24__uninitialized_copy_auxIN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEES6_ET0_T_S8_S7_11__true_type(i32*, i32*, i32*, %1* byval align 4) { - %5 = alloca %2 ; <%2*> [#uses=3] - %6 = alloca %2 ; <%2*> [#uses=3] - %7 = alloca %2 ; <%2*> [#uses=3] - %8 = alloca %2 ; <%2*> [#uses=2] - %9 = alloca %2 ; <%2*> [#uses=2] - %10 = alloca %2 ; <%2*> [#uses=2] - %11 = bitcast i32 0 to i32 ; [#uses=0] - call void @llvm.dbg.func.start(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram4289 to %0*)) - %12 = bitcast %2* %5 to %0* ; <%0*> [#uses=1] - call void @llvm.dbg.declare(%0* %12, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable4290 to %0*)) - %13 = getelementptr %2* %5, i32 0, i32 0 ; [#uses=1] - store i32* %0, i32** %13 - %14 = bitcast %2* %6 to %0* ; <%0*> [#uses=1] - call void @llvm.dbg.declare(%0* %14, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable4291 to %0*)) - %15 = getelementptr %2* %6, i32 0, i32 0 ; [#uses=1] - store i32* %1, i32** %15 - %16 = bitcast %2* %7 to %0* ; <%0*> [#uses=1] - call void @llvm.dbg.declare(%0* %16, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable4292 to %0*)) - %17 = getelementptr %2* %7, i32 0, i32 0 ; [#uses=1] 
- store i32* %2, i32** %17 - %18 = bitcast %1* %3 to %0* ; <%0*> [#uses=1] - call void @llvm.dbg.declare(%0* %18, %0* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable4293 to %0*)) - call void @llvm.dbg.stoppoint(i32 74, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit2900 to %0*)) - %19 = getelementptr %2* %5, i32 0, i32 0 ; [#uses=1] - %20 = load i32** %19 ; [#uses=1] - %21 = getelementptr %2* %6, i32 0, i32 0 ; [#uses=1] - %22 = load i32** %21 ; [#uses=1] - %23 = getelementptr %2* %7, i32 0, i32 0 ; [#uses=1] - %24 = load i32** %23 ; [#uses=1] - %25 = call i32* @_ZSt4copyIN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEES6_ET0_T_S8_S7_(i32* %20, i32* %22, i32* %24) ; [#uses=1] - %26 = bitcast %2* %9 to i32** ; [#uses=1] - store i32* %25, i32** %26, align 4 - %27 = getelementptr %2* %10, i32 0, i32 0 ; [#uses=1] - %28 = getelementptr %2* %9, i32 0, i32 0 ; [#uses=1] - %29 = load i32** %28, align 4 ; [#uses=1] - store i32* %29, i32** %27, align 4 - %30 = getelementptr %2* %8, i32 0, i32 0 ; [#uses=1] - %31 = getelementptr %2* %10, i32 0, i32 0 ; [#uses=1] - %32 = load i32** %31, align 4 ; [#uses=1] - store i32* %32, i32** %30, align 4 - %33 = bitcast %2* %8 to i32** ; [#uses=1] - %34 = load i32** %33 ; [#uses=1] - call void @llvm.dbg.stoppoint(i32 74, i32 0, %0* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit2900 to %0*)) - call void @llvm.dbg.region.end(%0* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram4289 to %0*)) - ret i32* %34 -} - -declare void @llvm.dbg.func.start(%0*) nounwind - -declare void @llvm.dbg.declare(%0*, %0*) nounwind - -declare void @llvm.dbg.stoppoint(i32, i32, %0*) nounwind - -declare void @llvm.dbg.region.end(%0*) nounwind - -declare i32* @_ZSt4copyIN9__gnu_cxx17__normal_iteratorIPmSt6vectorImSaImEEEES6_ET0_T_S8_S7_(i32*, i32*, i32*) diff --git a/test/FrontendC/2011-02-21-DATA-common.c b/test/FrontendC/2011-02-21-DATA-common.c new file mode 100644 index 000000000000..650ae7edddbd --- 
/dev/null +++ b/test/FrontendC/2011-02-21-DATA-common.c @@ -0,0 +1,5 @@ +// RUN: %llvmgcc -S %s -o /dev/null +struct rtxc_snapshot { + int a, b, c, d; +}; +__attribute__ ((section("__DATA, __common"))) static struct rtxc_snapshot rtxc_log_A[4]; diff --git a/test/MC/ARM/bracket-darwin.s b/test/MC/ARM/bracket-darwin.s new file mode 100644 index 000000000000..dc8b34857555 --- /dev/null +++ b/test/MC/ARM/bracket-darwin.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple arm-apple-darwin %s 2> %t +// RUN: FileCheck -input-file %t %s + +// CHECK: error: brackets expression not supported on this target +.byte [4-3] diff --git a/test/MC/ARM/bracket-exprs.s b/test/MC/ARM/bracket-exprs.s new file mode 100644 index 000000000000..922bf7037019 --- /dev/null +++ b/test/MC/ARM/bracket-exprs.s @@ -0,0 +1,15 @@ +// RUN: llvm-mc -triple arm-unknown-linux %s | FileCheck %s + +// CHECK: .byte 1 +.if [~0 >> 1] == -1 +.byte 1 +.else +.byte 2 +.endif + +// CHECK: .byte 3 +.if 4 * [4 + (3 + [2 * 2] + 1)] == 48 +.byte 3 +.else +.byte 4 +.endif diff --git a/test/MC/MachO/darwin-ARM-reloc.s b/test/MC/ARM/darwin-ARM-reloc.s similarity index 100% rename from test/MC/MachO/darwin-ARM-reloc.s rename to test/MC/ARM/darwin-ARM-reloc.s diff --git a/test/MC/MachO/darwin-Thumb-reloc.s b/test/MC/ARM/darwin-Thumb-reloc.s similarity index 100% rename from test/MC/MachO/darwin-Thumb-reloc.s rename to test/MC/ARM/darwin-Thumb-reloc.s diff --git a/test/MC/AsmParser/full_line_comment.s b/test/MC/ARM/full_line_comment.s similarity index 100% rename from test/MC/AsmParser/full_line_comment.s rename to test/MC/ARM/full_line_comment.s diff --git a/test/MC/AsmParser/exprs.s b/test/MC/AsmParser/exprs.s index 0861922b603c..153701d6852a 100644 --- a/test/MC/AsmParser/exprs.s +++ b/test/MC/AsmParser/exprs.s @@ -35,8 +35,6 @@ k: check_expr 1 << 1, 2 check_expr 2 >> 1, 1 check_expr (~0 >> 1), -1 - check_expr [~0 >> 1], -1 - check_expr 4 * [4 + (3 + [2 * 2] + 1)], 48 check_expr 3 - 2, 1 check_expr 1 ^ 3, 2 check_expr 1 && 
2, 1 diff --git a/test/MC/Disassembler/X86/enhanced.txt b/test/MC/Disassembler/X86/enhanced.txt new file mode 100644 index 000000000000..fc6949901b72 --- /dev/null +++ b/test/MC/Disassembler/X86/enhanced.txt @@ -0,0 +1,6 @@ +# RUN: llvm-mc --edis %s -triple=x86_64-apple-darwin9 |& FileCheck %s + +# CHECK: [o:jne][w: ][0-p:-][0-l:10=10] <br> 0:[RIP/111](pc)=18446744073709551606 +0x0f 0x85 0xf6 0xff 0xff 0xff +# CHECK: [o:movq][w: ][1-r:%gs=r63][1-p::][1-l:8=8][p:,][w: ][0-r:%rcx=r108] <mov> 0:[RCX/108]=0 1:[GS/63]=8 +0x65 0x48 0x8b 0x0c 0x25 0x08 0x00 0x00 0x00 diff --git a/test/MC/ELF/bracket-exprs.s b/test/MC/ELF/bracket-exprs.s new file mode 100644 index 000000000000..96f9f9aa450a --- /dev/null +++ b/test/MC/ELF/bracket-exprs.s @@ -0,0 +1,15 @@ +// RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s + +// CHECK: .byte 1 +.if [~0 >> 1] == -1 +.byte 1 +.else +.byte 2 +.endif + +// CHECK: .byte 3 +.if 4 * [4 + (3 + [2 * 2] + 1)] == 48 +.byte 3 +.else +.byte 4 +.endif diff --git a/test/MC/AsmParser/paren.s b/test/MC/ELF/bracket.s similarity index 100% rename from test/MC/AsmParser/paren.s rename to test/MC/ELF/bracket.s diff --git a/test/MC/ELF/org.s b/test/MC/ELF/org.s new file mode 100644 index 000000000000..c073fa5d808e --- /dev/null +++ b/test/MC/ELF/org.s @@ -0,0 +1,13 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s + + .zero 4 +foo: + .zero 4 + .org foo+16 + +// CHECK: (('sh_name', 0x00000001) # '.text' +// CHECK-NEXT: ('sh_type', +// CHECK-NEXT: ('sh_flags', +// CHECK-NEXT: ('sh_addr', +// CHECK-NEXT: ('sh_offset' +// CHECK-NEXT: ('sh_size', 0x00000014) diff --git a/test/MC/ELF/pr9292.s b/test/MC/ELF/pr9292.s new file mode 100644 index 000000000000..a198fed87949 --- /dev/null +++ b/test/MC/ELF/pr9292.s @@ -0,0 +1,26 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump | FileCheck %s + +// Test that both foo and bar are undefined. 
+ +.globl foo +.globl bar +mov %eax,bar + + +// CHECK: (('st_name', 0x00000005) # 'bar' +// CHECK-NEXT: ('st_bind', 0x00000001) +// CHECK-NEXT: ('st_type', 0x00000000) +// CHECK-NEXT: ('st_other', 0x00000000) +// CHECK-NEXT: ('st_shndx', 0x00000000) +// CHECK-NEXT: ('st_value', 0x0000000000000000) +// CHECK-NEXT: ('st_size', 0x0000000000000000) +// CHECK-NEXT: ), +// CHECK-NEXT: # Symbol 0x00000005 +// CHECK-NEXT: (('st_name', 0x00000001) # 'foo' +// CHECK-NEXT: ('st_bind', 0x00000001) +// CHECK-NEXT: ('st_type', 0x00000000) +// CHECK-NEXT: ('st_other', 0x00000000) +// CHECK-NEXT: ('st_shndx', 0x00000000) +// CHECK-NEXT: ('st_value', 0x0000000000000000) +// CHECK-NEXT: ('st_size', 0x0000000000000000) +// CHECK-NEXT: ), diff --git a/test/MC/ELF/relocation-pc.s b/test/MC/ELF/relocation-pc.s new file mode 100644 index 000000000000..58c5f410b517 --- /dev/null +++ b/test/MC/ELF/relocation-pc.s @@ -0,0 +1,33 @@ +// RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s -o - | elf-dump --dump-section-data | FileCheck %s + +// Test that we produce the correct relocation. 
+ + loope 0 # R_X86_64_PC8 + jmp -256 # R_X86_64_PC32 + +// CHECK: # Section 0x00000007 +// CHECK-NEXT: (('sh_name', 0x0000002c) # '.rela.text' +// CHECK-NEXT: ('sh_type', 0x00000004) +// CHECK-NEXT: ('sh_flags', 0x00000000) +// CHECK-NEXT: ('sh_addr', 0x00000000) +// CHECK-NEXT: ('sh_offset', 0x000000e8) +// CHECK-NEXT: ('sh_size', 0x00000030) +// CHECK-NEXT: ('sh_link', 0x00000005) +// CHECK-NEXT: ('sh_info', 0x00000001) +// CHECK-NEXT: ('sh_addralign', 0x00000008) +// CHECK-NEXT: ('sh_entsize', 0x00000018) +// CHECK-NEXT: ('_relocations', [ +// CHECK-NEXT: # Relocation 0x00000000 +// CHECK-NEXT: (('r_offset', 0x00000001) +// CHECK-NEXT: ('r_sym', 0x00000000) +// CHECK-NEXT: ('r_type', 0x0000000f) +// CHECK-NEXT: ('r_addend', 0x00000000) +// CHECK-NEXT: ), +// CHECK-NEXT: # Relocation 0x00000001 +// CHECK-NEXT: (('r_offset', 0x00000003) +// CHECK-NEXT: ('r_sym', 0x00000000) +// CHECK-NEXT: ('r_type', 0x00000002) +// CHECK-NEXT: ('r_addend', 0x00000000) +// CHECK-NEXT: ), +// CHECK-NEXT: ]) +// CHECK-NEXT: ), diff --git a/test/MC/X86/x86-32.s b/test/MC/X86/x86-32.s index de6b96389ead..723983da6de6 100644 --- a/test/MC/X86/x86-32.s +++ b/test/MC/X86/x86-32.s @@ -808,3 +808,11 @@ pshufw $90, %mm4, %mm0 // CHECK: ud2b // CHECK: encoding: [0x0f,0xb9] ud2b + +// CHECK: loope 0 +// CHECK: encoding: [0xe1,A] + loopz 0 + +// CHECK: loopne 0 +// CHECK: encoding: [0xe0,A] + loopnz 0 diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s index c8b6414d59a6..ee9757fa3176 100644 --- a/test/MC/X86/x86-64.s +++ b/test/MC/X86/x86-64.s @@ -241,13 +241,32 @@ inl %dx // PR8114 // CHECK: outb %al, %dx +// CHECK: outb %al, %dx +// CHECK: outw %ax, %dx // CHECK: outw %ax, %dx // CHECK: outl %eax, %dx +// CHECK: outl %eax, %dx -out %al, (%dx) -out %ax, (%dx) -outl %eax, (%dx) +out %al, (%dx) +outb %al, (%dx) +out %ax, (%dx) +outw %ax, (%dx) +out %eax, (%dx) +outl %eax, (%dx) +// CHECK: inb %dx, %al +// CHECK: inb %dx, %al +// CHECK: inw %dx, %ax +// CHECK: inw %dx, %ax +// CHECK: inl 
%dx, %eax +// CHECK: inl %dx, %eax + +in (%dx), %al +inb (%dx), %al +in (%dx), %ax +inw (%dx), %ax +in (%dx), %eax +inl (%dx), %eax // rdar://8431422 @@ -942,3 +961,15 @@ movq 18446744073709551615,%rbx // CHECK: movq -1, %rbx // PR8946 movdqu %xmm0, %xmm1 // CHECK: movdqu %xmm0, %xmm1 # encoding: [0xf3,0x0f,0x6f,0xc8] + +// PR8935 +xgetbv // CHECK: xgetbv # encoding: [0x0f,0x01,0xd0] +xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1] + +// CHECK: loope 0 +// CHECK: encoding: [0xe1,A] + loopz 0 + +// CHECK: loopne 0 +// CHECK: encoding: [0xe0,A] + loopnz 0 diff --git a/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll b/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll deleted file mode 100644 index fdb8fd9363c6..000000000000 --- a/test/Transforms/InstCombine/2003-11-13-ConstExprCastCall.ll +++ /dev/null @@ -1,12 +0,0 @@ -; RUN: opt < %s -instcombine -S | FileCheck %s -target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" - -declare void @free(i8*) - -define void @test(i32* %X) { - call void (...)* bitcast (void (i8*)* @free to void (...)*)( i32* %X ) ; :1 [#uses=0] -; CHECK: %tmp = bitcast i32* %X to i8* -; CHECK: call void @free(i8* %tmp) - ret void -; CHECK: ret void -} diff --git a/test/Transforms/InstCombine/call.ll b/test/Transforms/InstCombine/call.ll index c256724a08ee..2ef8dc0670f0 100644 --- a/test/Transforms/InstCombine/call.ll +++ b/test/Transforms/InstCombine/call.ll @@ -32,7 +32,7 @@ define i32 @test2(i32 %A) { ; Resolving this should insert a cast from sbyte to int, following the C ; promotion rules. -declare void @test3a(i8, ...) +define void @test3a(i8, ...) 
{unreachable } define void @test3(i8 %A, i8 %B) { call void bitcast (void (i8, ...)* @test3a to void (i8, i8)*)( i8 %A, i8 %B @@ -116,3 +116,17 @@ try.handler: ; preds = %entry ; CHECK: @test8() { ; CHECK-NEXT: invoke void @test8a() + + +; Don't turn this into a direct call, because test9x is just a prototype and +; doing so will make it varargs. +; rdar://9038601 +declare i8* @test9x(i8*, i8*, ...) noredzone +define i8* @test9(i8* %arg, i8* %tmp3) nounwind ssp noredzone { +entry: + %call = call i8* bitcast (i8* (i8*, i8*, ...)* @test9x to i8* (i8*, i8*)*)(i8* %arg, i8* %tmp3) noredzone + ret i8* %call +; CHECK: @test9( +; CHECK: call i8* bitcast +} + diff --git a/test/Transforms/InstCombine/or-xor.ll b/test/Transforms/InstCombine/or-xor.ll new file mode 100644 index 000000000000..f496dd48c402 --- /dev/null +++ b/test/Transforms/InstCombine/or-xor.ll @@ -0,0 +1,94 @@ +; RUN: opt -S -instcombine < %s | FileCheck %s + +define i32 @test1(i32 %x, i32 %y) nounwind { + %or = or i32 %x, %y + %not = xor i32 %or, -1 + %z = or i32 %x, %not + ret i32 %z +; CHECK: @test1 +; CHECK-NEXT: %y.not = xor i32 %y, -1 +; CHECK-NEXT: %z = or i32 %y.not, %x +; CHECK-NEXT: ret i32 %z +} + +define i32 @test2(i32 %x, i32 %y) nounwind { + %or = or i32 %x, %y + %not = xor i32 %or, -1 + %z = or i32 %y, %not + ret i32 %z +; CHECK: @test2 +; CHECK-NEXT: %x.not = xor i32 %x, -1 +; CHECK-NEXT: %z = or i32 %x.not, %y +; CHECK-NEXT: ret i32 %z +} + +define i32 @test3(i32 %x, i32 %y) nounwind { + %xor = xor i32 %x, %y + %not = xor i32 %xor, -1 + %z = or i32 %x, %not + ret i32 %z +; CHECK: @test3 +; CHECK-NEXT: %y.not = xor i32 %y, -1 +; CHECK-NEXT: %z = or i32 %y.not, %x +; CHECK-NEXT: ret i32 %z +} + +define i32 @test4(i32 %x, i32 %y) nounwind { + %xor = xor i32 %x, %y + %not = xor i32 %xor, -1 + %z = or i32 %y, %not + ret i32 %z +; CHECK: @test4 +; CHECK-NEXT: %x.not = xor i32 %x, -1 +; CHECK-NEXT: %z = or i32 %x.not, %y +; CHECK-NEXT: ret i32 %z +} + +define i32 @test5(i32 %x, i32 %y) nounwind { + 
%and = and i32 %x, %y + %not = xor i32 %and, -1 + %z = or i32 %x, %not + ret i32 %z +; CHECK: @test5 +; CHECK-NEXT: ret i32 -1 +} + +define i32 @test6(i32 %x, i32 %y) nounwind { + %and = and i32 %x, %y + %not = xor i32 %and, -1 + %z = or i32 %y, %not + ret i32 %z +; CHECK: @test6 +; CHECK-NEXT: ret i32 -1 +} + +define i32 @test7(i32 %x, i32 %y) nounwind { + %xor = xor i32 %x, %y + %z = or i32 %y, %xor + ret i32 %z +; CHECK: @test7 +; CHECK-NEXT: %z = or i32 %x, %y +; CHECK-NEXT: ret i32 %z +} + +define i32 @test8(i32 %x, i32 %y) nounwind { + %not = xor i32 %y, -1 + %xor = xor i32 %x, %not + %z = or i32 %y, %xor + ret i32 %z +; CHECK: @test8 +; CHECK-NEXT: %x.not = xor i32 %x, -1 +; CHECK-NEXT: %z = or i32 %x.not, %y +; CHECK-NEXT: ret i32 %z +} + +define i32 @test9(i32 %x, i32 %y) nounwind { + %not = xor i32 %x, -1 + %xor = xor i32 %not, %y + %z = or i32 %x, %xor + ret i32 %z +; CHECK: @test9 +; CHECK-NEXT: %y.not = xor i32 %y, -1 +; CHECK-NEXT: %z = or i32 %y.not, %x +; CHECK-NEXT: ret i32 %z +} diff --git a/test/Transforms/LoopDeletion/multiple-exits.ll b/test/Transforms/LoopDeletion/multiple-exits.ll new file mode 100644 index 000000000000..6af413b49cd9 --- /dev/null +++ b/test/Transforms/LoopDeletion/multiple-exits.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -loop-deletion -S | FileCheck %s + +; Checks whether dead loops with multiple exits can be eliminated + +; CHECK: entry: +; CHECK-NEXT: br label %return + +; CHECK: return: +; CHECK-NEXT: ret void + +define void @foo(i64 %n, i64 %m) nounwind { +entry: + br label %bb + +bb: + %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb2 ] + %t0 = add i64 %x.0, 1 + %t1 = icmp slt i64 %x.0, %n + br i1 %t1, label %bb2, label %return +bb2: + %t2 = icmp slt i64 %x.0, %m + br i1 %t1, label %bb, label %return + +return: + ret void +} diff --git a/test/Transforms/SimplifyCFG/select-gep.ll b/test/Transforms/SimplifyCFG/select-gep.ll new file mode 100644 index 000000000000..009f05e5574c --- /dev/null +++ 
b/test/Transforms/SimplifyCFG/select-gep.ll @@ -0,0 +1,40 @@ +; RUN: opt -S -simplifycfg %s | FileCheck %s + +define i8* @test1(i8* %x, i64 %y) nounwind { +entry: + %tmp1 = load i8* %x, align 1 + %cmp = icmp eq i8 %tmp1, 47 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %incdec.ptr = getelementptr inbounds i8* %x, i64 %y + br label %if.end + +if.end: + %x.addr = phi i8* [ %incdec.ptr, %if.then ], [ %x, %entry ] + ret i8* %x.addr + +; CHECK: @test1 +; CHECK-NOT: select +; CHECK: ret i8* %x.addr +} + +%ST = type { i8, i8 } + +define i8* @test2(%ST* %x, i8* %y) nounwind { +entry: + %cmp = icmp eq %ST* %x, null + br i1 %cmp, label %if.then, label %if.end + +if.then: + %incdec.ptr = getelementptr %ST* %x, i32 0, i32 1 + br label %if.end + +if.end: + %x.addr = phi i8* [ %incdec.ptr, %if.then ], [ %y, %entry ] + ret i8* %x.addr + +; CHECK: @test2 +; CHECK: %x.addr = select i1 %cmp, i8* %incdec.ptr, i8* %y +; CHECK: ret i8* %x.addr +} diff --git a/test/lit.cfg b/test/lit.cfg index 21b0a48be9f1..9a2f74c21c93 100644 --- a/test/lit.cfg +++ b/test/lit.cfg @@ -175,8 +175,12 @@ for sub in ['llvmgcc', 'llvmgxx', 'emitir', 'compile_cxx', 'compile_c', # (llvm_tools_dir in lit parlance). # Don't match 'bugpoint-' or 'clang-'. # Don't match '/clang'. +if os.pathsep == ';': + pathext = os.environ.get('PATHEXT', '').split(';') +else: + pathext = [''] for pattern in [r"\bbugpoint\b(?!-)", r"(? 
SilencePasses("silence-passes", cl::desc("Suppress output of running passes (both stdout and stderr)")); +cl::opt SilencePasses("silence-passes", + cl::desc("Suppress output of running passes (both stdout and stderr)")); static cl::list OptArgs("opt-args", cl::Positional, cl::desc("..."), diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index ad2774a64ef4..7ce176002551 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -64,7 +64,7 @@ namespace { std::string output_name = ""; std::list Modules; std::vector Cleanup; - lto_code_gen_t code_gen; + lto_code_gen_t code_gen = NULL; } namespace options { @@ -73,8 +73,6 @@ namespace options { static generate_bc generate_bc_file = BC_NO; static std::string bc_path; static std::string obj_path; - static std::string as_path; - static std::vector as_args; static std::vector pass_through; static std::string extra_library_path; static std::string triple; @@ -96,16 +94,6 @@ namespace options { generate_api_file = true; } else if (opt.startswith("mcpu=")) { mcpu = opt.substr(strlen("mcpu=")); - } else if (opt.startswith("as=")) { - if (!as_path.empty()) { - (*message)(LDPL_WARNING, "Path to as specified twice. 
" - "Discarding %s", opt_); - } else { - as_path = opt.substr(strlen("as=")); - } - } else if (opt.startswith("as-arg=")) { - llvm::StringRef item = opt.substr(strlen("as-arg=")); - as_args.push_back(item.str()); } else if (opt.startswith("extra-library-path=")) { extra_library_path = opt.substr(strlen("extra_library_path=")); } else if (opt.startswith("pass-through=")) { @@ -196,6 +184,8 @@ ld_plugin_status onload(ld_plugin_tv *tv) { if ((*callback)(all_symbols_read_hook) != LDPS_OK) return LDPS_ERR; + + code_gen = lto_codegen_create(); } break; case LDPT_REGISTER_CLEANUP_HOOK: { ld_plugin_register_cleanup callback; @@ -236,8 +226,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { return LDPS_ERR; } - code_gen = lto_codegen_create(); - return LDPS_OK; } @@ -322,6 +310,7 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, cf.syms.push_back(ld_plugin_symbol()); ld_plugin_symbol &sym = cf.syms.back(); sym.name = const_cast(lto_module_get_symbol_name(M, i)); + sym.name = strdup(sym.name); sym.version = NULL; int scope = attrs & LTO_SYMBOL_SCOPE_MASK; @@ -379,7 +368,11 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, } } - lto_codegen_add_module(code_gen, M); + if (code_gen) + lto_codegen_add_module(code_gen, M); + + lto_module_dispose(M); + return LDPS_OK; } @@ -389,6 +382,8 @@ static ld_plugin_status claim_file_hook(const ld_plugin_input_file *file, /// codegen. 
static ld_plugin_status all_symbols_read_hook(void) { std::ofstream api_file; + assert(code_gen); + if (options::generate_api_file) { api_file.open("apifile.txt", std::ofstream::out | std::ofstream::trunc); if (!api_file.is_open()) { @@ -425,18 +420,6 @@ static ld_plugin_status all_symbols_read_hook(void) { lto_codegen_set_pic_model(code_gen, output_type); lto_codegen_set_debug_model(code_gen, LTO_DEBUG_MODEL_DWARF); - if (!options::as_path.empty()) { - sys::Path p = sys::Program::FindProgramByName(options::as_path); - lto_codegen_set_assembler_path(code_gen, p.c_str()); - } - if (!options::as_args.empty()) { - std::vector as_args_p; - for (std::vector::iterator I = options::as_args.begin(), - E = options::as_args.end(); I != E; ++I) { - as_args_p.push_back(I->c_str()); - } - lto_codegen_set_assembler_args(code_gen, &as_args_p[0], as_args_p.size()); - } if (!options::mcpu.empty()) lto_codegen_set_cpu(code_gen, options::mcpu.c_str()); @@ -469,10 +452,10 @@ static ld_plugin_status all_symbols_read_hook(void) { std::string ErrMsg; const char *objPath; + sys::Path uniqueObjPath("/tmp/llvmgold.o"); if (!options::obj_path.empty()) { objPath = options::obj_path.c_str(); } else { - sys::Path uniqueObjPath("/tmp/llvmgold.o"); if (uniqueObjPath.createTemporaryFileOnDisk(true, &ErrMsg)) { (*message)(LDPL_ERROR, "%s", ErrMsg.c_str()); return LDPS_ERR; @@ -497,6 +480,13 @@ static ld_plugin_status all_symbols_read_hook(void) { objFile.keep(); lto_codegen_dispose(code_gen); + for (std::list::iterator I = Modules.begin(), + E = Modules.end(); I != E; ++I) { + for (unsigned i = 0; i != I->syms.size(); ++i) { + ld_plugin_symbol &sym = I->syms[i]; + free(sym.name); + } + } if ((*add_input_file)(objPath) != LDPS_OK) { (*message)(LDPL_ERROR, "Unable to add .o file to the link."); diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt index d33ff0dad843..b6f42895c171 100644 --- a/tools/llvm-config/CMakeLists.txt +++ b/tools/llvm-config/CMakeLists.txt @@ -124,8 
+124,7 @@ add_custom_command(OUTPUT ${LLVM_CONFIG} add_custom_target(llvm-config.target ALL DEPENDS ${LLVM_CONFIG}) -get_property(llvm_lib_targets GLOBAL PROPERTY LLVM_LIB_TARGETS) -add_dependencies(llvm-config.target ${llvm_lib_targets}) +add_dependencies( llvm-config.target ${llvm_libs} ) # Make sure that llvm-config builds before the llvm tools, so we have # LibDeps.txt and can use it for updating the hard-coded library diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp index c29d82a2cb38..d98b57ebc659 100644 --- a/tools/llvm-mc/Disassembler.cpp +++ b/tools/llvm-mc/Disassembler.cpp @@ -227,113 +227,120 @@ int Disassembler::disassembleEnhanced(const std::string &TS, } EDDisassembler::initialize(); - EDDisassembler *disassembler = - EDDisassembler::getDisassembler(TS.c_str(), AS); + OwningPtr + disassembler(EDDisassembler::getDisassembler(TS.c_str(), AS)); if (disassembler == 0) { errs() << "error: couldn't get disassembler for " << TS << '\n'; return -1; } - EDInst *inst = - disassembler->createInst(byteArrayReader, 0, &ByteArray); - - if (inst == 0) { - errs() << "error: Didn't get an instruction\n"; - return -1; - } + while (ByteArray.size()) { + OwningPtr + inst(disassembler->createInst(byteArrayReader, 0, &ByteArray)); - unsigned numTokens = inst->numTokens(); - if ((int)numTokens < 0) { - errs() << "error: couldn't count the instruction's tokens\n"; - return -1; - } - - for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) { - EDToken *token; - - if (inst->getToken(token, tokenIndex)) { - errs() << "error: Couldn't get token\n"; + ByteArray.erase (ByteArray.begin(), ByteArray.begin() + inst->byteSize()); + + if (inst == 0) { + errs() << "error: Didn't get an instruction\n"; return -1; } - const char *buf; - if (token->getString(buf)) { - errs() << "error: Couldn't get string for token\n"; + unsigned numTokens = inst->numTokens(); + if ((int)numTokens < 0) { + errs() << "error: couldn't count the instruction's 
tokens\n"; return -1; } - Out << '['; - int operandIndex = token->operandID(); - - if (operandIndex >= 0) - Out << operandIndex << "-"; - - switch (token->type()) { - default: Out << "?"; break; - case EDToken::kTokenWhitespace: Out << "w"; break; - case EDToken::kTokenPunctuation: Out << "p"; break; - case EDToken::kTokenOpcode: Out << "o"; break; - case EDToken::kTokenLiteral: Out << "l"; break; - case EDToken::kTokenRegister: Out << "r"; break; - } - - Out << ":" << buf; - - if (token->type() == EDToken::kTokenLiteral) { - Out << "="; - if (token->literalSign()) - Out << "-"; - uint64_t absoluteValue; - if (token->literalAbsoluteValue(absoluteValue)) { - errs() << "error: Couldn't get the value of a literal token\n"; + for (unsigned tokenIndex = 0; tokenIndex != numTokens; ++tokenIndex) { + EDToken *token; + + if (inst->getToken(token, tokenIndex)) { + errs() << "error: Couldn't get token\n"; return -1; } - Out << absoluteValue; - } else if (token->type() == EDToken::kTokenRegister) { - Out << "="; - unsigned regID; - if (token->registerID(regID)) { - errs() << "error: Couldn't get the ID of a register token\n"; + + const char *buf; + if (token->getString(buf)) { + errs() << "error: Couldn't get string for token\n"; return -1; } - Out << "r" << regID; + + Out << '['; + int operandIndex = token->operandID(); + + if (operandIndex >= 0) + Out << operandIndex << "-"; + + switch (token->type()) { + default: Out << "?"; break; + case EDToken::kTokenWhitespace: Out << "w"; break; + case EDToken::kTokenPunctuation: Out << "p"; break; + case EDToken::kTokenOpcode: Out << "o"; break; + case EDToken::kTokenLiteral: Out << "l"; break; + case EDToken::kTokenRegister: Out << "r"; break; + } + + Out << ":" << buf; + + if (token->type() == EDToken::kTokenLiteral) { + Out << "="; + if (token->literalSign()) + Out << "-"; + uint64_t absoluteValue; + if (token->literalAbsoluteValue(absoluteValue)) { + errs() << "error: Couldn't get the value of a literal token\n"; + return -1; + } 
+ Out << absoluteValue; + } else if (token->type() == EDToken::kTokenRegister) { + Out << "="; + unsigned regID; + if (token->registerID(regID)) { + errs() << "error: Couldn't get the ID of a register token\n"; + return -1; + } + Out << "r" << regID; + } + + Out << "]"; } - Out << "]"; - } - - Out << " "; + Out << " "; + + if (inst->isBranch()) + Out << "<br> "; + if (inst->isMove()) + Out << "<mov> "; - if (inst->isBranch()) - Out << "
"; - if (inst->isMove()) - Out << " "; - - unsigned numOperands = inst->numOperands(); - - if ((int)numOperands < 0) { - errs() << "error: Couldn't count operands\n"; - return -1; - } - - for (unsigned operandIndex = 0; operandIndex != numOperands; ++operandIndex) { - Out << operandIndex << ":"; + unsigned numOperands = inst->numOperands(); - EDOperand *operand; - if (inst->getOperand(operand, operandIndex)) { - errs() << "error: couldn't get operand\n"; + if ((int)numOperands < 0) { + errs() << "error: Couldn't count operands\n"; return -1; } - uint64_t evaluatedResult; - void *Arg[] = { disassembler, &Out }; - evaluatedResult = operand->evaluate(evaluatedResult, verboseEvaluator, Arg); - Out << "=" << evaluatedResult << " "; + for (unsigned operandIndex = 0; operandIndex != numOperands; ++operandIndex) { + Out << operandIndex << ":"; + + EDOperand *operand; + if (inst->getOperand(operand, operandIndex)) { + errs() << "error: couldn't get operand\n"; + return -1; + } + + uint64_t evaluatedResult; + void *Arg[] = { disassembler.get(), &Out }; + if (operand->evaluate(evaluatedResult, verboseEvaluator, Arg)) { + errs() << "error: Couldn't evaluate an operand\n"; + return -1; + } + Out << "=" << evaluatedResult << " "; + } + + Out << '\n'; } - Out << '\n'; - return 0; } diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index adb7102b3c76..f72fdb0fca94 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -71,10 +71,11 @@ LTOCodeGenerator::LTOCodeGenerator() _linker("LinkTimeOptimizer", "ld-temp.o", _context), _target(NULL), _emitDwarfDebugInfo(false), _scopeRestrictionsDone(false), _codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), - _nativeObjectFile(NULL), _assemblerPath(NULL) + _nativeObjectFile(NULL) { InitializeAllTargets(); InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); } LTOCodeGenerator::~LTOCodeGenerator() @@ -126,21 +127,6 @@ void LTOCodeGenerator::setCpu(const char* mCpu) _mCpu = mCpu; } -void 
LTOCodeGenerator::setAssemblerPath(const char* path) -{ - if ( _assemblerPath ) - delete _assemblerPath; - _assemblerPath = new sys::Path(path); -} - -void LTOCodeGenerator::setAssemblerArgs(const char** args, int nargs) -{ - for (int i = 0; i < nargs; ++i) { - const char *arg = args[i]; - _assemblerArgs.push_back(arg); - } -} - void LTOCodeGenerator::addMustPreserveSymbol(const char* sym) { _mustPreserveSymbols[sym] = 1; @@ -183,55 +169,42 @@ bool LTOCodeGenerator::writeMergedModules(const char *path, const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg) { - // make unique temp .s file to put generated assembly code - sys::Path uniqueAsmPath("lto-llvm.s"); - if ( uniqueAsmPath.createTemporaryFileOnDisk(false, &errMsg) ) - return NULL; - sys::RemoveFileOnSignal(uniqueAsmPath); - - // generate assembly code - bool genResult = false; - { - tool_output_file asmFile(uniqueAsmPath.c_str(), errMsg); - if (!errMsg.empty()) - return NULL; - genResult = this->generateAssemblyCode(asmFile.os(), errMsg); - asmFile.os().close(); - if (asmFile.os().has_error()) { - asmFile.os().clear_error(); - return NULL; - } - asmFile.keep(); - } - if ( genResult ) { - uniqueAsmPath.eraseFromDisk(); - return NULL; - } - // make unique temp .o file to put generated object file sys::PathWithStatus uniqueObjPath("lto-llvm.o"); if ( uniqueObjPath.createTemporaryFileOnDisk(false, &errMsg) ) { - uniqueAsmPath.eraseFromDisk(); + uniqueObjPath.eraseFromDisk(); return NULL; } sys::RemoveFileOnSignal(uniqueObjPath); - // assemble the assembly code - const std::string& uniqueObjStr = uniqueObjPath.str(); - bool asmResult = this->assemble(uniqueAsmPath.str(), uniqueObjStr, errMsg); - if ( !asmResult ) { - // remove old buffer if compile() called twice - delete _nativeObjectFile; - - // read .o file into memory buffer - OwningPtr BuffPtr; - if (error_code ec = MemoryBuffer::getFile(uniqueObjStr.c_str(),BuffPtr)) - errMsg = ec.message(); - _nativeObjectFile = BuffPtr.take(); + // 
generate object file + bool genResult = false; + tool_output_file objFile(uniqueObjPath.c_str(), errMsg); + if (!errMsg.empty()) + return NULL; + genResult = this->generateObjectFile(objFile.os(), errMsg); + objFile.os().close(); + if (objFile.os().has_error()) { + objFile.os().clear_error(); + return NULL; + } + objFile.keep(); + if ( genResult ) { + uniqueObjPath.eraseFromDisk(); + return NULL; } + const std::string& uniqueObjStr = uniqueObjPath.str(); + // remove old buffer if compile() called twice + delete _nativeObjectFile; + + // read .o file into memory buffer + OwningPtr BuffPtr; + if (error_code ec = MemoryBuffer::getFile(uniqueObjStr.c_str(),BuffPtr)) + errMsg = ec.message(); + _nativeObjectFile = BuffPtr.take(); + // remove temp files - uniqueAsmPath.eraseFromDisk(); uniqueObjPath.eraseFromDisk(); // return buffer, unless error @@ -241,67 +214,6 @@ const void* LTOCodeGenerator::compile(size_t* length, std::string& errMsg) return _nativeObjectFile->getBufferStart(); } - -bool LTOCodeGenerator::assemble(const std::string& asmPath, - const std::string& objPath, std::string& errMsg) -{ - sys::Path tool; - bool needsCompilerOptions = true; - if ( _assemblerPath ) { - tool = *_assemblerPath; - needsCompilerOptions = false; - } else { - // find compiler driver - tool = sys::Program::FindProgramByName("gcc"); - if ( tool.isEmpty() ) { - errMsg = "can't locate gcc"; - return true; - } - } - - // build argument list - std::vector args; - llvm::Triple targetTriple(_linker.getModule()->getTargetTriple()); - const char *arch = targetTriple.getArchNameForAssembler(); - - args.push_back(tool.c_str()); - - if (targetTriple.getOS() == Triple::Darwin) { - // darwin specific command line options - if (arch != NULL) { - args.push_back("-arch"); - args.push_back(arch); - } - // add -static to assembler command line when code model requires - if ( (_assemblerPath != NULL) && - (_codeModel == LTO_CODEGEN_PIC_MODEL_STATIC) ) - args.push_back("-static"); - } - if ( 
needsCompilerOptions ) { - args.push_back("-c"); - args.push_back("-x"); - args.push_back("assembler"); - } else { - for (std::vector::iterator I = _assemblerArgs.begin(), - E = _assemblerArgs.end(); I != E; ++I) { - args.push_back(I->c_str()); - } - } - args.push_back("-o"); - args.push_back(objPath.c_str()); - args.push_back(asmPath.c_str()); - args.push_back(0); - - // invoke assembler - if ( sys::Program::ExecuteAndWait(tool, &args[0], 0, 0, 0, 0, &errMsg) ) { - errMsg = "error in assembly"; - return true; - } - return false; // success -} - - - bool LTOCodeGenerator::determineTarget(std::string& errMsg) { if ( _target == NULL ) { @@ -357,7 +269,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { mangler.getNameWithPrefix(Buffer, f, false); if (!f->isDeclaration() && _mustPreserveSymbols.count(Buffer)) - mustPreserveList.push_back(::strdup(f->getNameStr().c_str())); + mustPreserveList.push_back(f->getName().data()); } for (Module::global_iterator v = mergedModule->global_begin(), e = mergedModule->global_end(); v != e; ++v) { @@ -365,7 +277,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { mangler.getNameWithPrefix(Buffer, v, false); if (!v->isDeclaration() && _mustPreserveSymbols.count(Buffer)) - mustPreserveList.push_back(::strdup(v->getNameStr().c_str())); + mustPreserveList.push_back(v->getName().data()); } for (Module::alias_iterator a = mergedModule->alias_begin(), e = mergedModule->alias_end(); a != e; ++a) { @@ -373,7 +285,7 @@ void LTOCodeGenerator::applyScopeRestrictions() { mangler.getNameWithPrefix(Buffer, a, false); if (!a->isDeclaration() && _mustPreserveSymbols.count(Buffer)) - mustPreserveList.push_back(::strdup(a->getNameStr().c_str())); + mustPreserveList.push_back(a->getName().data()); } passes.add(createInternalizePass(mustPreserveList)); } @@ -385,8 +297,8 @@ void LTOCodeGenerator::applyScopeRestrictions() { } /// Optimize merged modules using various IPO passes -bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out, - 
std::string& errMsg) +bool LTOCodeGenerator::generateObjectFile(raw_ostream& out, + std::string& errMsg) { if ( this->determineTarget(errMsg) ) return true; @@ -423,7 +335,7 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out, formatted_raw_ostream Out(out); if (_target->addPassesToEmitFile(*codeGenPasses, Out, - TargetMachine::CGFT_AssemblyFile, + TargetMachine::CGFT_ObjectFile, CodeGenOpt::Aggressive)) { errMsg = "target file type not supported"; return true; @@ -441,6 +353,7 @@ bool LTOCodeGenerator::generateAssemblyCode(raw_ostream& out, codeGenPasses->run(*it); codeGenPasses->doFinalization(); + delete codeGenPasses; return false; // success } diff --git a/tools/lto/LTOCodeGenerator.h b/tools/lto/LTOCodeGenerator.h index f5b78a608a99..0556520424d7 100644 --- a/tools/lto/LTOCodeGenerator.h +++ b/tools/lto/LTOCodeGenerator.h @@ -37,18 +37,14 @@ struct LTOCodeGenerator { bool setDebugInfo(lto_debug_model, std::string& errMsg); bool setCodePICModel(lto_codegen_model, std::string& errMsg); void setCpu(const char *cpu); - void setAssemblerPath(const char* path); - void setAssemblerArgs(const char** args, int nargs); void addMustPreserveSymbol(const char* sym); bool writeMergedModules(const char* path, std::string& errMsg); const void* compile(size_t* length, std::string& errMsg); void setCodeGenDebugOptions(const char *opts); private: - bool generateAssemblyCode(llvm::raw_ostream& out, - std::string& errMsg); - bool assemble(const std::string& asmPath, - const std::string& objPath, std::string& errMsg); + bool generateObjectFile(llvm::raw_ostream& out, + std::string& errMsg); void applyScopeRestrictions(); bool determineTarget(std::string& errMsg); @@ -63,9 +59,7 @@ struct LTOCodeGenerator { StringSet _mustPreserveSymbols; llvm::MemoryBuffer* _nativeObjectFile; std::vector _codegenOptions; - llvm::sys::Path* _assemblerPath; std::string _mCpu; - std::vector _assemblerArgs; }; #endif // LTO_CODE_GENERATOR_H diff --git a/tools/lto/LTOModule.cpp 
b/tools/lto/LTOModule.cpp index 8562f747d0ee..1eac22cfaf1c 100644 --- a/tools/lto/LTOModule.cpp +++ b/tools/lto/LTOModule.cpp @@ -195,26 +195,28 @@ void LTOModule::addObjCClass(GlobalVariable *clgv) { std::string superclassName; if (objcClassNameFromExpression(c->getOperand(1), superclassName)) { NameAndAttributes info; - if (_undefines.find(superclassName.c_str()) == _undefines.end()) { - const char *symbolName = ::strdup(superclassName.c_str()); + StringMap::value_type &entry = + _undefines.GetOrCreateValue(superclassName.c_str()); + if (!entry.getValue().name) { + const char *symbolName = entry.getKey().data(); info.name = symbolName; info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; - // string is owned by _undefines - _undefines[info.name] = info; + entry.setValue(info); } } // third slot in __OBJC,__class is pointer to class name std::string className; if (objcClassNameFromExpression(c->getOperand(2), className)) { - const char *symbolName = ::strdup(className.c_str()); + StringSet::value_type &entry = + _defines.GetOrCreateValue(className.c_str()); + entry.setValue(1); NameAndAttributes info; - info.name = symbolName; + info.name = entry.getKey().data(); info.attributes = (lto_symbol_attributes) (LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT); _symbols.push_back(info); - _defines[info.name] = 1; } } } @@ -227,13 +229,17 @@ void LTOModule::addObjCCategory(GlobalVariable *clgv) { std::string targetclassName; if (objcClassNameFromExpression(c->getOperand(1), targetclassName)) { NameAndAttributes info; - if (_undefines.find(targetclassName.c_str()) == _undefines.end()) { - const char *symbolName = ::strdup(targetclassName.c_str()); - info.name = symbolName; - info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; - // string is owned by _undefines - _undefines[info.name] = info; - } + + StringMap::value_type &entry = + _undefines.GetOrCreateValue(targetclassName.c_str()); + + if (entry.getValue().name) + return; + + const 
char *symbolName = entry.getKey().data(); + info.name = symbolName; + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + entry.setValue(info); } } } @@ -244,13 +250,16 @@ void LTOModule::addObjCClassRef(GlobalVariable *clgv) { std::string targetclassName; if (objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) { NameAndAttributes info; - if (_undefines.find(targetclassName.c_str()) == _undefines.end()) { - const char *symbolName = ::strdup(targetclassName.c_str()); - info.name = symbolName; - info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; - // string is owned by _undefines - _undefines[info.name] = info; - } + + StringMap::value_type &entry = + _undefines.GetOrCreateValue(targetclassName.c_str()); + if (entry.getValue().name) + return; + + const char *symbolName = entry.getKey().data(); + info.name = symbolName; + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + entry.setValue(info); } } @@ -322,7 +331,6 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler, // string is owned by _defines SmallString<64> Buffer; mangler.getNameWithPrefix(Buffer, def, false); - const char *symbolName = ::strdup(Buffer.c_str()); // set alignment part log2() can have rounding errors uint32_t align = def->getAlignment(); @@ -365,26 +373,31 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler, // add to table of symbols NameAndAttributes info; - info.name = symbolName; + StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer.c_str()); + entry.setValue(1); + + StringRef Name = entry.getKey(); + info.name = Name.data(); + assert(info.name[Name.size()] == '\0'); info.attributes = (lto_symbol_attributes)attr; _symbols.push_back(info); - _defines[info.name] = 1; } void LTOModule::addAsmGlobalSymbol(const char *name) { + StringSet::value_type &entry = _defines.GetOrCreateValue(name); + // only add new define if not already defined - if (_defines.count(name)) + if (entry.getValue()) return; - // string is owned by _defines 
- const char *symbolName = ::strdup(name); + entry.setValue(1); + const char *symbolName = entry.getKey().data(); uint32_t attr = LTO_SYMBOL_DEFINITION_REGULAR; attr |= LTO_SYMBOL_SCOPE_DEFAULT; NameAndAttributes info; info.name = symbolName; info.attributes = (lto_symbol_attributes)attr; _symbols.push_back(info); - _defines[info.name] = 1; } void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl, @@ -400,18 +413,22 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl, SmallString<64> name; mangler.getNameWithPrefix(name, decl, false); + StringMap::value_type &entry = + _undefines.GetOrCreateValue(name.c_str()); + // we already have the symbol - if (_undefines.find(name) != _undefines.end()) + if (entry.getValue().name) return; NameAndAttributes info; - // string is owned by _undefines - info.name = ::strdup(name.c_str()); + + info.name = entry.getKey().data(); if (decl->hasExternalWeakLinkage()) info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; else info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; - _undefines[name] = info; + + entry.setValue(info); } diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp index 7d4871d9253d..f48570c1495d 100644 --- a/tools/lto/lto.cpp +++ b/tools/lto/lto.cpp @@ -231,7 +231,7 @@ void lto_codegen_set_cpu(lto_code_gen_t cg, const char* cpu) // void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path) { - cg->setAssemblerPath(path); + // In here only for backwards compatibility. We use MC now. } @@ -241,7 +241,7 @@ void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path) void lto_codegen_set_assembler_args(lto_code_gen_t cg, const char** args, int nargs) { - cg->setAssemblerArgs(args, nargs); + // In here only for backwards compatibility. We use MC now. 
} // diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp index 557d835bacda..e05bdbfc7101 100644 --- a/unittests/ADT/APIntTest.cpp +++ b/unittests/ADT/APIntTest.cpp @@ -332,6 +332,24 @@ TEST(APIntTest, Log2) { EXPECT_EQ(APInt(15, 9).exactLogBase2(), -1); } +TEST(APIntTest, magic) { + EXPECT_EQ(APInt(32, 3).magic().m, APInt(32, "55555556", 16)); + EXPECT_EQ(APInt(32, 3).magic().s, 0U); + EXPECT_EQ(APInt(32, 5).magic().m, APInt(32, "66666667", 16)); + EXPECT_EQ(APInt(32, 5).magic().s, 1U); + EXPECT_EQ(APInt(32, 7).magic().m, APInt(32, "92492493", 16)); + EXPECT_EQ(APInt(32, 7).magic().s, 2U); +} + +TEST(APIntTest, magicu) { + EXPECT_EQ(APInt(32, 3).magicu().m, APInt(32, "AAAAAAAB", 16)); + EXPECT_EQ(APInt(32, 3).magicu().s, 1U); + EXPECT_EQ(APInt(32, 5).magicu().m, APInt(32, "CCCCCCCD", 16)); + EXPECT_EQ(APInt(32, 5).magicu().s, 2U); + EXPECT_EQ(APInt(32, 7).magicu().m, APInt(32, "24924925", 16)); + EXPECT_EQ(APInt(32, 7).magicu().s, 3U); +} + #ifdef GTEST_HAS_DEATH_TEST #ifndef NDEBUG TEST(APIntTest, StringDeath) { diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt index 5f09fa248357..da4a6524bd97 100644 --- a/unittests/CMakeLists.txt +++ b/unittests/CMakeLists.txt @@ -12,9 +12,11 @@ function(add_llvm_unittest test_dirname) endif() add_llvm_executable(${test_name}Tests ${ARGN}) add_dependencies(UnitTests ${test_name}Tests) + set_target_properties(${test_name}Tests PROPERTIES FOLDER "Tests") endfunction() add_custom_target(UnitTests) +set_target_properties(UnitTests PROPERTIES FOLDER "Tests") include_directories(${LLVM_MAIN_SRC_DIR}/utils/unittest/googletest/include) add_definitions(-DGTEST_HAS_RTTI=0) diff --git a/unittests/Transforms/Utils/Local.cpp b/unittests/Transforms/Utils/Local.cpp index e969e958a742..e0322b37d332 100644 --- a/unittests/Transforms/Utils/Local.cpp +++ b/unittests/Transforms/Utils/Local.cpp @@ -42,6 +42,17 @@ TEST(Local, RecursivelyDeleteDeadPHINodes) { EXPECT_EQ(&bb0->front(), br0); EXPECT_EQ(&bb1->front(), 
br1); + builder.SetInsertPoint(bb0); + phi = builder.CreatePHI(Type::getInt32Ty(C)); + + EXPECT_TRUE(RecursivelyDeleteDeadPHINode(phi)); + + builder.SetInsertPoint(bb0); + phi = builder.CreatePHI(Type::getInt32Ty(C)); + builder.CreateAdd(phi, phi); + + EXPECT_TRUE(RecursivelyDeleteDeadPHINode(phi)); + bb0->dropAllReferences(); bb1->dropAllReferences(); delete bb0; diff --git a/utils/FileCheck/CMakeLists.txt b/utils/FileCheck/CMakeLists.txt index 54db453e70e8..fa56f92a8f28 100644 --- a/utils/FileCheck/CMakeLists.txt +++ b/utils/FileCheck/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(FileCheck +add_llvm_utility(FileCheck FileCheck.cpp ) diff --git a/utils/FileUpdate/CMakeLists.txt b/utils/FileUpdate/CMakeLists.txt index 5dda49e0e4c5..655aaec3bc2a 100644 --- a/utils/FileUpdate/CMakeLists.txt +++ b/utils/FileUpdate/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(FileUpdate +add_llvm_utility(FileUpdate FileUpdate.cpp ) diff --git a/utils/KillTheDoctor/CMakeLists.txt b/utils/KillTheDoctor/CMakeLists.txt index 99c671e74af9..37c2b7ceb467 100644 --- a/utils/KillTheDoctor/CMakeLists.txt +++ b/utils/KillTheDoctor/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(KillTheDoctor +add_llvm_utility(KillTheDoctor KillTheDoctor.cpp ) diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp index 448ebad91f09..cd31e0c3448d 100644 --- a/utils/TableGen/AsmWriterEmitter.cpp +++ b/utils/TableGen/AsmWriterEmitter.cpp @@ -542,7 +542,255 @@ void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) { << "}\n\n#endif\n"; } +void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) { + CodeGenTarget Target(Records); + Record *AsmWriter = Target.getAsmWriter(); + O << "\n#ifdef PRINT_ALIAS_INSTR\n"; + O << "#undef PRINT_ALIAS_INSTR\n\n"; + + // Enumerate the register classes. 
+ const std::vector &RegisterClasses = + Target.getRegisterClasses(); + + O << "namespace { // Register classes\n"; + O << " enum RegClass {\n"; + + // Emit the register enum value for each RegisterClass. + for (unsigned I = 0, E = RegisterClasses.size(); I != E; ++I) { + if (I != 0) O << ",\n"; + O << " RC_" << RegisterClasses[I].TheDef->getName(); + } + + O << "\n };\n"; + O << "} // end anonymous namespace\n\n"; + + // Emit a function that returns 'true' if a regsiter is part of a particular + // register class. I.e., RAX is part of GR64 on X86. + O << "static bool regIsInRegisterClass" + << "(unsigned RegClass, unsigned Reg) {\n"; + + // Emit the switch that checks if a register belongs to a particular register + // class. + O << " switch (RegClass) {\n"; + O << " default: break;\n"; + + for (unsigned I = 0, E = RegisterClasses.size(); I != E; ++I) { + const CodeGenRegisterClass &RC = RegisterClasses[I]; + + // Give the register class a legal C name if it's anonymous. + std::string Name = RC.TheDef->getName(); + O << " case RC_" << Name << ":\n"; + + // Emit the register list now. + unsigned IE = RC.Elements.size(); + if (IE == 1) { + O << " if (Reg == " << getQualifiedName(RC.Elements[0]) << ")\n"; + O << " return true;\n"; + } else { + O << " switch (Reg) {\n"; + O << " default: break;\n"; + + for (unsigned II = 0; II != IE; ++II) { + Record *Reg = RC.Elements[II]; + O << " case " << getQualifiedName(Reg) << ":\n"; + } + + O << " return true;\n"; + O << " }\n"; + } + + O << " break;\n"; + } + + O << " }\n\n"; + O << " return false;\n"; + O << "}\n\n"; + + // Emit the method that prints the alias instruction. + std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName"); + + bool isMC = AsmWriter->getValueAsBit("isMCAsmWriter"); + const char *MachineInstrClassName = isMC ? 
"MCInst" : "MachineInstr"; + + O << "bool " << Target.getName() << ClassName + << "::printAliasInstr(const " << MachineInstrClassName + << " *MI, raw_ostream &OS) {\n"; + + std::vector AllInstAliases = + Records.getAllDerivedDefinitions("InstAlias"); + + // Create a map from the qualified name to a list of potential matches. + std::map > AliasMap; + for (std::vector::iterator + I = AllInstAliases.begin(), E = AllInstAliases.end(); I != E; ++I) { + CodeGenInstAlias *Alias = new CodeGenInstAlias(*I, Target); + const Record *R = *I; + const DagInit *DI = R->getValueAsDag("ResultInst"); + const DefInit *Op = dynamic_cast(DI->getOperator()); + AliasMap[getQualifiedName(Op->getDef())].push_back(Alias); + } + + if (AliasMap.empty() || !isMC) { + // FIXME: Support MachineInstr InstAliases? + O << " return true;\n"; + O << "}\n\n"; + O << "#endif // PRINT_ALIAS_INSTR\n"; + return; + } + + O << " StringRef AsmString;\n"; + O << " std::map OpMap;\n"; + O << " switch (MI->getOpcode()) {\n"; + O << " default: return true;\n"; + + for (std::map >::iterator + I = AliasMap.begin(), E = AliasMap.end(); I != E; ++I) { + std::vector &Aliases = I->second; + + std::map CondCount; + std::map BodyMap; + + std::string AsmString = ""; + + for (std::vector::iterator + II = Aliases.begin(), IE = Aliases.end(); II != IE; ++II) { + const CodeGenInstAlias *CGA = *II; + AsmString = CGA->AsmString; + unsigned Indent = 8; + unsigned LastOpNo = CGA->ResultInstOperandIndex.size(); + + std::string Cond; + raw_string_ostream CondO(Cond); + + CondO << "if (MI->getNumOperands() == " << LastOpNo; + + std::map OpMap; + bool CantHandle = false; + + for (unsigned i = 0, e = LastOpNo; i != e; ++i) { + const CodeGenInstAlias::ResultOperand &RO = CGA->ResultOperands[i]; + + switch (RO.Kind) { + default: assert(0 && "unexpected InstAlias operand kind"); + case CodeGenInstAlias::ResultOperand::K_Record: { + const Record *Rec = RO.getRecord(); + StringRef ROName = RO.getName(); + + if 
(Rec->isSubClassOf("RegisterClass")) { + CondO << " &&\n"; + CondO.indent(Indent) << "MI->getOperand(" << i << ").isReg() &&\n"; + if (OpMap.find(ROName) == OpMap.end()) { + OpMap[ROName] = i; + CondO.indent(Indent) + << "regIsInRegisterClass(RC_" + << CGA->ResultOperands[i].getRecord()->getName() + << ", MI->getOperand(" << i << ").getReg())"; + } else { + CondO.indent(Indent) + << "MI->getOperand(" << i + << ").getReg() == MI->getOperand(" + << OpMap[ROName] << ").getReg()"; + } + } else { + assert(Rec->isSubClassOf("Operand") && "Unexpected operand!"); + // FIXME: We need to handle these situations. + CantHandle = true; + break; + } + + break; + } + case CodeGenInstAlias::ResultOperand::K_Imm: + CondO << " &&\n"; + CondO.indent(Indent) << "MI->getOperand(" << i << ").getImm() == "; + CondO << CGA->ResultOperands[i].getImm(); + break; + case CodeGenInstAlias::ResultOperand::K_Reg: + CondO << " &&\n"; + CondO.indent(Indent) << "MI->getOperand(" << i << ").getReg() == "; + CondO << Target.getName() << "::" + << CGA->ResultOperands[i].getRegister()->getName(); + break; + } + + if (CantHandle) break; + } + + if (CantHandle) continue; + + CondO << ")"; + + std::string Body; + raw_string_ostream BodyO(Body); + + BodyO << " // " << CGA->Result->getAsString() << "\n"; + BodyO << " AsmString = \"" << AsmString << "\";\n"; + + for (std::map::iterator + III = OpMap.begin(), IIE = OpMap.end(); III != IIE; ++III) + BodyO << " OpMap[\"" << III->first << "\"] = " + << III->second << ";\n"; + + ++CondCount[CondO.str()]; + BodyMap[CondO.str()] = BodyO.str(); + } + + std::string Code; + raw_string_ostream CodeO(Code); + + bool EmitElse = false; + for (std::map::iterator + II = CondCount.begin(), IE = CondCount.end(); II != IE; ++II) { + if (II->second != 1) continue; + CodeO << " "; + if (EmitElse) CodeO << "} else "; + CodeO << II->first << " {\n"; + CodeO << BodyMap[II->first]; + EmitElse = true; + } + + if (CodeO.str().empty()) continue; + + O << " case " << I->first << ":\n"; 
+ O << CodeO.str(); + O << " }\n"; + O << " break;\n"; + } + + O << " }\n\n"; + + // Code that prints the alias, replacing the operands with the ones from the + // MCInst. + O << " if (AsmString.empty()) return true;\n"; + O << " std::pair ASM = AsmString.split(' ');\n"; + O << " OS << '\\t' << ASM.first;\n"; + + O << " if (!ASM.second.empty()) {\n"; + O << " OS << '\\t';\n"; + O << " for (StringRef::iterator\n"; + O << " I = ASM.second.begin(), E = ASM.second.end(); I != E; ) {\n"; + O << " if (*I == '$') {\n"; + O << " StringRef::iterator Start = ++I;\n"; + O << " while (I != E &&\n"; + O << " ((*I >= 'a' && *I <= 'z') ||\n"; + O << " (*I >= 'A' && *I <= 'Z') ||\n"; + O << " (*I >= '0' && *I <= '9') ||\n"; + O << " *I == '_'))\n"; + O << " ++I;\n"; + O << " StringRef Name(Start, I - Start);\n"; + O << " printOperand(MI, OpMap[Name], OS);\n"; + O << " } else {\n"; + O << " OS << *I++;\n"; + O << " }\n"; + O << " }\n"; + O << " }\n\n"; + + O << " return false;\n"; + O << "}\n\n"; + + O << "#endif // PRINT_ALIAS_INSTR\n"; +} void AsmWriterEmitter::run(raw_ostream &O) { EmitSourceFileHeader("Assembly Writer Source Fragment", O); @@ -550,5 +798,6 @@ void AsmWriterEmitter::run(raw_ostream &O) { EmitPrintInstruction(O); EmitGetRegisterName(O); EmitGetInstructionName(O); + EmitPrintAliasInstruction(O); } diff --git a/utils/TableGen/AsmWriterEmitter.h b/utils/TableGen/AsmWriterEmitter.h index 9f7d7761a497..5e8d6f5b7fe7 100644 --- a/utils/TableGen/AsmWriterEmitter.h +++ b/utils/TableGen/AsmWriterEmitter.h @@ -38,6 +38,7 @@ namespace llvm { void EmitPrintInstruction(raw_ostream &o); void EmitGetRegisterName(raw_ostream &o); void EmitGetInstructionName(raw_ostream &o); + void EmitPrintAliasInstruction(raw_ostream &O); AsmWriterInst *getAsmWriterInstByID(unsigned ID) const { assert(ID < NumberedInstructions.size()); diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt index e24314c3e0ec..514b1912992a 100644 --- a/utils/TableGen/CMakeLists.txt +++ 
b/utils/TableGen/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_REQUIRES_RTTI 1) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR}) -add_executable(tblgen +add_llvm_utility(tblgen ARMDecoderEmitter.cpp AsmMatcherEmitter.cpp AsmWriterEmitter.cpp diff --git a/utils/TableGen/ClangSACheckersEmitter.cpp b/utils/TableGen/ClangSACheckersEmitter.cpp index 3e49ab138fcd..8865db36b6c3 100644 --- a/utils/TableGen/ClangSACheckersEmitter.cpp +++ b/utils/TableGen/ClangSACheckersEmitter.cpp @@ -148,6 +148,7 @@ void ClangSACheckersEmitter::run(raw_ostream &OS) { // Create a pseudo-group to hold this checker. std::string fullName = getCheckerFullName(R); GroupInfo &info = groupInfoByName[fullName]; + info.Hidden = R->getValueAsBit("Hidden"); recordGroupMap[R] = &info; info.Checkers.push_back(R); } else { diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp index ccd3efd980a2..b0839c33982d 100644 --- a/utils/TableGen/X86RecognizableInstr.cpp +++ b/utils/TableGen/X86RecognizableInstr.cpp @@ -34,7 +34,9 @@ using namespace llvm; MAP(E8, 39) \ MAP(F0, 40) \ MAP(F8, 41) \ - MAP(F9, 42) + MAP(F9, 42) \ + MAP(D0, 45) \ + MAP(D1, 46) // A clone of X86 since we can't depend on something that is generated. namespace X86Local { diff --git a/utils/buildit/GNUmakefile b/utils/buildit/GNUmakefile index 54577e2ef538..5140e1508a57 100644 --- a/utils/buildit/GNUmakefile +++ b/utils/buildit/GNUmakefile @@ -80,6 +80,10 @@ EmbeddedSim: export MACOSX_DEPLOYMENT_TARGET=`sw_vers -productVersion`; \ $(MAKE) IOS_SIM_BUILD=yes PREFIX=$(SDKROOT)/usr/local install +Embedded: + ARM_PLATFORM=`xcodebuild -version -sdk iphoneos PlatformPath` && \ + $(MAKE) DSTROOT=$(DSTROOT)$$ARM_PLATFORM install + # installhdrs does nothing, because the headers aren't useful until # the compiler is installed. 
installhdrs: @@ -128,4 +132,4 @@ clean: $(OBJROOT) $(SYMROOT) $(DSTROOT): mkdir -p $@ -.PHONY: install installsrc clean EmbeddedHosted EmbeddedSim +.PHONY: install installsrc clean EmbeddedHosted EmbeddedSim Embedded diff --git a/utils/buildit/build_llvm b/utils/buildit/build_llvm index 5e8369cdd326..38b0bfd3848d 100755 --- a/utils/buildit/build_llvm +++ b/utils/buildit/build_llvm @@ -267,8 +267,9 @@ fi # The Hello dylib is an example of how to build a pass. # The BugpointPasses module is only used to test bugpoint. # These unversioned dylibs cause verification failures, so do not install them. -rm $DEST_DIR$DEST_ROOT/lib/libLLVMHello.dylib -rm $DEST_DIR$DEST_ROOT/lib/libBugpointPasses.dylib +# (The wildcards are used to match a "lib" prefix if it is present.) +rm $DEST_DIR$DEST_ROOT/lib/*LLVMHello.dylib +rm $DEST_DIR$DEST_ROOT/lib/*BugpointPasses.dylib # Compress manpages MDIR=$DEST_DIR$DEST_ROOT/share/man/man1 @@ -341,6 +342,9 @@ else fi rm -f lib/libLTO.a lib/libLTO.la +# Omit lto.h from the result. Clang will supply. +find $DEST_DIR$DEST_ROOT -name lto.h -delete + ################################################################################ # Remove debugging information from DEST_DIR. diff --git a/utils/count/CMakeLists.txt b/utils/count/CMakeLists.txt index e124f61d2409..4e0d371334e4 100644 --- a/utils/count/CMakeLists.txt +++ b/utils/count/CMakeLists.txt @@ -1,3 +1,3 @@ -add_executable(count +add_llvm_utility(count count.c ) diff --git a/utils/llvmbuild b/utils/llvmbuild new file mode 100755 index 000000000000..fb8500d38efe --- /dev/null +++ b/utils/llvmbuild @@ -0,0 +1,740 @@ +#!/usr/bin/python3 +##===- utils/llvmbuild - Build the LLVM project ----------------*-python-*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## +# +# This script builds many different flavors of the LLVM ecosystem. It +# will build LLVM, Clang, llvm-gcc, and dragonegg as well as run tests +# on them. This script is convenient to use to check builds and tests +# before committing changes to the upstream repository +# +# A typical source setup uses three trees and looks like this: +# +# official +# dragonegg +# trunk +# gcc +# trunk +# llvm +# trunk +# tools +# clang +# tags +# RELEASE_28 +# tools +# clang +# llvm-gcc +# trunk +# tags +# RELEASE_28 +# staging +# dragonegg +# trunk +# gcc +# trunk +# llvm +# trunk +# tools +# clang +# tags +# RELEASE_28 +# tools +# clang +# llvm-gcc +# trunk +# tags +# RELEASE_28 +# commit +# dragonegg +# trunk +# gcc +# trunk +# llvm +# trunk +# tools +# clang +# tags +# RELEASE_28 +# tools +# clang +# llvm-gcc +# trunk +# tags +# RELEASE_28 +# +# "gcc" above is the upstream FSF gcc and "gcc/trunk" refers to the +# 4.5 branch as discussed in the dragonegg build guide. +# +# In a typical workflow, the "official" tree always contains unchanged +# sources from the main LLVM project repositories. The "staging" tree +# is where local work is done. A set of changes resides there waiting +# to be moved upstream. The "commit" tree is where changes from +# "staging" make their way upstream. Individual incremental changes +# from "staging" are applied to "commit" and committed upstream after +# a successful build and test run. A successful build is one in which +# testing results in no more failures than seen in the testing of the +# "official" tree. 
+# +# A build may be invoked as such: +# +# llvmbuild --src=~/llvm/commit --src=~/llvm/staging +# --src=~/llvm/official --branch=trunk --branch=tags/RELEASE_28 +# --build=debug --build=release --build=paranoid +# --prefix=/home/greened/install --builddir=/home/greened/build +# +# This will build the LLVM ecosystem, including LLVM, Clang, llvm-gcc, +# gcc 4.5 and dragonegg, putting build results in ~/build and +# installing tools in ~/install. llvmbuild creates separate build and +# install directories for each source/branch/build flavor. In the +# above example, llvmbuild will build debug, release and paranoid +# (debug+checks) flavors of the trunk and RELEASE_28 branches from +# each source tree (official, staging and commit) for a total of +# eighteen builds. All builds will be run in parallel. +# +# The user may control parallelism via the --jobs and --threads +# switches. --jobs tells llvmbuild the maximum total number of builds +# to activate in parallel. The user may think of it as equivalent to +# the GNU make -j switch. --threads tells llvmbuild how many worker +# threads to use to accomplish those builds. If --threads is less +# than --jobs, --threads workers will be launched and each one will +# pick a source/branch/flavor combination to build. Then llvmbuild +# will invoke GNU make with -j (--jobs / --threads) to use up the +# remaining job capacity. Once a worker is finished with a build, it +# will pick another combination off the list and start building it. +# +##===----------------------------------------------------------------------===## + +import optparse +import os +import sys +import threading +import queue +import logging +import traceback +import subprocess +import re + +# TODO: Use shutil.which when it is available (3.2 or later) +def find_executable(executable, path=None): + """Try to find 'executable' in the directories listed in 'path' (a + string listing directories separated by 'os.pathsep'; defaults to + os.environ['PATH']). 
Returns the complete filename or None if not + found + """ + if path is None: + path = os.environ['PATH'] + paths = path.split(os.pathsep) + extlist = [''] + if os.name == 'os2': + (base, ext) = os.path.splitext(executable) + # executable files on OS/2 can have an arbitrary extension, but + # .exe is automatically appended if no dot is present in the name + if not ext: + executable = executable + ".exe" + elif sys.platform == 'win32': + pathext = os.environ['PATHEXT'].lower().split(os.pathsep) + (base, ext) = os.path.splitext(executable) + if ext.lower() not in pathext: + extlist = pathext + for ext in extlist: + execname = executable + ext + if os.path.isfile(execname): + return execname + else: + for p in paths: + f = os.path.join(p, execname) + if os.path.isfile(f): + return f + else: + return None + +def is_executable(fpath): + return os.path.exists(fpath) and os.access(fpath, os.X_OK) + +def add_options(parser): + parser.add_option("-v", "--verbose", action="store_true", + default=False, + help=("Output informational messages" + " [default: %default]")) + parser.add_option("--src", action="append", + help=("Top-level source directory [default: %default]")) + parser.add_option("--build", action="append", + help=("Build types to run [default: %default]")) + parser.add_option("--branch", action="append", + help=("Source branch to build [default: %default]")) + parser.add_option("--cc", default=find_executable("cc"), + help=("The C compiler to use [default: %default]")) + parser.add_option("--cxx", default=find_executable("c++"), + help=("The C++ compiler to use [default: %default]")) + parser.add_option("--threads", default=4, type="int", + help=("The number of worker threads to use " + "[default: %default]")) + parser.add_option("--jobs", "-j", default=8, type="int", + help=("The number of simultaneous build jobs " + "[default: %default]")) + parser.add_option("--prefix", + help=("Root install directory [default: %default]")) + parser.add_option("--builddir", + 
help=("Root build directory [default: %default]")) + parser.add_option("--extra-llvm-config-flags", default="", + help=("Extra flags to pass to llvm configure [default: %default]")) + parser.add_option("--extra-llvm-gcc-config-flags", default="", + help=("Extra flags to pass to llvm-gcc configure [default: %default]")) + parser.add_option("--extra-gcc-config-flags", default="", + help=("Extra flags to pass to gcc configure [default: %default]")) + parser.add_option("--force-configure", default=False, action="store_true", + help=("Force reconfigure of all components")) + return + +def check_options(parser, options, valid_builds): + # See if we're building valid flavors. + for build in options.build: + if (build not in valid_builds): + parser.error("'" + build + "' is not a valid build flavor " + + str(valid_builds)) + + # See if we can find source directories. + for src in options.src: + for component in ["llvm", "llvm-gcc", "gcc", "dragonegg"]: + compsrc = src + "/" + component + if (not os.path.isdir(compsrc)): + parser.error("'" + compsrc + "' does not exist") + if (options.branch is not None): + for branch in options.branch: + if (not os.path.isdir(os.path.join(compsrc, branch))): + parser.error("'" + os.path.join(compsrc, branch) + + "' does not exist") + + # See if we can find the compilers + options.cc = find_executable(options.cc) + options.cxx = find_executable(options.cxx) + + return + +# Find a unique short name for the given set of paths. This searches +# back through path components until it finds unique component names +# among all given paths. +def get_path_abbrevs(paths): + # Find the number of common starting characters in the last component + # of the paths. + unique_paths = list(paths) + + class NotFoundException(Exception): pass + + # Find a unique component of each path. 
+ unique_bases = unique_paths[:] + found = 0 + while len(unique_paths) > 0: + bases = [os.path.basename(src) for src in unique_paths] + components = { c for c in bases } + # Account for single entry in paths. + if len(components) > 1 or len(components) == len(bases): + # We found something unique. + for c in components: + if bases.count(c) == 1: + index = bases.index(c) + unique_bases[index] = c + # Remove the corresponding path from the set under + # consideration. + unique_paths[index] = None + unique_paths = [ p for p in unique_paths if p is not None ] + unique_paths = [os.path.dirname(src) for src in unique_paths] + + if len(unique_paths) > 0: + raise NotFoundException() + + abbrevs = dict(zip(paths, [base for base in unique_bases])) + + return abbrevs + +# Given a set of unique names, find a short character sequence that +# uniquely identifies them. +def get_short_abbrevs(unique_bases): + # Find a unique start character for each path base. + my_unique_bases = unique_bases[:] + unique_char_starts = unique_bases[:] + while len(my_unique_bases) > 0: + for start, char_tuple in enumerate(zip(*[base + for base in my_unique_bases])): + chars = { c for c in char_tuple } + # Account for single path. + if len(chars) > 1 or len(chars) == len(char_tuple): + # We found something unique. + for c in chars: + if char_tuple.count(c) == 1: + index = char_tuple.index(c) + unique_char_starts[index] = start + # Remove the corresponding path from the set under + # consideration. 
+ my_unique_bases[index] = None + my_unique_bases = [ b for b in my_unique_bases + if b is not None ] + break + + if len(my_unique_bases) > 0: + raise NotFoundException() + + abbrevs = [abbrev[start_index:start_index+3] + for abbrev, start_index + in zip([base for base in unique_bases], + [index for index in unique_char_starts])] + + abbrevs = dict(zip(unique_bases, abbrevs)) + + return abbrevs + +class Builder(threading.Thread): + class ExecutableNotFound(Exception): pass + class FileNotExecutable(Exception): pass + + def __init__(self, work_queue, jobs, + build_abbrev, source_abbrev, branch_abbrev, + options): + super().__init__() + self.work_queue = work_queue + self.jobs = jobs + self.cc = options.cc + self.cxx = options.cxx + self.build_abbrev = build_abbrev + self.source_abbrev = source_abbrev + self.branch_abbrev = branch_abbrev + self.build_prefix = options.builddir + self.install_prefix = options.prefix + self.options = options + self.component_abbrev = dict( + llvm="llvm", + llvm_gcc="lgcc", + llvm2="llv2", + gcc="ugcc", + dagonegg="degg") + def run(self): + while True: + try: + source, branch, build = self.work_queue.get() + self.dobuild(source, branch, build) + except: + traceback.print_exc() + finally: + self.work_queue.task_done() + + def execute(self, command, execdir, env, component): + prefix = self.component_abbrev[component.replace("-", "_")] + pwd = os.getcwd() + if not os.path.exists(execdir): + os.makedirs(execdir) + + execenv = os.environ.copy() + + for key, value in env.items(): + execenv[key] = value + + self.logger.debug("[" + prefix + "] " + "env " + str(env) + " " + + " ".join(command)); + + try: + proc = subprocess.Popen(command, + cwd=execdir, + env=execenv, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + line = proc.stdout.readline() + while line: + self.logger.info("[" + prefix + "] " + + str(line, "utf-8").rstrip()) + line = proc.stdout.readline() + + except: + traceback.print_exc() + + # Get a list of C++ include 
directories to pass to clang. + def get_includes(self): + # Assume we're building with g++ for now. + command = [self.cxx] + command += ["-v", "-x", "c++", "/dev/null", "-fsyntax-only"] + includes = [] + self.logger.debug(command) + try: + proc = subprocess.Popen(command, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + gather = False + line = proc.stdout.readline() + while line: + self.logger.debug(line) + if re.search("End of search list", str(line)) is not None: + self.logger.debug("Stop Gather") + gather = False + if gather: + includes.append(str(line, "utf-8").strip()) + if re.search("#include <...> search starts", str(line)) is not None: + self.logger.debug("Start Gather") + gather = True + line = proc.stdout.readline() + except: + traceback.print_exc() + self.logger.debug(includes) + return includes + + def dobuild(self, source, branch, build): + build_suffix = "" + + ssabbrev = get_short_abbrevs([ab for ab in self.source_abbrev.values()]) + + if branch is not None: + sbabbrev = get_short_abbrevs([ab for ab in self.branch_abbrev.values()]) + + prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + sbabbrev[self.branch_abbrev[branch]] + "-" + self.build_abbrev[build] + "]" + self.install_prefix += "/" + self.source_abbrev[source] + "/" + branch + "/" + build + build_suffix += self.source_abbrev[source] + "/" + branch + "/" + build + else: + prefix = "[" + ssabbrev[self.source_abbrev[source]] + "-" + self.build_abbrev[build] + "]" + self.install_prefix += "/" + self.source_abbrev[source] + "/" + build + build_suffix += "/" + self.source_abbrev[source] + "/" + build + + self.logger = logging.getLogger(prefix) + + self.logger.debug(self.install_prefix) + + # Assume we're building with gcc for now. 
+ cxxincludes = self.get_includes() + cxxroot = cxxincludes[0] + cxxarch = os.path.basename(cxxincludes[1]) + + configure_flags = dict( + llvm=dict(debug=["--prefix=" + self.install_prefix, + "--with-extra-options=-Werror", + "--with-cxx-include-root=" + cxxroot, + "--with-cxx-include-arch=" + cxxarch], + release=["--prefix=" + self.install_prefix, + "--with-extra-options=-Werror", + "--enable-optimized", + "--with-cxx-include-root=" + cxxroot, + "--with-cxx-include-arch=" + cxxarch], + paranoid=["--prefix=" + self.install_prefix, + "--with-extra-options=-Werror", + "--enable-expensive-checks", + "--with-cxx-include-root=" + cxxroot, + "--with-cxx-include-arch=" + cxxarch]), + llvm_gcc=dict(debug=["--prefix=" + self.install_prefix, + "--enable-checking", + "--program-prefix=llvm-", + "--enable-llvm=" + self.build_prefix + "/llvm/" + build_suffix, +# Fortran install seems to be broken. +# "--enable-languages=c,c++,fortran"], + "--enable-languages=c,c++"], + release=["--prefix=" + self.install_prefix, + "--program-prefix=llvm-", + "--enable-llvm=" + self.build_prefix + "/llvm/" + build_suffix, +# Fortran install seems to be broken. +# "--enable-languages=c,c++,fortran"], + "--enable-languages=c,c++"], + paranoid=["--prefix=" + self.install_prefix, + "--enable-checking", + "--program-prefix=llvm-", + "--enable-llvm=" + self.build_prefix + "/llvm/" + build_suffix, +# Fortran install seems to be broken. 
+# "--enable-languages=c,c++,fortran"]), + "--enable-languages=c,c++"]), + llvm2=dict(debug=["--prefix=" + self.install_prefix, + "--with-extra-options=-Werror", + "--with-llvmgccdir=" + self.install_prefix + "/bin", + "--with-cxx-include-root=" + cxxroot, + "--with-cxx-include-arch=" + cxxarch], + release=["--prefix=" + self.install_prefix, + "--with-extra-options=-Werror", + "--enable-optimized", + "--with-llvmgccdir=" + self.install_prefix + "/bin", + "--with-cxx-include-root=" + cxxroot, + "--with-cxx-include-arch=" + cxxarch], + paranoid=["--prefix=" + self.install_prefix, + "--with-extra-options=-Werror", + "--enable-expensive-checks", + "--with-llvmgccdir=" + self.install_prefix + "/bin", + "--with-cxx-include-root=" + cxxroot, + "--with-cxx-include-arch=" + cxxarch]), + gcc=dict(debug=["--prefix=" + self.install_prefix, + "--enable-checking"], + release=["--prefix=" + self.install_prefix], + paranoid=["--prefix=" + self.install_prefix, + "--enable-checking"]), + dragonegg=dict(debug=[], + release=[], + paranoid=[])) + + configure_env = dict( + llvm=dict(debug=dict(CC=self.cc, + CXX=self.cxx), + release=dict(CC=self.cc, + CXX=self.cxx), + paranoid=dict(CC=self.cc, + CXX=self.cxx)), + llvm_gcc=dict(debug=dict(CC=self.cc, + CXX=self.cxx), + release=dict(CC=self.cc, + CXX=self.cxx), + paranoid=dict(CC=self.cc, + CXX=self.cxx)), + llvm2=dict(debug=dict(CC=self.cc, + CXX=self.cxx), + release=dict(CC=self.cc, + CXX=self.cxx), + paranoid=dict(CC=self.cc, + CXX=self.cxx)), + gcc=dict(debug=dict(CC=self.cc, + CXX=self.cxx), + release=dict(CC=self.cc, + CXX=self.cxx), + paranoid=dict(CC=self.cc, + CXX=self.cxx)), + dragonegg=dict(debug=dict(CC=self.cc, + CXX=self.cxx), + release=dict(CC=self.cc, + CXX=self.cxx), + paranoid=dict(CC=self.cc, + CXX=self.cxx))) + + make_flags = dict( + llvm=dict(debug=["-j" + str(self.jobs)], + release=["-j" + str(self.jobs)], + paranoid=["-j" + str(self.jobs)]), + llvm_gcc=dict(debug=["-j" + str(self.jobs), + "bootstrap"], + 
release=["-j" + str(self.jobs), + "bootstrap"], + paranoid=["-j" + str(self.jobs), + "bootstrap"]), + llvm2=dict(debug=["-j" + str(self.jobs)], + release=["-j" + str(self.jobs)], + paranoid=["-j" + str(self.jobs)]), + gcc=dict(debug=["-j" + str(self.jobs), + "bootstrap"], + release=["-j" + str(self.jobs), + "bootstrap"], + paranoid=["-j" + str(self.jobs), + "bootstrap"]), + dragonegg=dict(debug=["-j" + str(self.jobs)], + release=["-j" + str(self.jobs)], + paranoid=["-j" + str(self.jobs)])) + + make_env = dict( + llvm=dict(debug=dict(), + release=dict(), + paranoid=dict()), + llvm_gcc=dict(debug=dict(), + release=dict(), + paranoid=dict()), + llvm2=dict(debug=dict(), + release=dict(), + paranoid=dict()), + gcc=dict(debug=dict(), + release=dict(), + paranoid=dict()), + dragonegg=dict(debug=dict(GCC=self.install_prefix + "/bin/gcc", + LLVM_CONFIG=self.install_prefix + "/bin/llvm-config"), + release=dict(GCC=self.install_prefix + "/bin/gcc", + LLVM_CONFIG=self.install_prefix + "/bin/llvm-config"), + paranoid=dict(GCC=self.install_prefix + "/bin/gcc", + LLVM_CONFIG=self.install_prefix + "/bin/llvm-config"))) + + make_install_flags = dict( + llvm=dict(debug=["install"], + release=["install"], + paranoid=["install"]), + llvm_gcc=dict(debug=["install"], + release=["install"], + paranoid=["install"]), + llvm2=dict(debug=["install"], + release=["install"], + paranoid=["install"]), + gcc=dict(debug=["install"], + release=["install"], + paranoid=["install"]), + dragonegg=dict(debug=["install"], + release=["install"], + paranoid=["install"])) + + make_install_env = dict( + llvm=dict(debug=dict(), + release=dict(), + paranoid=dict()), + llvm_gcc=dict(debug=dict(), + release=dict(), + paranoid=dict()), + llvm2=dict(debug=dict(), + release=dict(), + paranoid=dict()), + gcc=dict(debug=dict(), + release=dict(), + paranoid=dict()), + dragonegg=dict(debug=dict(), + release=dict(), + paranoid=dict())) + + make_check_flags = dict( + llvm=dict(debug=["check"], + release=["check"], + 
paranoid=["check"]), + llvm_gcc=dict(debug=["check"], + release=["check"], + paranoid=["check"]), + llvm2=dict(debug=["check"], + release=["check"], + paranoid=["check"]), + gcc=dict(debug=["check"], + release=["check"], + paranoid=["check"]), + dragonegg=dict(debug=["check"], + release=["check"], + paranoid=["check"])) + + make_check_env = dict( + llvm=dict(debug=dict(), + release=dict(), + paranoid=dict()), + llvm_gcc=dict(debug=dict(), + release=dict(), + paranoid=dict()), + llvm2=dict(debug=dict(), + release=dict(), + paranoid=dict()), + gcc=dict(debug=dict(), + release=dict(), + paranoid=dict()), + dragonegg=dict(debug=dict(), + release=dict(), + paranoid=dict())) + + for component in ["llvm", "llvm-gcc", "llvm2", "gcc", "dragonegg"]: + comp = component[:] + + srcdir = source + "/" + comp.rstrip("2") + builddir = self.build_prefix + "/" + comp + "/" + build_suffix + installdir = self.install_prefix + + if (branch is not None): + srcdir += "/" + branch + + comp_key = comp.replace("-", "_") + + config_args = configure_flags[comp_key][build][:] + config_args.extend(getattr(self.options, + "extra_" + comp_key + + "_config_flags").split()) + + self.logger.info("Configuring " + component + " in " + builddir) + self.configure(component, srcdir, builddir, + config_args, + configure_env[comp_key][build]) + + self.logger.info("Building " + component + " in " + builddir) + self.make(component, srcdir, builddir, + make_flags[comp_key][build], + make_env[comp_key][build]) + + self.logger.info("Installing " + component + " in " + installdir) + self.make(component, srcdir, builddir, + make_install_flags[comp_key][build], + make_install_env[comp_key][build]) + + self.logger.info("Testing " + component + " in " + builddir) + self.make(component, srcdir, builddir, + make_check_flags[comp_key][build], + make_check_env[comp_key][build]) + + + def configure(self, component, srcdir, builddir, flags, env): + self.logger.debug("Configure " + str(flags)) + + configure_files = dict( + 
llvm=[(srcdir + "/configure", builddir + "/Makefile")], + llvm_gcc=[(srcdir + "/configure", builddir + "/Makefile"), + (srcdir + "/gcc/configure", builddir + "/gcc/Makefile")], + llvm2=[(srcdir + "/configure", builddir + "/Makefile")], + gcc=[(srcdir + "/configure", builddir + "/Makefile"), + (srcdir + "/gcc/configure", builddir + "/gcc/Makefile")], + dragonegg=[()]) + + + doconfig = False + for conf, mf in configure_files[component.replace("-", "_")]: + if not os.path.exists(conf): + return + if os.path.exists(conf) and os.path.exists(mf): + confstat = os.stat(conf) + makestat = os.stat(mf) + if confstat.st_mtime > makestat.st_mtime: + doconfig = True + break + else: + doconfig = True + break + + if not doconfig and not self.options.force_configure: + return + + program = srcdir + "/configure" + if not is_executable(program): + return + + args = [program] + args += ["--verbose"] + args += flags + self.execute(args, builddir, env, component) + + def make(self, component, srcdir, builddir, flags, env): + program = find_executable("make") + if program is None: + raise ExecutableNotFound + + if not is_executable(program): + raise FileNotExecutable + + args = [program] + args += flags + self.execute(args, builddir, env, component) + +# Global constants +build_abbrev = dict(debug="dbg", release="opt", paranoid="par") + +# Parse options +parser = optparse.OptionParser(version="%prog 1.0") +add_options(parser) +(options, args) = parser.parse_args() +check_options(parser, options, build_abbrev.keys()); + +if options.verbose: + logging.basicConfig(level=logging.DEBUG, + format='%(name)-13s: %(message)s') +else: + logging.basicConfig(level=logging.INFO, + format='%(name)-13s: %(message)s') + +source_abbrev = get_path_abbrevs(set(options.src)) +branch_abbrev = get_path_abbrevs(set(options.branch)) + +work_queue = queue.Queue() + +for t in range(options.threads): + jobs = options.jobs // options.threads + builder = Builder(work_queue, jobs, + build_abbrev, source_abbrev, 
branch_abbrev, + options) + builder.daemon = True + builder.start() + +for build in set(options.build): + for source in set(options.src): + if options.branch is not None: + for branch in set(options.branch): + work_queue.put((source, branch, build)) + else: + work_queue.put((source, None, build)) + +work_queue.join() diff --git a/utils/not/CMakeLists.txt b/utils/not/CMakeLists.txt index 155d2e3ae7e4..f4c02290d7d1 100644 --- a/utils/not/CMakeLists.txt +++ b/utils/not/CMakeLists.txt @@ -1,4 +1,4 @@ -add_executable(not +add_llvm_utility(not not.cpp ) diff --git a/utils/valgrind/i386-pc-linux-gnu.supp b/utils/valgrind/i386-pc-linux-gnu.supp index c9f68a0ab66c..0509791582be 100644 --- a/utils/valgrind/i386-pc-linux-gnu.supp +++ b/utils/valgrind/i386-pc-linux-gnu.supp @@ -12,19 +12,19 @@ { ADDRESS_IN_RANGE/Invalid read of size 4 Memcheck:Addr4 - obj:/usr/bin/python2.5 + obj:/usr/bin/python* } { ADDRESS_IN_RANGE/Invalid read of size 4 Memcheck:Value4 - obj:/usr/bin/python2.5 + obj:/usr/bin/python* } { ADDRESS_IN_RANGE/Conditional jump or move depends on uninitialised value Memcheck:Cond - obj:/usr/bin/python2.5 + obj:/usr/bin/python* } { @@ -37,5 +37,5 @@ We don't care if python leaks Memcheck:Leak fun:malloc - obj:/usr/bin/python2.5 + obj:/usr/bin/python* } From 6b943ff3a3f8617113ecbf611cf0f8957e4e19d2 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Mon, 2 May 2011 19:34:44 +0000 Subject: [PATCH 2/3] Vendor import of llvm trunk r130700: http://llvm.org/svn/llvm-project/llvm/trunk@130700 --- CMakeLists.txt | 50 +- Makefile | 15 + Makefile.rules | 105 +- autoconf/configure.ac | 27 +- autoconf/m4/libtool.m4 | 2 +- autoconf/m4/ltdl.m4 | 2 +- cmake/config-ix.cmake | 48 +- cmake/modules/AddLLVM.cmake | 49 +- cmake/modules/CMakeLists.txt | 24 +- cmake/modules/HandleLLVMOptions.cmake | 27 +- .../{LLVMConfig.cmake => LLVM-Config.cmake} | 36 +- .../{LLVM.cmake => LLVMConfig.cmake.in} | 17 +- cmake/modules/LLVMConfigVersion.cmake.in | 1 + cmake/modules/LLVMLibDeps.cmake | 9 +- 
configure | 99 +- docs/AliasAnalysis.html | 327 +- docs/BitCodeFormat.html | 407 +- docs/Bugpoint.html | 56 +- docs/CFEBuildInstrs.html | 2 +- docs/CMake.html | 174 +- docs/CodeGenerator.html | 558 +- docs/CodingStandards.html | 270 +- docs/CommandGuide/FileCheck.pod | 2 +- docs/CommandGuide/bugpoint.pod | 21 +- docs/CommandGuide/index.html | 36 +- docs/CommandGuide/lit.pod | 2 +- docs/CommandGuide/llc.pod | 2 +- docs/CommandGuide/lli.pod | 2 +- docs/CommandGuide/llvm-ar.pod | 2 +- docs/CommandGuide/llvm-as.pod | 2 +- docs/CommandGuide/llvm-bcanalyzer.pod | 4 +- docs/CommandGuide/llvm-config.pod | 2 +- docs/CommandGuide/llvm-diff.pod | 2 +- docs/CommandGuide/llvm-dis.pod | 2 +- docs/CommandGuide/llvm-extract.pod | 2 +- docs/CommandGuide/llvm-ld.pod | 2 +- docs/CommandGuide/llvm-link.pod | 2 +- docs/CommandGuide/llvm-nm.pod | 2 +- docs/CommandGuide/llvm-prof.pod | 2 +- docs/CommandGuide/llvm-ranlib.pod | 2 +- docs/CommandGuide/llvmc.pod | 2 +- docs/CommandGuide/llvmgcc.pod | 2 +- docs/CommandGuide/llvmgxx.pod | 2 +- docs/CommandGuide/opt.pod | 2 +- docs/CommandGuide/tblgen.pod | 2 +- docs/CommandLine.html | 277 +- docs/CompilerDriver.html | 771 ++- docs/CompilerDriverTutorial.html | 107 +- docs/CompilerWriterInfo.html | 92 +- docs/DebuggingJITedCode.html | 14 +- docs/DeveloperPolicy.html | 83 +- docs/ExceptionHandling.html | 168 +- docs/ExtendingLLVM.html | 54 +- docs/FAQ.html | 34 +- docs/GCCFEBuildInstrs.html | 26 +- docs/GarbageCollection.html | 161 +- docs/GetElementPtr.html | 244 +- docs/GettingStarted.html | 292 +- docs/GettingStartedVS.html | 74 +- docs/GoldPlugin.html | 41 +- .../2000-11-18-EarlyDesignIdeasResp.txt | 4 +- .../2000-12-06-MeetingSummary.txt | 2 +- .../2001-02-06-TypeNotationDebateResp4.txt | 2 +- .../2001-02-09-AdveCommentsResponse.txt | 6 +- .../2001-06-01-GCCOptimizations2.txt | 2 +- .../2002-05-12-InstListChange.txt | 2 +- docs/HowToReleaseLLVM.html | 856 +-- docs/HowToSubmitABug.html | 56 +- docs/LangRef.html | 1420 +++-- docs/Lexicon.html 
| 67 +- docs/LinkTimeOptimization.html | 93 +- docs/MakefileGuide.html | 183 +- docs/Packaging.html | 26 +- docs/Passes.html | 1229 ++-- docs/ProgrammersManual.html | 920 +-- docs/Projects.html | 76 +- docs/ReleaseNotes.html | 1360 ++-- docs/SourceLevelDebugging.html | 331 +- docs/SystemLibrary.html | 84 +- docs/TableGenFundamentals.html | 123 +- docs/TestingGuide.html | 166 +- docs/UsingLibraries.html | 64 +- docs/WritingAnLLVMBackend.html | 231 +- docs/WritingAnLLVMPass.html | 508 +- docs/doxygen.css | 30 + docs/doxygen.footer | 2 +- docs/index.html | 22 +- docs/llvm.css | 16 +- docs/tutorial/LangImpl1.html | 18 +- docs/tutorial/LangImpl2.html | 44 +- docs/tutorial/LangImpl3.html | 31 +- docs/tutorial/LangImpl4.html | 30 +- docs/tutorial/LangImpl5.html | 91 +- docs/tutorial/LangImpl6.html | 36 +- docs/tutorial/LangImpl7.html | 43 +- docs/tutorial/LangImpl8.html | 52 +- docs/tutorial/OCamlLangImpl1.html | 18 +- docs/tutorial/OCamlLangImpl2.html | 44 +- docs/tutorial/OCamlLangImpl3.html | 31 +- docs/tutorial/OCamlLangImpl4.html | 28 +- docs/tutorial/OCamlLangImpl5.html | 79 +- docs/tutorial/OCamlLangImpl6.html | 30 +- docs/tutorial/OCamlLangImpl7.html | 43 +- docs/tutorial/OCamlLangImpl8.html | 50 +- docs/tutorial/index.html | 2 +- examples/BrainF/BrainF.cpp | 7 +- examples/ExceptionDemo/ExceptionDemo.cpp | 2949 +++++---- examples/Kaleidoscope/Chapter5/toy.cpp | 4 +- examples/Kaleidoscope/Chapter6/toy.cpp | 4 +- examples/Kaleidoscope/Chapter7/toy.cpp | 2 +- include/llvm-c/Disassembler.h | 149 + include/llvm-c/EnhancedDisassembly.h | 52 +- include/llvm-c/Object.h | 77 + include/llvm-c/Transforms/Scalar.h | 19 + include/llvm-c/lto.h | 21 +- include/llvm/ADT/APFloat.h | 4 + include/llvm/ADT/APInt.h | 3 +- include/llvm/ADT/ArrayRef.h | 21 +- include/llvm/ADT/DenseMap.h | 65 +- include/llvm/ADT/DenseMapInfo.h | 5 +- include/llvm/ADT/DepthFirstIterator.h | 3 +- include/llvm/ADT/FoldingSet.h | 47 +- include/llvm/ADT/ImmutableIntervalMap.h | 6 + 
include/llvm/ADT/IntervalMap.h | 4 + include/llvm/ADT/IntrusiveRefCntPtr.h | 26 +- include/llvm/ADT/PointerUnion.h | 128 +- include/llvm/ADT/ScopedHashTable.h | 36 +- include/llvm/ADT/SmallPtrSet.h | 2 +- include/llvm/ADT/Statistic.h | 3 + include/llvm/ADT/StringExtras.h | 3 +- include/llvm/ADT/StringMap.h | 20 +- include/llvm/ADT/Triple.h | 88 +- include/llvm/ADT/ilist.h | 2 +- include/llvm/Analysis/AliasAnalysis.h | 1 - include/llvm/Analysis/AliasSetTracker.h | 2 + include/llvm/Analysis/CFGPrinter.h | 1 + include/llvm/Analysis/DIBuilder.h | 52 +- include/llvm/Analysis/DebugInfo.h | 250 +- include/llvm/Analysis/IVUsers.h | 2 + include/llvm/Analysis/InlineCost.h | 2 +- include/llvm/Analysis/InstructionSimplify.h | 15 + include/llvm/Analysis/Lint.h | 2 - include/llvm/Analysis/LiveValues.h | 99 - .../llvm/Analysis/MemoryDependenceAnalysis.h | 19 + include/llvm/Analysis/Passes.h | 6 - include/llvm/Analysis/PathProfileInfo.h | 1 - include/llvm/Analysis/PostDominators.h | 33 +- include/llvm/Analysis/RegionInfo.h | 13 +- include/llvm/Analysis/RegionIterator.h | 2 +- include/llvm/Analysis/RegionPass.h | 2 +- include/llvm/Analysis/ScalarEvolution.h | 81 +- .../Analysis/ScalarEvolutionExpressions.h | 32 +- include/llvm/Bitcode/Archive.h | 7 +- include/llvm/CodeGen/AsmPrinter.h | 12 + include/llvm/CodeGen/CalcSpillWeights.h | 27 +- include/llvm/CodeGen/CallingConvLower.h | 15 + include/llvm/CodeGen/EdgeBundles.h | 7 + include/llvm/CodeGen/FastISel.h | 46 +- include/llvm/CodeGen/FunctionLoweringInfo.h | 12 +- include/llvm/CodeGen/ISDOpcodes.h | 2 +- include/llvm/CodeGen/JITCodeEmitter.h | 4 +- include/llvm/CodeGen/LiveInterval.h | 21 +- include/llvm/CodeGen/LiveIntervalAnalysis.h | 12 +- include/llvm/CodeGen/MachineBasicBlock.h | 17 + include/llvm/CodeGen/MachineCodeEmitter.h | 4 +- include/llvm/CodeGen/MachineConstantPool.h | 5 +- include/llvm/CodeGen/MachineFrameInfo.h | 1 - include/llvm/CodeGen/MachineInstr.h | 70 +- include/llvm/CodeGen/MachineInstrBuilder.h | 11 + 
include/llvm/CodeGen/PBQP/Graph.h | 1 - include/llvm/CodeGen/PBQP/Heuristics/Briggs.h | 1 - include/llvm/CodeGen/ProcessImplicitDefs.h | 10 +- include/llvm/CodeGen/RegisterScavenging.h | 2 +- include/llvm/CodeGen/RuntimeLibcalls.h | 10 + include/llvm/CodeGen/ScheduleDAG.h | 29 +- .../llvm/CodeGen/ScoreboardHazardRecognizer.h | 1 - include/llvm/CodeGen/SelectionDAG.h | 28 +- include/llvm/CodeGen/SelectionDAGISel.h | 6 +- include/llvm/CodeGen/SelectionDAGNodes.h | 2 +- include/llvm/CodeGen/SlotIndexes.h | 265 +- .../CodeGen/TargetLoweringObjectFileImpl.h | 18 +- .../llvm/CompilerDriver/CompilationGraph.h | 2 +- include/llvm/CompilerDriver/Tool.h | 2 +- include/llvm/Config/config.h.cmake | 7 +- include/llvm/Config/config.h.in | 6 + include/llvm/Config/llvm-config.h.cmake | 6 + include/llvm/Config/llvm-config.h.in | 3 + include/llvm/Constant.h | 13 - include/llvm/Constants.h | 14 +- include/llvm/DebugInfoProbe.h | 67 + include/llvm/DerivedTypes.h | 14 +- .../llvm/ExecutionEngine/ExecutionEngine.h | 57 +- .../llvm/ExecutionEngine/JITMemoryManager.h | 18 +- include/llvm/ExecutionEngine/RuntimeDyld.h | 75 + include/llvm/GlobalVariable.h | 2 +- include/llvm/InitializePasses.h | 6 +- include/llvm/InstrTypes.h | 1 - include/llvm/Instructions.h | 46 +- include/llvm/Intrinsics.td | 5 +- include/llvm/IntrinsicsARM.td | 8 +- include/llvm/IntrinsicsPTX.td | 92 + include/llvm/IntrinsicsX86.td | 195 +- include/llvm/IntrinsicsXCore.td | 34 + include/llvm/LinkAllPasses.h | 6 +- include/llvm/MC/MCAsmInfo.h | 29 +- include/llvm/MC/MCAsmLayout.h | 6 +- include/llvm/MC/MCAssembler.h | 4 +- include/llvm/MC/MCContext.h | 23 +- include/llvm/MC/MCDisassembler.h | 30 +- include/llvm/MC/MCDwarf.h | 21 +- include/llvm/MC/MCExpr.h | 8 + include/llvm/MC/MCInstPrinter.h | 11 +- include/llvm/MC/MCObjectStreamer.h | 9 +- include/llvm/MC/MCParser/AsmLexer.h | 1 + include/llvm/MC/MCSection.h | 1 - include/llvm/MC/MCSectionMachO.h | 6 +- include/llvm/MC/MCStreamer.h | 71 +- include/llvm/MC/MCSymbol.h | 
1 + include/llvm/Metadata.h | 20 +- include/llvm/Module.h | 11 +- include/llvm/Object/MachOObject.h | 20 + include/llvm/Pass.h | 6 +- include/llvm/PassAnalysisSupport.h | 2 + include/llvm/Support/Allocator.h | 3 + include/llvm/Support/CFG.h | 7 + include/llvm/Support/Casting.h | 4 +- include/llvm/Support/CommandLine.h | 295 +- include/llvm/Support/Compiler.h | 8 + include/llvm/Support/ConstantFolder.h | 4 +- include/llvm/Support/CrashRecoveryContext.h | 105 +- include/llvm/Support/DOTGraphTraits.h | 5 +- include/llvm/Support/DebugLoc.h | 28 + include/llvm/Support/Dwarf.h | 15 +- include/llvm/Support/ErrorHandling.h | 15 +- include/llvm/Support/FileSystem.h | 2 +- include/llvm/Support/FileUtilities.h | 26 +- include/llvm/Support/GraphWriter.h | 10 +- include/llvm/Support/IRBuilder.h | 33 +- include/llvm/Support/Memory.h | 4 +- include/llvm/Support/MemoryBuffer.h | 32 +- include/llvm/Support/NoFolder.h | 4 +- include/llvm/Support/PathV1.h | 5 +- include/llvm/Support/PatternMatch.h | 35 + include/llvm/Support/PrettyStackTrace.h | 2 +- include/llvm/Support/Program.h | 2 +- include/llvm/Support/Regex.h | 2 +- include/llvm/Support/Signals.h | 2 +- include/llvm/Support/SourceMgr.h | 7 +- include/llvm/Support/StandardPasses.h | 8 +- include/llvm/Support/TimeValue.h | 4 +- include/llvm/Support/system_error.h | 2 +- include/llvm/Target/SubtargetFeature.h | 6 +- include/llvm/Target/Target.td | 22 +- include/llvm/Target/TargetAsmBackend.h | 8 + include/llvm/Target/TargetAsmInfo.h | 8 + include/llvm/Target/TargetData.h | 13 +- include/llvm/Target/TargetInstrDesc.h | 7 + include/llvm/Target/TargetInstrInfo.h | 6 +- include/llvm/Target/TargetInstrItineraries.h | 10 +- include/llvm/Target/TargetLibraryInfo.h | 12 + include/llvm/Target/TargetLowering.h | 35 +- .../llvm/Target/TargetLoweringObjectFile.h | 13 +- include/llvm/Target/TargetMachine.h | 19 + include/llvm/Target/TargetOptions.h | 5 + include/llvm/Target/TargetRegisterInfo.h | 49 +- include/llvm/Target/TargetRegistry.h | 
12 +- include/llvm/Target/TargetSelect.h | 13 + include/llvm/Target/TargetSelectionDAG.td | 36 +- include/llvm/Transforms/IPO.h | 1 - include/llvm/Transforms/Instrumentation.h | 4 +- include/llvm/Transforms/Scalar.h | 8 +- .../llvm/Transforms/Utils/BasicBlockUtils.h | 5 + include/llvm/Transforms/Utils/Cloning.h | 2 +- include/llvm/Transforms/Utils/Local.h | 27 +- include/llvm/TypeSymbolTable.h | 2 +- include/llvm/User.h | 4 +- include/llvm/Value.h | 4 +- lib/Analysis/AliasAnalysis.cpp | 37 +- lib/Analysis/AliasSetTracker.cpp | 4 + lib/Analysis/Analysis.cpp | 2 - lib/Analysis/BasicAliasAnalysis.cpp | 24 +- lib/Analysis/CMakeLists.txt | 9 +- lib/Analysis/CaptureTracking.cpp | 1 + lib/Analysis/ConstantFolding.cpp | 12 +- lib/Analysis/DIBuilder.cpp | 142 +- lib/Analysis/DebugInfo.cpp | 675 +- lib/Analysis/IPA/GlobalsModRef.cpp | 2 +- lib/Analysis/IVUsers.cpp | 7 +- lib/Analysis/InlineCost.cpp | 4 +- lib/Analysis/InstructionSimplify.cpp | 302 +- lib/Analysis/LazyValueInfo.cpp | 3 +- lib/Analysis/Lint.cpp | 2 +- lib/Analysis/LiveValues.cpp | 200 - lib/Analysis/Loads.cpp | 1 + lib/Analysis/LoopPass.cpp | 23 +- lib/Analysis/MemoryBuiltins.cpp | 15 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 184 +- lib/Analysis/PHITransAddr.cpp | 1 + lib/Analysis/PathNumbering.cpp | 5 +- lib/Analysis/PathProfileVerifier.cpp | 2 +- lib/Analysis/PostDominators.cpp | 51 - lib/Analysis/ProfileEstimatorPass.cpp | 2 +- lib/Analysis/ProfileInfo.cpp | 6 +- lib/Analysis/ProfileInfoLoader.cpp | 1 - lib/Analysis/RegionInfo.cpp | 32 +- lib/Analysis/RegionPrinter.cpp | 26 + lib/Analysis/ScalarEvolution.cpp | 554 +- lib/Analysis/ScalarEvolutionExpander.cpp | 57 +- lib/Analysis/ScalarEvolutionNormalization.cpp | 3 +- lib/Analysis/TypeBasedAliasAnalysis.cpp | 3 +- lib/Analysis/ValueTracking.cpp | 70 +- lib/Archive/ArchiveWriter.cpp | 74 +- lib/AsmParser/LLParser.cpp | 15 +- lib/Bitcode/Reader/BitcodeReader.cpp | 12 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 2 +- lib/Bitcode/Writer/ValueEnumerator.cpp | 
98 +- lib/Bitcode/Writer/ValueEnumerator.h | 4 +- lib/CMakeLists.txt | 1 + lib/CodeGen/AggressiveAntiDepBreaker.cpp | 2 +- lib/CodeGen/Analysis.cpp | 2 +- lib/CodeGen/AsmPrinter/ARMException.cpp | 87 + lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 464 +- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 58 +- lib/CodeGen/AsmPrinter/CMakeLists.txt | 2 + lib/CodeGen/AsmPrinter/DIE.h | 1 - lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 101 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 983 +++ lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 282 + lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2009 ++---- lib/CodeGen/AsmPrinter/DwarfDebug.h | 275 +- lib/CodeGen/AsmPrinter/DwarfException.h | 45 +- .../AsmPrinter/DwarfTableException.cpp | 4 +- lib/CodeGen/BranchFolding.cpp | 2 +- lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/CalcSpillWeights.cpp | 124 +- lib/CodeGen/CallingConvLower.cpp | 9 +- lib/CodeGen/CodePlacementOpt.cpp | 8 +- lib/CodeGen/DwarfEHPrepare.cpp | 19 +- lib/CodeGen/ELF.h | 2 +- lib/CodeGen/ELFWriter.cpp | 24 +- lib/CodeGen/EdgeBundles.cpp | 15 +- lib/CodeGen/ExpandISelPseudos.cpp | 2 +- lib/CodeGen/IfConversion.cpp | 21 +- lib/CodeGen/InlineSpiller.cpp | 931 ++- lib/CodeGen/InterferenceCache.cpp | 155 + lib/CodeGen/InterferenceCache.h | 163 + lib/CodeGen/LLVMTargetMachine.cpp | 48 +- lib/CodeGen/LiveDebugVariables.cpp | 186 +- lib/CodeGen/LiveInterval.cpp | 143 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 118 +- lib/CodeGen/LiveIntervalUnion.cpp | 22 +- lib/CodeGen/LiveIntervalUnion.h | 17 +- lib/CodeGen/LiveRangeEdit.cpp | 234 +- lib/CodeGen/LiveRangeEdit.h | 90 +- lib/CodeGen/LiveVariables.cpp | 6 +- lib/CodeGen/MachineBasicBlock.cpp | 6 +- lib/CodeGen/MachineCSE.cpp | 8 +- lib/CodeGen/MachineInstr.cpp | 46 +- lib/CodeGen/MachineLICM.cpp | 27 +- lib/CodeGen/MachineSink.cpp | 5 +- lib/CodeGen/MachineVerifier.cpp | 20 +- lib/CodeGen/PHIElimination.cpp | 17 +- lib/CodeGen/Passes.cpp | 7 +- lib/CodeGen/PeepholeOptimizer.cpp | 101 +- lib/CodeGen/ProcessImplicitDefs.cpp | 39 +- 
lib/CodeGen/PrologEpilogInserter.cpp | 17 +- lib/CodeGen/README.txt | 4 +- lib/CodeGen/RegAllocBase.h | 9 +- lib/CodeGen/RegAllocBasic.cpp | 89 +- lib/CodeGen/RegAllocFast.cpp | 22 +- lib/CodeGen/RegAllocGreedy.cpp | 1195 ++-- lib/CodeGen/RegAllocLinearScan.cpp | 59 +- lib/CodeGen/RegAllocPBQP.cpp | 3 +- lib/CodeGen/RegisterScavenging.cpp | 18 +- lib/CodeGen/RenderMachineFunction.cpp | 2 +- lib/CodeGen/ScheduleDAG.cpp | 4 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 2 +- lib/CodeGen/ScheduleDAGPrinter.cpp | 3 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 82 +- lib/CodeGen/SelectionDAG/FastISel.cpp | 301 +- .../SelectionDAG/FunctionLoweringInfo.cpp | 32 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 285 +- .../SelectionDAG/LegalizeIntegerTypes.cpp | 31 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 3 +- .../SelectionDAG/LegalizeVectorOps.cpp | 49 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 170 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 17 +- .../SelectionDAG/ScheduleDAGRRList.cpp | 572 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 69 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6 + lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 24 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 102 +- .../SelectionDAG/SelectionDAGBuilder.h | 6 - lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 160 +- .../SelectionDAG/SelectionDAGPrinter.cpp | 3 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 101 +- lib/CodeGen/ShrinkWrapping.cpp | 2 +- lib/CodeGen/SimpleRegisterCoalescing.cpp | 157 +- lib/CodeGen/SimpleRegisterCoalescing.h | 16 +- lib/CodeGen/SjLjEHPrepare.cpp | 28 +- lib/CodeGen/SlotIndexes.cpp | 119 +- lib/CodeGen/SpillPlacement.cpp | 158 +- lib/CodeGen/SpillPlacement.h | 70 +- lib/CodeGen/Spiller.cpp | 24 +- lib/CodeGen/Spiller.h | 16 +- lib/CodeGen/SplitKit.cpp | 1199 ++-- lib/CodeGen/SplitKit.h | 338 +- lib/CodeGen/StackProtector.cpp | 12 +- lib/CodeGen/StrongPHIElimination.cpp | 2 +- lib/CodeGen/TargetInstrInfoImpl.cpp | 5 - lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 103 
+- lib/CodeGen/TwoAddressInstructionPass.cpp | 119 +- lib/CodeGen/VirtRegMap.cpp | 41 +- lib/CodeGen/VirtRegRewriter.cpp | 53 +- lib/CompilerDriver/CMakeLists.txt | 10 +- lib/ExecutionEngine/CMakeLists.txt | 1 + lib/ExecutionEngine/ExecutionEngine.cpp | 25 +- lib/ExecutionEngine/JIT/Intercept.cpp | 12 +- lib/ExecutionEngine/JIT/JIT.cpp | 70 +- lib/ExecutionEngine/JIT/JIT.h | 10 +- .../JIT/JITDebugRegisterer.cpp | 3 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 46 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 24 +- lib/ExecutionEngine/JIT/JITEmitter.cpp | 38 +- .../JIT/OProfileJITEventListener.cpp | 8 +- lib/ExecutionEngine/JIT/TargetSelect.cpp | 2 +- lib/ExecutionEngine/MCJIT/CMakeLists.txt | 1 + lib/ExecutionEngine/MCJIT/Intercept.cpp | 161 + lib/ExecutionEngine/MCJIT/MCJIT.cpp | 153 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 37 +- .../MCJIT/MCJITMemoryManager.h | 59 + lib/ExecutionEngine/Makefile | 2 +- .../RuntimeDyld/CMakeLists.txt | 3 + lib/ExecutionEngine/RuntimeDyld/Makefile | 13 + .../RuntimeDyld/RuntimeDyld.cpp | 669 ++ lib/Linker/LinkModules.cpp | 1 + lib/MC/CMakeLists.txt | 1 + lib/MC/ELFObjectWriter.cpp | 826 +-- lib/MC/ELFObjectWriter.h | 406 ++ lib/MC/MCAsmInfo.cpp | 28 +- lib/MC/MCAsmInfoDarwin.cpp | 4 +- lib/MC/MCAsmStreamer.cpp | 312 +- lib/MC/MCAssembler.cpp | 35 +- lib/MC/MCContext.cpp | 27 +- lib/MC/MCDisassembler/CMakeLists.txt | 3 +- lib/MC/MCDisassembler/Disassembler.cpp | 171 + lib/MC/MCDisassembler/Disassembler.h | 96 + lib/MC/MCDisassembler/EDDisassembler.cpp | 19 +- lib/MC/MCDisassembler/EDDisassembler.h | 9 +- lib/MC/MCDisassembler/EDInfo.h | 4 +- lib/MC/MCDisassembler/EDInst.cpp | 3 + lib/MC/MCDisassembler/EDOperand.cpp | 13 + lib/MC/MCDwarf.cpp | 366 +- lib/MC/MCELF.cpp | 72 + lib/MC/MCELF.h | 35 + lib/MC/MCELFStreamer.cpp | 193 +- lib/MC/MCELFStreamer.h | 274 + lib/MC/MCExpr.cpp | 49 +- lib/MC/MCInstPrinter.cpp | 5 + lib/MC/MCLoggingStreamer.cpp | 19 +- lib/MC/MCMachOStreamer.cpp | 17 + lib/MC/MCNullStreamer.cpp | 11 +- 
lib/MC/MCObjectStreamer.cpp | 55 +- lib/MC/MCParser/AsmLexer.cpp | 30 +- lib/MC/MCParser/AsmParser.cpp | 133 +- lib/MC/MCParser/DarwinAsmParser.cpp | 13 +- lib/MC/MCSectionELF.cpp | 24 +- lib/MC/MCSectionMachO.cpp | 4 + lib/MC/MCStreamer.cpp | 180 +- lib/MC/MCSymbol.cpp | 10 +- lib/MC/MachObjectWriter.cpp | 96 +- lib/MC/WinCOFFObjectWriter.cpp | 28 +- lib/Object/CMakeLists.txt | 2 + lib/Object/COFFObjectFile.cpp | 25 +- lib/Object/ELFObjectFile.cpp | 2 + lib/Object/MachOObject.cpp | 28 + lib/Object/MachOObjectFile.cpp | 327 + lib/Object/Object.cpp | 59 + lib/Object/ObjectFile.cpp | 2 +- lib/Support/APFloat.cpp | 57 +- lib/Support/APInt.cpp | 16 +- lib/Support/Allocator.cpp | 8 + lib/Support/CommandLine.cpp | 193 +- lib/Support/CrashRecoveryContext.cpp | 51 + lib/Support/Dwarf.cpp | 5 + lib/Support/ErrorHandling.cpp | 1 - lib/Support/FileUtilities.cpp | 2 +- lib/Support/FoldingSet.cpp | 5 + lib/Support/Host.cpp | 2 + lib/Support/MemoryBuffer.cpp | 142 +- lib/Support/Path.cpp | 28 +- lib/Support/PrettyStackTrace.cpp | 2 +- lib/Support/Regex.cpp | 2 +- lib/Support/Signals.cpp | 2 +- lib/Support/SmallPtrSet.cpp | 15 +- lib/Support/Statistic.cpp | 4 + lib/Support/StringMap.cpp | 17 +- lib/Support/StringRef.cpp | 4 +- lib/Support/Triple.cpp | 112 +- lib/Support/Unix/Host.inc | 5 +- lib/Support/Unix/Memory.inc | 2 +- lib/Support/Unix/Path.inc | 6 +- lib/Support/Unix/Program.inc | 20 +- lib/Support/Unix/Signals.inc | 8 +- lib/Support/Windows/DynamicLibrary.inc | 33 +- lib/Support/Windows/Path.inc | 16 +- lib/Support/Windows/PathV2.inc | 11 +- lib/Support/raw_ostream.cpp | 64 +- lib/Support/regcomp.c | 2 +- lib/Target/ARM/ARM.td | 24 +- lib/Target/ARM/ARMAddressingModes.h | 24 +- lib/Target/ARM/ARMAsmBackend.cpp | 36 +- lib/Target/ARM/ARMAsmPrinter.cpp | 375 +- lib/Target/ARM/ARMAsmPrinter.h | 9 + lib/Target/ARM/ARMBaseInfo.h | 53 + lib/Target/ARM/ARMBaseInstrInfo.cpp | 320 +- lib/Target/ARM/ARMBaseInstrInfo.h | 40 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 73 +- 
lib/Target/ARM/ARMBaseRegisterInfo.h | 11 +- lib/Target/ARM/ARMCallingConv.td | 3 + lib/Target/ARM/ARMCodeEmitter.cpp | 11 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 39 +- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 132 +- lib/Target/ARM/ARMFastISel.cpp | 232 +- lib/Target/ARM/ARMFrameLowering.cpp | 65 +- lib/Target/ARM/ARMFrameLowering.h | 6 +- lib/Target/ARM/ARMHazardRecognizer.cpp | 2 + lib/Target/ARM/ARMISelDAGToDAG.cpp | 148 +- lib/Target/ARM/ARMISelLowering.cpp | 956 ++- lib/Target/ARM/ARMISelLowering.h | 40 +- lib/Target/ARM/ARMInstrFormats.td | 144 +- lib/Target/ARM/ARMInstrInfo.td | 1383 ++-- lib/Target/ARM/ARMInstrNEON.td | 329 +- lib/Target/ARM/ARMInstrThumb.td | 60 +- lib/Target/ARM/ARMInstrThumb2.td | 267 +- lib/Target/ARM/ARMInstrVFP.td | 70 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 78 +- lib/Target/ARM/ARMMCAsmInfo.cpp | 12 + lib/Target/ARM/ARMMCCodeEmitter.cpp | 33 + lib/Target/ARM/ARMMCExpr.h | 3 + lib/Target/ARM/ARMRegisterInfo.cpp | 18 - lib/Target/ARM/ARMRegisterInfo.td | 59 +- lib/Target/ARM/ARMScheduleA9.td | 371 +- lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2 +- lib/Target/ARM/ARMSubtarget.cpp | 2 + lib/Target/ARM/ARMSubtarget.h | 15 +- lib/Target/ARM/ARMTargetMachine.cpp | 31 +- lib/Target/ARM/ARMTargetObjectFile.cpp | 3 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 391 +- .../ARM/Disassembler/ARMDisassembler.cpp | 61 +- .../ARM/Disassembler/ARMDisassemblerCore.cpp | 827 ++- .../ARM/Disassembler/ARMDisassemblerCore.h | 76 +- .../ARM/Disassembler/ThumbDisassemblerCore.h | 398 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 126 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 20 +- lib/Target/ARM/MLxExpansionPass.cpp | 28 +- lib/Target/ARM/README.txt | 24 + lib/Target/ARM/Thumb1FrameLowering.cpp | 34 +- lib/Target/ARM/Thumb1FrameLowering.h | 2 +- lib/Target/ARM/Thumb1RegisterInfo.cpp | 99 +- lib/Target/ARM/Thumb1RegisterInfo.h | 8 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 19 +- lib/Target/ARM/Thumb2RegisterInfo.cpp | 29 +- 
lib/Target/ARM/Thumb2RegisterInfo.h | 3 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 259 +- lib/Target/Alpha/Alpha.td | 2 +- lib/Target/Alpha/AlphaISelLowering.cpp | 2 +- lib/Target/Alpha/AlphaInstrInfo.td | 2 +- lib/Target/Alpha/README.txt | 4 +- lib/Target/Blackfin/BlackfinISelLowering.cpp | 2 +- lib/Target/CBackend/CBackend.cpp | 18 - lib/Target/CellSPU/SPU64InstrInfo.td | 2 +- lib/Target/CellSPU/SPUAsmPrinter.cpp | 7 + lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 7 +- lib/Target/CellSPU/SPUISelLowering.cpp | 2 +- lib/Target/CellSPU/SPUISelLowering.h | 8 - lib/Target/CellSPU/SPUInstrFormats.td | 22 + lib/Target/CellSPU/SPUInstrInfo.cpp | 64 +- lib/Target/CellSPU/SPUInstrInfo.td | 70 +- lib/Target/CellSPU/SPURegisterInfo.h | 8 + lib/Target/CppBackend/CPPBackend.cpp | 6 +- .../Disassembler/MBlazeDisassembler.cpp | 129 +- .../MBlaze/InstPrinter/MBlazeInstPrinter.h | 5 +- lib/Target/MBlaze/MBlaze.td | 27 +- lib/Target/MBlaze/MBlazeAsmBackend.cpp | 15 +- lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 3 +- lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 2 +- lib/Target/MBlaze/MBlazeISelLowering.cpp | 8 +- lib/Target/MBlaze/MBlazeInstrFPU.td | 43 +- lib/Target/MBlaze/MBlazeInstrFSL.td | 12 +- lib/Target/MBlaze/MBlazeInstrFormats.td | 2 +- lib/Target/MBlaze/MBlazeInstrInfo.cpp | 2 + lib/Target/MBlaze/MBlazeInstrInfo.h | 1 - lib/Target/MBlaze/MBlazeInstrInfo.td | 150 +- lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 20 + lib/Target/MBlaze/MBlazeRegisterInfo.h | 2 + lib/Target/MBlaze/MBlazeRegisterInfo.td | 25 +- lib/Target/MBlaze/MBlazeSchedule.td | 83 +- lib/Target/MBlaze/MBlazeSchedule3.td | 236 + lib/Target/MBlaze/MBlazeSchedule5.td | 267 + lib/Target/MBlaze/MBlazeSubtarget.cpp | 36 +- lib/Target/MBlaze/MBlazeSubtarget.h | 37 +- lib/Target/MBlaze/MBlazeTargetMachine.cpp | 18 +- lib/Target/MBlaze/MBlazeTargetMachine.h | 7 +- lib/Target/MBlaze/TODO | 7 +- .../MSP430/InstPrinter/MSP430InstPrinter.h | 5 +- lib/Target/MSP430/MSP430AsmPrinter.cpp | 3 +- 
lib/Target/MSP430/MSP430ISelLowering.cpp | 2 +- lib/Target/Mips/CMakeLists.txt | 1 + lib/Target/Mips/Mips.h | 3 +- lib/Target/Mips/Mips.td | 4 +- lib/Target/Mips/MipsAsmPrinter.cpp | 115 +- lib/Target/Mips/MipsCallingConv.td | 18 +- lib/Target/Mips/MipsExpandPseudo.cpp | 117 + lib/Target/Mips/MipsFrameLowering.cpp | 99 +- lib/Target/Mips/MipsFrameLowering.h | 4 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 172 +- lib/Target/Mips/MipsISelLowering.cpp | 693 +- lib/Target/Mips/MipsISelLowering.h | 36 +- lib/Target/Mips/MipsInstrFPU.td | 213 +- lib/Target/Mips/MipsInstrFormats.td | 85 +- lib/Target/Mips/MipsInstrInfo.cpp | 391 +- lib/Target/Mips/MipsInstrInfo.h | 68 +- lib/Target/Mips/MipsInstrInfo.td | 148 +- lib/Target/Mips/MipsMCAsmInfo.h | 2 +- lib/Target/Mips/MipsRegisterInfo.cpp | 61 +- lib/Target/Mips/MipsRegisterInfo.h | 2 +- lib/Target/Mips/MipsRegisterInfo.td | 36 +- lib/Target/Mips/MipsSchedule.td | 2 +- lib/Target/Mips/MipsSubtarget.cpp | 4 +- lib/Target/Mips/MipsSubtarget.h | 16 +- lib/Target/Mips/MipsTargetMachine.cpp | 6 + lib/Target/Mips/MipsTargetMachine.h | 1 + lib/Target/Mips/MipsTargetObjectFile.h | 10 +- lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 2 +- lib/Target/PTX/PTX.h | 8 +- lib/Target/PTX/PTX.td | 31 +- lib/Target/PTX/PTXAsmPrinter.cpp | 210 +- lib/Target/PTX/PTXFrameLowering.h | 3 +- lib/Target/PTX/PTXISelDAGToDAG.cpp | 87 +- lib/Target/PTX/PTXISelLowering.cpp | 110 +- lib/Target/PTX/PTXISelLowering.h | 9 +- lib/Target/PTX/PTXInstrInfo.cpp | 278 +- lib/Target/PTX/PTXInstrInfo.h | 112 +- lib/Target/PTX/PTXInstrInfo.td | 854 ++- lib/Target/PTX/PTXIntrinsicInstrInfo.td | 84 + lib/Target/PTX/PTXMCAsmStreamer.cpp | 22 +- lib/Target/PTX/PTXMFInfoExtract.cpp | 4 +- lib/Target/PTX/PTXMachineFunctionInfo.h | 21 +- lib/Target/PTX/PTXRegisterInfo.td | 395 +- lib/Target/PTX/PTXSubtarget.cpp | 30 +- lib/Target/PTX/PTXSubtarget.h | 51 +- lib/Target/PTX/PTXTargetMachine.cpp | 42 +- lib/Target/PTX/PTXTargetMachine.h | 26 +- 
lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp | 8 +- .../PowerPC/InstPrinter/PPCInstPrinter.h | 7 +- lib/Target/PowerPC/PPCAsmBackend.cpp | 8 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 3 +- lib/Target/PowerPC/PPCISelLowering.cpp | 39 +- lib/Target/PowerPC/PPCInstr64Bit.td | 2 +- lib/Target/PowerPC/PPCInstrInfo.td | 2 +- lib/Target/PowerPC/PPCMCAsmInfo.cpp | 2 +- lib/Target/PowerPC/PPCSubtarget.cpp | 15 +- lib/Target/PowerPC/PPCSubtarget.h | 16 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 10 +- lib/Target/README.txt | 156 +- lib/Target/Sparc/SparcISelLowering.cpp | 2 +- lib/Target/SubtargetFeature.cpp | 8 +- lib/Target/SystemZ/SystemZISelLowering.cpp | 2 +- lib/Target/TargetData.cpp | 10 +- lib/Target/TargetInstrInfo.cpp | 7 +- lib/Target/TargetLibraryInfo.cpp | 17 +- lib/Target/TargetLoweringObjectFile.cpp | 21 +- lib/Target/TargetMachine.cpp | 16 +- lib/Target/X86/AsmParser/X86AsmParser.cpp | 139 +- .../X86/Disassembler/X86Disassembler.cpp | 5 + .../X86/Disassembler/X86DisassemblerDecoder.c | 285 +- .../X86/Disassembler/X86DisassemblerDecoder.h | 86 +- .../X86DisassemblerDecoderCommon.h | 29 +- .../X86/InstPrinter/X86ATTInstPrinter.cpp | 19 +- .../X86/InstPrinter/X86ATTInstPrinter.h | 14 +- .../X86/InstPrinter/X86InstComments.cpp | 40 +- .../X86/InstPrinter/X86IntelInstPrinter.cpp | 1 + .../X86/InstPrinter/X86IntelInstPrinter.h | 9 +- lib/Target/X86/README-X86-64.txt | 2 +- lib/Target/X86/README.txt | 179 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 66 +- lib/Target/X86/Utils/X86ShuffleDecode.h | 26 +- lib/Target/X86/X86.td | 23 +- lib/Target/X86/X86AsmBackend.cpp | 65 +- lib/Target/X86/X86AsmPrinter.cpp | 5 +- lib/Target/X86/X86CallingConv.td | 7 + lib/Target/X86/X86CodeEmitter.cpp | 8 + lib/Target/X86/X86FastISel.cpp | 513 +- lib/Target/X86/X86FloatingPoint.cpp | 2 +- lib/Target/X86/X86FrameLowering.cpp | 163 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 75 + lib/Target/X86/X86ISelLowering.cpp | 264 +- lib/Target/X86/X86ISelLowering.h | 9 +- 
lib/Target/X86/X86Instr3DNow.td | 113 +- lib/Target/X86/X86InstrArithmetic.td | 2 +- lib/Target/X86/X86InstrControl.td | 10 + lib/Target/X86/X86InstrFormats.td | 66 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 2 + lib/Target/X86/X86InstrInfo.cpp | 41 +- lib/Target/X86/X86InstrInfo.h | 136 +- lib/Target/X86/X86InstrInfo.td | 78 +- lib/Target/X86/X86InstrSSE.td | 855 ++- lib/Target/X86/X86InstrSystem.td | 40 +- lib/Target/X86/X86MCAsmInfo.cpp | 23 +- lib/Target/X86/X86MCAsmInfo.h | 8 + lib/Target/X86/X86MCCodeEmitter.cpp | 30 +- lib/Target/X86/X86RegisterInfo.cpp | 51 +- lib/Target/X86/X86RegisterInfo.h | 6 + lib/Target/X86/X86RegisterInfo.td | 20 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 2 +- lib/Target/X86/X86Subtarget.cpp | 3 +- lib/Target/X86/X86Subtarget.h | 19 +- lib/Target/X86/X86TargetMachine.cpp | 48 +- lib/Target/X86/X86TargetObjectFile.cpp | 23 +- lib/Target/X86/X86TargetObjectFile.h | 10 +- lib/Target/XCore/XCoreISelDAGToDAG.cpp | 186 +- lib/Target/XCore/XCoreISelLowering.cpp | 4 +- lib/Target/XCore/XCoreInstrInfo.td | 92 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 5 + lib/Target/XCore/XCoreRegisterInfo.h | 2 + lib/Transforms/IPO/ArgumentPromotion.cpp | 4 +- lib/Transforms/IPO/CMakeLists.txt | 1 - .../IPO/DeadArgumentElimination.cpp | 14 +- lib/Transforms/IPO/GlobalOpt.cpp | 164 +- lib/Transforms/IPO/IPConstantPropagation.cpp | 2 +- lib/Transforms/IPO/IPO.cpp | 1 - lib/Transforms/IPO/Inliner.cpp | 1 - lib/Transforms/IPO/Internalize.cpp | 5 +- lib/Transforms/IPO/LowerSetJmp.cpp | 2 +- lib/Transforms/IPO/MergeFunctions.cpp | 5 +- lib/Transforms/IPO/PartialInlining.cpp | 2 +- lib/Transforms/IPO/PruneEH.cpp | 1 - lib/Transforms/IPO/StructRetPromotion.cpp | 357 -- lib/Transforms/InstCombine/InstCombine.h | 9 +- .../InstCombine/InstCombineAndOrXor.cpp | 91 +- .../InstCombine/InstCombineCalls.cpp | 47 +- .../InstCombine/InstCombineCasts.cpp | 150 +- .../InstCombine/InstCombineCompares.cpp | 207 +- .../InstCombineLoadStoreAlloca.cpp | 33 +- 
.../InstCombine/InstCombineMulDivRem.cpp | 100 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 33 +- .../InstCombine/InstCombineSelect.cpp | 48 +- .../InstCombine/InstCombineShifts.cpp | 9 +- .../InstCombineSimplifyDemanded.cpp | 16 + .../InstCombine/InstCombineVectorOps.cpp | 10 +- .../InstCombine/InstCombineWorklist.h | 1 + .../InstCombine/InstructionCombining.cpp | 28 +- lib/Transforms/Instrumentation/CMakeLists.txt | 1 + .../Instrumentation/GCOVProfiling.cpp | 638 ++ .../Instrumentation/Instrumentation.cpp | 1 + .../Instrumentation/MaximumSpanningTree.h | 2 +- .../Instrumentation/OptimalEdgeProfiling.cpp | 28 +- .../Instrumentation/PathProfiling.cpp | 14 +- .../Instrumentation/ProfilingUtils.cpp | 43 +- .../Instrumentation/ProfilingUtils.h | 4 +- lib/Transforms/Scalar/CMakeLists.txt | 2 - lib/Transforms/Scalar/CodeGenPrepare.cpp | 377 +- .../Scalar/CorrelatedValuePropagation.cpp | 1 + lib/Transforms/Scalar/DCE.cpp | 1 - .../Scalar/DeadStoreElimination.cpp | 54 +- lib/Transforms/Scalar/GEPSplitter.cpp | 83 - lib/Transforms/Scalar/GVN.cpp | 449 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 155 +- lib/Transforms/Scalar/JumpThreading.cpp | 25 +- lib/Transforms/Scalar/LICM.cpp | 28 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 144 +- lib/Transforms/Scalar/LoopRotation.cpp | 9 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 31 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 35 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 19 +- lib/Transforms/Scalar/Reassociate.cpp | 154 +- lib/Transforms/Scalar/Reg2Mem.cpp | 2 +- lib/Transforms/Scalar/SCCP.cpp | 2 +- lib/Transforms/Scalar/Scalar.cpp | 27 +- .../Scalar/ScalarReplAggregates.cpp | 298 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 10 +- .../Scalar/SimplifyHalfPowrLibCalls.cpp | 160 - lib/Transforms/Scalar/SimplifyLibCalls.cpp | 127 +- .../Scalar/TailRecursionElimination.cpp | 15 +- lib/Transforms/Utils/BasicBlockUtils.cpp | 14 +- lib/Transforms/Utils/BreakCriticalEdges.cpp | 4 +- 
lib/Transforms/Utils/CodeExtractor.cpp | 12 +- lib/Transforms/Utils/InlineFunction.cpp | 4 +- lib/Transforms/Utils/LCSSA.cpp | 15 +- lib/Transforms/Utils/Local.cpp | 110 +- lib/Transforms/Utils/LoopSimplify.cpp | 7 +- lib/Transforms/Utils/LowerSwitch.cpp | 2 +- .../Utils/PromoteMemoryToRegister.cpp | 49 +- lib/Transforms/Utils/SSAUpdater.cpp | 15 +- lib/Transforms/Utils/SimplifyCFG.cpp | 215 +- .../Utils/UnifyFunctionExitNodes.cpp | 3 +- lib/Transforms/Utils/ValueMapper.cpp | 4 +- lib/VMCore/AsmWriter.cpp | 73 +- lib/VMCore/AutoUpgrade.cpp | 99 +- lib/VMCore/CMakeLists.txt | 1 + lib/VMCore/ConstantFold.cpp | 3 +- lib/VMCore/Constants.cpp | 70 +- lib/VMCore/ConstantsContext.h | 12 +- lib/VMCore/Core.cpp | 7 +- lib/VMCore/DebugInfoProbe.cpp | 258 + lib/VMCore/DebugLoc.cpp | 26 +- lib/VMCore/Dominators.cpp | 3 +- lib/VMCore/Function.cpp | 4 +- lib/VMCore/IRBuilder.cpp | 3 +- lib/VMCore/Instructions.cpp | 87 +- lib/VMCore/LLVMContextImpl.h | 2 +- lib/VMCore/Metadata.cpp | 47 +- lib/VMCore/PassManager.cpp | 31 +- lib/VMCore/PassRegistry.cpp | 2 +- lib/VMCore/Type.cpp | 9 +- lib/VMCore/TypesContext.h | 11 +- lib/VMCore/Verifier.cpp | 45 +- projects/sample/autoconf/configure.ac | 2 +- runtime/CMakeLists.txt | 5 + runtime/libprofile/CMakeLists.txt | 19 + runtime/libprofile/CommonProfiling.c | 4 + runtime/libprofile/GCDAProfiling.c | 152 + runtime/libprofile/Makefile | 4 +- runtime/libprofile/OptimalEdgeProfiling.c | 2 +- runtime/libprofile/PathProfiling.c | 21 +- runtime/libprofile/libprofile.exports | 5 + test/Analysis/BasicAA/intrinsics.ll | 39 + test/Analysis/BasicAA/store-promote.ll | 2 +- .../2006-09-26-PostDominanceFrontier.ll | 97 - .../2007-04-17-PostDominanceFrontier.ll | 692 -- .../2007-04-20-PostDom-Reset.ll | 28 - test/Analysis/RegionInfo/next.ll | 4 +- .../2011-03-09-ExactNoMaxBECount.ll | 34 + .../ScalarEvolution/2011-04-26-FoldAddRec.ll | 33 + test/Analysis/ScalarEvolution/nsw-offset.ll | 12 +- test/Analysis/ScalarEvolution/nsw.ll | 12 +- 
test/Analysis/ScalarEvolution/sext-iv-0.ll | 11 +- .../TypeBasedAliasAnalysis/intrinsics.ll | 27 + test/Assembler/AutoUpgradeIntrinsics.ll | 12 + .../aggregate-return-single-value.ll | 6 - test/Assembler/comment.ll | 5 +- test/Bitcode/neon-intrinsics.ll | 13 +- test/CMakeLists.txt | 22 +- test/CodeGen/ARM/2009-10-27-double-align.ll | 3 +- test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll | 2 +- test/CodeGen/ARM/2010-05-18-PostIndexBug.ll | 6 +- test/CodeGen/ARM/2010-08-04-StackVariable.ll | 4 +- test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll | 6 +- test/CodeGen/ARM/2010-11-29-PrologueBug.ll | 2 +- test/CodeGen/ARM/2010-12-13-reloc-pic.ll | 100 - test/CodeGen/ARM/2010-12-15-elf-lcomm.ll | 6 +- .../CodeGen/ARM/2011-03-10-DAGCombineCrash.ll | 47 + .../CodeGen/ARM/2011-03-15-LdStMultipleBug.ll | 55 + test/CodeGen/ARM/2011-03-23-PeepholeBug.ll | 41 + test/CodeGen/ARM/2011-04-07-schediv.ll | 31 + test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll | 34 + test/CodeGen/ARM/2011-04-12-AlignBug.ll | 11 + test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll | 15 + .../ARM/2011-04-15-AndVFlagPeepholeBug.ll | 22 + .../ARM/2011-04-15-RegisterCmpPeephole.ll | 41 + test/CodeGen/ARM/2011-04-26-SchedTweak.ll | 70 + test/CodeGen/ARM/2011-04-27-IfCvtBug.ll | 59 + test/CodeGen/ARM/align.ll | 2 +- test/CodeGen/ARM/arguments.ll | 2 +- test/CodeGen/ARM/arm-and-tst-peephole.ll | 10 +- test/CodeGen/ARM/arm-returnaddr.ll | 4 +- test/CodeGen/ARM/avoid-cpsr-rmw.ll | 16 + test/CodeGen/ARM/bx_fold.ll | 2 +- test/CodeGen/ARM/call-tc.ll | 2 +- test/CodeGen/ARM/carry.ll | 17 + test/CodeGen/ARM/code-placement.ll | 4 +- test/CodeGen/ARM/constants.ll | 12 +- test/CodeGen/ARM/crash-greedy.ll | 84 + test/CodeGen/ARM/debug-info-d16-reg.ll | 105 + test/CodeGen/ARM/debug-info-qreg.ll | 94 + test/CodeGen/ARM/debug-info-s16-reg.ll | 116 + test/CodeGen/ARM/divmod.ll | 58 + test/CodeGen/ARM/fabss.ll | 2 +- test/CodeGen/ARM/fadds.ll | 2 +- test/CodeGen/ARM/fast-isel-pred.ll | 58 + test/CodeGen/ARM/fast-isel-redefinition.ll | 11 + 
test/CodeGen/ARM/fast-isel-static.ll | 2 +- test/CodeGen/ARM/fast-isel.ll | 55 +- test/CodeGen/ARM/fcopysign.ll | 32 + test/CodeGen/ARM/fdivs.ll | 2 +- test/CodeGen/ARM/fmacs.ll | 53 + test/CodeGen/ARM/fmuls.ll | 2 +- test/CodeGen/ARM/fnmscs.ll | 17 +- test/CodeGen/ARM/fp-arg-shuffle.ll | 11 + test/CodeGen/ARM/fp.ll | 2 +- test/CodeGen/ARM/fp_convert.ll | 8 +- test/CodeGen/ARM/fpcmp-opt.ll | 2 +- test/CodeGen/ARM/ifcvt10.ll | 4 +- test/CodeGen/ARM/ifcvt5.ll | 2 +- test/CodeGen/ARM/ifcvt6.ll | 2 +- test/CodeGen/ARM/ifcvt7.ll | 2 +- test/CodeGen/ARM/ifcvt8.ll | 2 +- test/CodeGen/ARM/indirectbr.ll | 27 +- test/CodeGen/ARM/inlineasm3.ll | 2 +- test/CodeGen/ARM/int-to-fp.ll | 19 + test/CodeGen/ARM/ldm.ll | 10 +- test/CodeGen/ARM/ldrd.ll | 18 +- test/CodeGen/ARM/long.ll | 6 +- test/CodeGen/ARM/long_shift.ll | 10 +- test/CodeGen/ARM/lsr-code-insertion.ll | 8 +- test/CodeGen/ARM/lsr-on-unrolled-loops.ll | 5 - test/CodeGen/ARM/memcpy-inline.ll | 12 +- test/CodeGen/ARM/neon_div.ll | 2 +- test/CodeGen/ARM/neon_shift.ll | 11 + test/CodeGen/ARM/peephole-bitcast.ll | 26 + test/CodeGen/ARM/prefetch.ll | 21 +- test/CodeGen/ARM/reg_sequence.ll | 22 +- test/CodeGen/ARM/rev.ll | 30 +- test/CodeGen/ARM/select-imm.ll | 4 +- test/CodeGen/ARM/select.ll | 23 + test/CodeGen/ARM/select_xform.ll | 2 +- test/CodeGen/ARM/shifter_operand.ll | 10 +- test/CodeGen/ARM/shuffle.ll | 18 + test/CodeGen/ARM/smul.ll | 16 +- test/CodeGen/ARM/str_pre-2.ll | 5 +- test/CodeGen/ARM/sub.ll | 11 +- test/CodeGen/ARM/thumb1-varalloc.ll | 6 +- test/CodeGen/ARM/trap.ll | 11 +- test/CodeGen/ARM/umulo-32.ll | 27 + test/CodeGen/ARM/unaligned_load_store.ll | 16 +- test/CodeGen/ARM/undef-sext.ll | 14 + test/CodeGen/ARM/va_arg.ll | 10 +- test/CodeGen/ARM/vbsl-constant.ll | 115 + test/CodeGen/ARM/vcgt.ll | 7 +- test/CodeGen/ARM/vector-DAGCombine.ll | 18 + test/CodeGen/ARM/vext.ll | 12 +- test/CodeGen/ARM/vfp.ll | 5 +- test/CodeGen/ARM/vld1.ll | 9 +- test/CodeGen/ARM/vld3.ll | 7 +- test/CodeGen/ARM/vldlane.ll | 13 +- 
test/CodeGen/ARM/vmul.ll | 155 +- test/CodeGen/ARM/vst3.ll | 2 +- .../Blackfin/2009-08-04-LowerExtract-Live.ll | 1 + test/CodeGen/CellSPU/jumptable.ll | 2 +- test/CodeGen/CellSPU/loads.ll | 7 + test/CodeGen/CellSPU/rotate_ops.ll | 6 +- test/CodeGen/CellSPU/shift_ops.ll | 61 +- test/CodeGen/CellSPU/stores.ll | 8 + test/CodeGen/CellSPU/v2f32.ll | 12 +- test/CodeGen/Generic/crash.ll | 28 + test/CodeGen/MBlaze/fsl.ll | 18 +- test/CodeGen/MBlaze/loop.ll | 6 +- test/CodeGen/Mips/2008-07-22-Cstpool.ll | 2 +- test/CodeGen/Mips/2008-07-23-fpcmp.ll | 4 + test/CodeGen/Mips/2008-07-29-icmp.ll | 4 + test/CodeGen/Mips/2008-08-06-Alloca.ll | 1 + test/CodeGen/Mips/2010-07-20-Select.ll | 7 +- test/CodeGen/Mips/addc.ll | 13 + test/CodeGen/Mips/analyzebranch.ll | 46 + test/CodeGen/Mips/blockaddr.ll | 31 + .../Mips/buildpairextractelementf64.ll | 23 + test/CodeGen/Mips/cmov.ll | 5 +- test/CodeGen/Mips/divrem.ll | 51 + test/CodeGen/Mips/fpbr.ll | 119 + test/CodeGen/Mips/fpcmp.ll | 23 + test/CodeGen/Mips/internalfunc.ll | 52 + test/CodeGen/Mips/largeimm1.ll | 13 + test/CodeGen/Mips/o32_cc.ll | 52 +- test/CodeGen/Mips/o32_cc_vararg.ll | 278 + test/CodeGen/Mips/select.ll | 196 + test/CodeGen/PTX/add.ll | 70 +- test/CodeGen/PTX/bitwise.ll | 24 + test/CodeGen/PTX/bra.ll | 24 + test/CodeGen/PTX/exit.ll | 2 +- test/CodeGen/PTX/fdiv-sm10.ll | 15 + test/CodeGen/PTX/fdiv-sm13.ll | 15 + test/CodeGen/PTX/intrinsic.ll | 281 + test/CodeGen/PTX/ld.ll | 431 +- test/CodeGen/PTX/llvm-intrinsic.ll | 56 + test/CodeGen/PTX/mad.ll | 17 + test/CodeGen/PTX/mov.ll | 59 +- test/CodeGen/PTX/mul.ll | 39 + test/CodeGen/PTX/options.ll | 8 +- test/CodeGen/PTX/parameter-order.ll | 8 + test/CodeGen/PTX/ret.ll | 2 +- test/CodeGen/PTX/setp.ll | 134 + test/CodeGen/PTX/shl.ll | 2 +- test/CodeGen/PTX/shr.ll | 2 +- test/CodeGen/PTX/st.ll | 389 +- test/CodeGen/PTX/sub.ll | 70 +- test/CodeGen/PowerPC/2008-12-12-EH.ll | 2 +- test/CodeGen/PowerPC/2010-05-03-retaddr1.ll | 1 + test/CodeGen/PowerPC/Atomics-64.ll | 10 +- 
test/CodeGen/PowerPC/Frames-small.ll | 2 +- test/CodeGen/PowerPC/indirectbr.ll | 14 +- test/CodeGen/PowerPC/mulhs.ll | 2 +- test/CodeGen/PowerPC/ppc-prologue.ll | 4 +- test/CodeGen/SPARC/2011-01-11-FrameAddr.ll | 2 + .../2009-07-10-BadIncomingArgOffset.ll | 3 +- test/CodeGen/Thumb/2009-08-20-ISelBug.ll | 2 +- .../2010-01-15-local-alloc-spill-physical.ll | 20 - .../CodeGen/Thumb/2010-07-15-debugOrdering.ll | 2 +- test/CodeGen/Thumb/dyn-stackalloc.ll | 27 +- test/CodeGen/Thumb/rev.ll | 56 + test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll | 2 +- test/CodeGen/Thumb2/2009-09-28-ITBlockBug.ll | 2 +- .../Thumb2/2009-10-15-ITBlockBranch.ll | 5 +- .../CodeGen/Thumb2/2010-03-15-AsmCCClobber.ll | 16 +- test/CodeGen/Thumb2/2010-11-22-EpilogueBug.ll | 2 +- .../Thumb2/2011-04-21-FILoweringBug.ll | 23 + test/CodeGen/Thumb2/bfi.ll | 11 + test/CodeGen/Thumb2/cross-rc-coalescing-2.ll | 2 - test/CodeGen/Thumb2/ldr-str-imm12.ll | 4 +- test/CodeGen/Thumb2/machine-licm.ll | 26 +- test/CodeGen/Thumb2/thumb2-ldrd.ll | 2 +- test/CodeGen/Thumb2/thumb2-lsr3.ll | 6 +- test/CodeGen/Thumb2/thumb2-ror.ll | 17 +- test/CodeGen/Thumb2/thumb2-ror2.ll | 11 - test/CodeGen/Thumb2/thumb2-sbc.ll | 19 +- test/CodeGen/Thumb2/thumb2-sub3.ll | 10 +- test/CodeGen/Thumb2/thumb2-sub5.ll | 7 +- test/CodeGen/Thumb2/thumb2-uxtb.ll | 4 +- test/CodeGen/X86/2007-01-08-X86-64-Pointer.ll | 7 +- test/CodeGen/X86/2007-04-25-MMX-PADDQ.ll | 4 +- test/CodeGen/X86/2007-05-05-Personality.ll | 5 +- test/CodeGen/X86/2007-07-03-GR64ToVR64.ll | 8 +- test/CodeGen/X86/2007-07-18-Vector-Extract.ll | 6 +- test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll | 4 +- test/CodeGen/X86/2007-09-27-LDIntrinsics.ll | 4 +- test/CodeGen/X86/2008-02-22-ReMatBug.ll | 2 +- test/CodeGen/X86/2008-03-18-CoalescerBug.ll | 4 +- test/CodeGen/X86/2008-04-02-unnamedEH.ll | 2 +- test/CodeGen/X86/2008-04-16-ReMatBug.ll | 2 +- test/CodeGen/X86/2008-07-11-SpillerBug.ll | 1 - test/CodeGen/X86/2008-07-19-movups-spills.ll | 6 +- test/CodeGen/X86/2008-08-05-SpillerBug.ll | 44 
- test/CodeGen/X86/2008-09-18-inline-asm-2.ll | 6 +- .../CodeGen/X86/2008-12-12-PrivateEHSymbol.ll | 6 +- .../X86/2009-02-20-PreAllocSplit-Crash.ll | 2 +- test/CodeGen/X86/2009-03-11-CoalescerBug.ll | 85 - test/CodeGen/X86/2009-03-16-SpillerBug.ll | 2 +- test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 2 +- test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll | 2 +- test/CodeGen/X86/2009-04-24.ll | 2 +- .../X86/2010-02-19-TailCallRetAddrBug.ll | 2 +- test/CodeGen/X86/2010-04-23-mmx-movdq2q.ll | 5 +- .../X86/2010-05-03-CoalescerSubRegClobber.ll | 4 +- test/CodeGen/X86/2010-05-25-DotDebugLoc.ll | 1 + test/CodeGen/X86/2010-05-26-DotDebugLoc.ll | 18 +- test/CodeGen/X86/2010-05-28-Crash.ll | 1 + .../CodeGen/X86/2010-06-01-DeadArg-DbgInfo.ll | 3 +- test/CodeGen/X86/2010-08-04-StackVariable.ll | 4 +- .../X86/2010-09-17-SideEffectsInChain.ll | 10 +- .../X86/2010-09-30-CMOV-JumpTable-PHI.ll | 2 +- .../X86/2011-01-24-DbgValue-Before-Use.ll | 1 + test/CodeGen/X86/2011-02-27-Fpextend.ll | 7 + test/CodeGen/X86/2011-03-02-DAGCombiner.ll | 51 + test/CodeGen/X86/2011-03-08-Sched-crash.ll | 56 + .../X86/2011-03-09-Physreg-Coalescing.ll | 22 + .../X86/2011-03-30-CreateFixedObjCrash.ll | 10 + test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll | 65 + test/CodeGen/X86/3dnow-intrinsics.ll | 297 + test/CodeGen/X86/MachineSink-PHIUse.ll | 2 +- test/CodeGen/X86/SIMD/dg.exp | 5 + test/CodeGen/X86/SIMD/notvunpcklpd.ll | 20 + test/CodeGen/X86/SIMD/notvunpcklps.ll | 20 + test/CodeGen/X86/SIMD/vunpcklpd.ll | 20 + test/CodeGen/X86/SIMD/vunpcklps.ll | 20 + test/CodeGen/X86/abi-isel.ll | 5575 +++++++++-------- test/CodeGen/X86/add.ll | 15 + test/CodeGen/X86/adde-carry.ll | 26 + test/CodeGen/X86/aliases.ll | 2 - test/CodeGen/X86/alignment.ll | 6 +- test/CodeGen/X86/apm.ll | 11 +- test/CodeGen/X86/avoid-lea-scale2.ll | 4 +- test/CodeGen/X86/avx-intrinsics-x86.ll | 4 +- test/CodeGen/X86/bool-zext.ll | 35 + test/CodeGen/X86/break-anti-dependencies.ll | 3 +- test/CodeGen/X86/byval.ll | 3 +- test/CodeGen/X86/byval2.ll | 
27 +- test/CodeGen/X86/byval3.ll | 27 +- test/CodeGen/X86/byval4.ll | 27 +- test/CodeGen/X86/byval5.ll | 27 +- test/CodeGen/X86/call-push.ll | 16 + test/CodeGen/X86/coalesce-esp.ll | 2 +- test/CodeGen/X86/coalescer-commute2.ll | 9 +- test/CodeGen/X86/coalescer-cross.ll | 6 +- test/CodeGen/X86/commute-two-addr.ll | 3 +- test/CodeGen/X86/constant-pool-remat-0.ll | 17 +- .../X86/convert-2-addr-3-addr-inc64.ll | 10 +- test/CodeGen/X86/crash.ll | 18 +- test/CodeGen/X86/dbg-declare-arg.ll | 123 + test/CodeGen/X86/dbg-file-name.ll | 19 + test/CodeGen/X86/dbg-merge-loc-entry.ll | 3 +- .../X86/dbg-value-inlined-parameter.ll | 1 + test/CodeGen/X86/dbg-value-location.ll | 1 + test/CodeGen/X86/dbg-value-range.ll | 17 +- test/CodeGen/X86/divide-by-constant.ll | 11 + test/CodeGen/X86/dyn-stackalloc.ll | 9 +- test/CodeGen/X86/fast-isel-gep.ll | 19 + test/CodeGen/X86/fast-isel-i1.ll | 39 +- test/CodeGen/X86/fast-isel-shift-imm.ll | 8 - test/CodeGen/X86/fast-isel-x86-64.ll | 262 + test/CodeGen/X86/fast-isel-x86.ll | 17 +- test/CodeGen/X86/fast-isel.ll | 11 + test/CodeGen/X86/fold-mul-lohi.ll | 5 +- test/CodeGen/X86/fold-pcmpeqd-0.ll | 16 +- test/CodeGen/X86/fold-pcmpeqd-2.ll | 17 +- test/CodeGen/X86/fold-zext-trunc.ll | 23 + test/CodeGen/X86/fp-stack-compare.ll | 14 +- test/CodeGen/X86/fp-trunc.ll | 35 + test/CodeGen/X86/global-sections-tls.ll | 2 +- test/CodeGen/X86/global-sections.ll | 6 +- test/CodeGen/X86/h-register-store.ll | 32 +- test/CodeGen/X86/h-registers-0.ll | 26 +- test/CodeGen/X86/h-registers-1.ll | 2 +- test/CodeGen/X86/hidden-vis-pic.ll | 2 +- test/CodeGen/X86/i64-mem-copy.ll | 8 +- test/CodeGen/X86/iabs.ll | 11 +- test/CodeGen/X86/isel-sink3.ll | 6 +- test/CodeGen/X86/lea-3.ll | 17 +- test/CodeGen/X86/lock-inst-encoding.ll | 5 +- test/CodeGen/X86/loop-strength-reduce4.ll | 6 +- test/CodeGen/X86/lsr-interesting-step.ll | 4 +- test/CodeGen/X86/lsr-quadratic-expand.ll | 22 + test/CodeGen/X86/lsr-redundant-addressing.ll | 45 + test/CodeGen/X86/lsr-reuse-trunc.ll | 5 +- 
test/CodeGen/X86/lsr-reuse.ll | 1 + test/CodeGen/X86/machine-cse.ll | 4 +- test/CodeGen/X86/mcinst-lowering-cmp0.ll | 68 - test/CodeGen/X86/mmx-copy-gprs.ll | 10 +- test/CodeGen/X86/narrow-shl-cst.ll | 101 + test/CodeGen/X86/no-cfi.ll | 38 + test/CodeGen/X86/optimize-max-3.ll | 4 +- test/CodeGen/X86/or-address.ll | 8 +- .../CodeGen/X86/peep-vector-extract-concat.ll | 7 +- test/CodeGen/X86/personality.ll | 22 +- ...hi-constants.ll => phi-bit-propagation.ll} | 20 + test/CodeGen/X86/pic.ll | 2 +- test/CodeGen/X86/pmulld.ll | 12 +- test/CodeGen/X86/postra-licm.ll | 9 +- test/CodeGen/X86/pr2659.ll | 3 +- test/CodeGen/X86/pr3366.ll | 2 +- test/CodeGen/X86/pr3495-2.ll | 2 +- test/CodeGen/X86/pr3495.ll | 6 +- test/CodeGen/X86/pr9743.ll | 17 + test/CodeGen/X86/pre-split1.ll | 2 +- test/CodeGen/X86/pre-split10.ll | 2 +- test/CodeGen/X86/pre-split11.ll | 2 +- test/CodeGen/X86/pre-split2.ll | 2 +- test/CodeGen/X86/pre-split3.ll | 2 +- test/CodeGen/X86/pre-split4.ll | 2 +- test/CodeGen/X86/pre-split5.ll | 2 +- test/CodeGen/X86/pre-split6.ll | 2 +- test/CodeGen/X86/pre-split7.ll | 2 +- test/CodeGen/X86/pre-split8.ll | 2 +- test/CodeGen/X86/pre-split9.ll | 2 +- test/CodeGen/X86/remat-scalar-zero.ll | 1 + .../X86/scalar-min-max-fill-operand.ll | 13 +- test/CodeGen/X86/sext-i1.ll | 4 +- test/CodeGen/X86/shrink-compare.ll | 36 + test/CodeGen/X86/sse-align-0.ll | 3 +- test/CodeGen/X86/sse-align-3.ll | 7 +- test/CodeGen/X86/sse-align-7.ll | 4 +- test/CodeGen/X86/sse-commute.ll | 2 +- test/CodeGen/X86/sse2.ll | 6 +- test/CodeGen/X86/sse3.ll | 12 +- test/CodeGen/X86/sse_reload_fold.ll | 5 +- test/CodeGen/X86/stdarg.ll | 3 +- test/CodeGen/X86/stride-nine-with-base-reg.ll | 5 +- test/CodeGen/X86/stride-reuse.ll | 5 +- test/CodeGen/X86/sub-with-overflow.ll | 22 +- test/CodeGen/X86/tail-opts.ll | 30 +- test/CodeGen/X86/tailcall-returndup-void.ll | 37 + test/CodeGen/X86/tailcallbyval64.ll | 25 +- test/CodeGen/X86/tailcallstack64.ll | 12 +- test/CodeGen/X86/test-nofold.ll | 8 +- 
test/CodeGen/X86/twoaddr-lea.ll | 11 + test/CodeGen/X86/umulo-64.ll | 28 - test/CodeGen/X86/unaligned-load.ll | 4 +- test/CodeGen/X86/unknown-location.ll | 13 +- .../X86/unreachable-stack-protector.ll | 19 + test/CodeGen/X86/v2f32.ll | 67 +- test/CodeGen/X86/vec_cast.ll | 4 +- test/CodeGen/X86/vec_set-8.ll | 7 +- test/CodeGen/X86/vec_shuffle-16.ll | 23 +- test/CodeGen/X86/vec_shuffle-17.ll | 7 +- test/CodeGen/X86/vec_uint_to_fp.ll | 11 + test/CodeGen/X86/visibility.ll | 7 +- test/CodeGen/X86/widen_load-0.ll | 8 +- test/CodeGen/X86/win64_alloca_dynalloca.ll | 74 + test/CodeGen/X86/win64_vararg.ll | 33 + test/CodeGen/X86/win_chkstk.ll | 3 +- test/CodeGen/X86/x86-64-malloc.ll | 4 +- test/CodeGen/X86/zext-extract_subreg.ll | 1 + test/CodeGen/X86/zext-sext.ll | 1 + test/CodeGen/XCore/events.ll | 20 + test/CodeGen/XCore/mul64.ll | 33 +- test/CodeGen/XCore/ps-intrinsics.ll | 18 + test/CodeGen/XCore/resources.ll | 24 + test/CodeGen/XCore/scavenging.ll | 52 + test/CodeGen/XCore/sr-intrinsics.ll | 18 + test/CodeGen/XCore/threads.ll | 67 + test/CodeGen/XCore/trampoline.ll | 4 +- test/DebugInfo/2010-04-13-PubType.ll | 2 +- test/DebugInfo/array.ll | 34 + test/FrontendAda/real_cst.adb | 2 +- test/FrontendC++/2009-07-15-LineNumbers.cpp | 27 - .../2006-05-01-AppleAlignmentPragma.c | 2 +- test/FrontendC/2010-07-27-MinNoFoldConst.c | 2 +- test/FrontendC/2011-03-02-UnionInitializer.c | 2 + .../2011-03-08-ZeroFieldUnionInitializer.c | 7 + test/FrontendC/2011-03-31-ArrayRefFolding.c | 15 + test/FrontendC/cstring-align.c | 11 - test/FrontendC/mmx-inline-asm.c | 24 + test/FrontendC/vla-3.c | 11 + .../2011-03-02-ConstCFStringLiteralAlign.m | 11 + test/FrontendObjC/2011-03-08-IVarLookup.m | 32 + test/MC/ARM/arm_addrmode2.s | 34 + test/MC/ARM/arm_addrmode3.s | 18 + test/MC/ARM/arm_instructions.s | 27 + test/MC/ARM/elf-reloc-01.ll | 9 +- test/MC/ARM/elf-reloc-02.ll | 7 +- test/MC/ARM/elf-reloc-03.ll | 7 +- test/MC/ARM/neon-shift-encoding.s | 267 +- test/MC/ARM/simple-encoding.ll | 8 +- 
test/MC/ARM/thumb2.s | 16 + test/MC/AsmParser/directive_space.s | 5 + test/MC/AsmParser/dot-symbol.s | 12 + test/MC/AsmParser/exprs-invalid.s | 19 +- test/MC/AsmParser/floating-literals.s | 9 + test/MC/AsmParser/rename.s | 6 +- test/MC/AsmParser/section.s | 10 +- test/MC/COFF/basic-coff.s | 266 +- test/MC/COFF/bss.s | 30 +- test/MC/COFF/diff.s | 46 + test/MC/COFF/simple-fixups.s | 100 +- test/MC/COFF/symbol-alias.s | 124 +- test/MC/COFF/symbol-fragment-offset.s | 374 +- test/MC/COFF/weak.s | 102 +- test/MC/Disassembler/ARM/arm-tests.txt | 164 +- test/MC/Disassembler/ARM/invalid-BFI-arm.txt | 10 + .../MC/Disassembler/ARM/invalid-Bcc-thumb.txt | 10 + .../MC/Disassembler/ARM/invalid-CPS2p-arm.txt | 4 + .../MC/Disassembler/ARM/invalid-CPS3p-arm.txt | 4 + .../MC/Disassembler/ARM/invalid-DMB-thumb.txt | 16 + test/MC/Disassembler/ARM/invalid-DSB-arm.txt | 16 + .../Disassembler/ARM/invalid-LDC-form-arm.txt | 11 + .../ARM/invalid-LDRB_POST-arm.txt | 10 + .../ARM/invalid-LDRD_PRE-thumb.txt | 13 + test/MC/Disassembler/ARM/invalid-LDRT-arm.txt | 12 + .../Disassembler/ARM/invalid-LDR_POST-arm.txt | 4 + .../Disassembler/ARM/invalid-LDR_PRE-arm.txt | 10 + .../MC/Disassembler/ARM/invalid-LDRrs-arm.txt | 4 + .../Disassembler/ARM/invalid-LSL-regform.txt | 11 + test/MC/Disassembler/ARM/invalid-MCR-arm.txt | 10 + .../Disassembler/ARM/invalid-MOVTi16-arm.txt | 10 + test/MC/Disassembler/ARM/invalid-MOVr-arm.txt | 13 + .../Disassembler/ARM/invalid-MOVs-LSL-arm.txt | 9 + test/MC/Disassembler/ARM/invalid-MOVs-arm.txt | 17 + test/MC/Disassembler/ARM/invalid-MSRi-arm.txt | 12 + .../ARM/invalid-RFEorLDMIA-arm.txt | 11 + test/MC/Disassembler/ARM/invalid-RSC-arm.txt | 9 + test/MC/Disassembler/ARM/invalid-SBFX-arm.txt | 10 + .../MC/Disassembler/ARM/invalid-SMLAD-arm.txt | 11 + test/MC/Disassembler/ARM/invalid-SRS-arm.txt | 13 + test/MC/Disassembler/ARM/invalid-SSAT-arm.txt | 11 + .../ARM/invalid-STMIA_UPD-thumb.txt | 10 + .../Disassembler/ARM/invalid-STRBrs-arm.txt | 10 + 
test/MC/Disassembler/ARM/invalid-SXTB-arm.txt | 11 + .../MC/Disassembler/ARM/invalid-UMAAL-arm.txt | 11 + .../Disassembler/ARM/invalid-UQADD8-arm.txt | 12 + .../ARM/invalid-VLD1DUPq8_UPD-arm.txt | 10 + .../ARM/invalid-VLD3DUPd32_UPD-thumb.txt | 11 + .../ARM/invalid-VLDMSDB_UPD-arm.txt | 4 + .../MC/Disassembler/ARM/invalid-VQADD-arm.txt | 10 + .../ARM/invalid-VST2b32_UPD-arm.txt | 11 + .../Disassembler/ARM/invalid-t2Bcc-thumb.txt | 11 + .../ARM/invalid-t2LDRBT-thumb.txt | 10 + .../ARM/invalid-t2LDREXD-thumb.txt | 10 + .../ARM/invalid-t2LDRSHi12-thumb.txt | 10 + .../ARM/invalid-t2LDRSHi8-thumb.txt | 10 + .../ARM/invalid-t2STRD_PRE-thumb.txt | 10 + .../ARM/invalid-t2STREXB-thumb.txt | 10 + .../ARM/invalid-t2STREXD-thumb.txt | 10 + .../ARM/invalid-t2STR_POST-thumb.txt | 10 + test/MC/Disassembler/ARM/neon-tests.txt | 30 + test/MC/Disassembler/ARM/thumb-printf.txt | 77 + test/MC/Disassembler/ARM/thumb-tests.txt | 154 +- test/MC/Disassembler/X86/simple-tests.txt | 6 + test/MC/ELF/alias-reloc.s | 28 +- test/MC/ELF/align-bss.s | 2 +- test/MC/ELF/align-nops.s | 2 +- test/MC/ELF/align.s | 4 +- test/MC/ELF/basic-elf-32.s | 34 +- test/MC/ELF/basic-elf-64.s | 34 +- test/MC/ELF/cfi-adjust-cfa-offset.s | 46 + test/MC/ELF/cfi-advance-loc2.s | 8 +- test/MC/ELF/cfi-def-cfa-offset.s | 10 +- test/MC/ELF/cfi-def-cfa-register.s | 8 +- test/MC/ELF/cfi-def-cfa.s | 8 +- test/MC/ELF/cfi-offset.s | 8 +- test/MC/ELF/cfi-rel-offset.s | 49 + test/MC/ELF/cfi-rel-offset2.s | 41 + test/MC/ELF/cfi-remember.s | 10 +- test/MC/ELF/cfi-same-value.s | 42 + test/MC/ELF/cfi-zero-addr-delta.s | 8 +- test/MC/ELF/cfi.s | 10 +- test/MC/ELF/comdat.s | 12 +- test/MC/ELF/common.s | 2 +- test/MC/ELF/common2.s | 2 +- test/MC/ELF/debug-line.s | 2 +- test/MC/ELF/debug-loc.s | 2 +- test/MC/ELF/empty-dwarf-lines.s | 2 +- test/MC/ELF/empty.s | 10 +- test/MC/ELF/entsize.ll | 4 +- test/MC/ELF/entsize.s | 6 +- test/MC/ELF/global-offset.s | 2 +- test/MC/ELF/got.s | 6 +- test/MC/ELF/ident.s | 2 +- test/MC/ELF/local-reloc.s | 
20 +- test/MC/ELF/merge.s | 48 +- test/MC/ELF/noexec.s | 2 +- test/MC/ELF/pic-diff.s | 18 +- test/MC/ELF/relocation-386.s | 55 +- test/MC/ELF/relocation-pc.s | 8 +- test/MC/ELF/relocation.s | 16 +- test/MC/ELF/rename.s | 16 +- test/MC/ELF/section-quoting.s | 10 + test/MC/ELF/section.s | 24 +- test/MC/ELF/symref.s | 68 +- test/MC/ELF/tls-i386.s | 10 + test/MC/ELF/tls.s | 2 +- test/MC/ELF/undef2.s | 2 +- test/MC/ELF/weak-relocation.s | 15 + test/MC/ELF/weakref-reloc.s | 26 +- test/MC/ELF/weakref.s | 4 +- test/MC/MachO/darwin-x86_64-diff-relocs.s | 2 +- test/MC/MachO/section-attributes.s | 7 + test/MC/MachO/temp-labels.s | 33 + test/MC/MachO/variable-errors.s | 8 + test/MC/MachO/variable-exprs.s | 446 ++ test/MC/X86/padlock.s | 53 + test/MC/X86/x86-32-coverage.s | 8 + test/MC/X86/x86-32.s | 130 +- test/MC/X86/x86-64.s | 219 +- test/MC/X86/x86_64-encoding.s | 16 + test/Makefile | 34 +- test/TableGen/TargetInstrInfo.td | 2 +- test/Transforms/CodeGenPrepare/basic.ll | 3 +- .../ConstProp/2002-05-03-NotOperator.ll | 2 +- test/Transforms/ConstProp/basictest.ll | 2 +- test/Transforms/ConstProp/logicaltest.ll | 2 +- test/Transforms/ConstProp/overflow-ops.ll | 41 +- test/Transforms/ConstProp/phi.ll | 2 +- test/Transforms/DeadArgElim/deadexternal.ll | 13 + .../2011-03-25-DSEMiscompile.ll | 23 + test/Transforms/GVN/invariant-simple.ll | 36 - test/Transforms/GVN/rle.ll | 100 +- .../GlobalOpt/2011-04-09-EmptyGlobalCtors.ll | 5 + test/Transforms/GlobalOpt/cxx-dtor.ll | 31 + .../2009-04-14-shorten_iv_vars.ll | 2 +- .../2009-04-15-shorten-iv-vars-2.ll | 2 +- .../IndVarSimplify/2009-04-27-Floating.ll | 11 +- test/Transforms/IndVarSimplify/ada-loops.ll | 2 +- test/Transforms/IndVarSimplify/addrec-gep.ll | 2 +- .../IndVarSimplify/ashr-tripcount.ll | 2 +- test/Transforms/IndVarSimplify/iv-sext.ll | 2 +- test/Transforms/IndVarSimplify/iv-zext.ll | 2 +- test/Transforms/IndVarSimplify/max-pointer.ll | 2 +- test/Transforms/IndVarSimplify/pointer.ll | 2 +- .../preserve-gep-loop-variant.ll | 2 
+- .../IndVarSimplify/preserve-gep-nested.ll | 2 +- .../IndVarSimplify/preserve-gep-remainder.ll | 2 +- .../Transforms/IndVarSimplify/preserve-gep.ll | 2 +- .../2011-03-08-SRemMinusOneBadOpt.ll | 12 + test/Transforms/InstCombine/ExtractCast.ll | 27 + test/Transforms/InstCombine/and-or-not.ll | 2 +- test/Transforms/InstCombine/and2.ll | 9 + test/Transforms/InstCombine/debuginfo.ll | 57 + test/Transforms/InstCombine/div.ll | 38 +- test/Transforms/InstCombine/fcmp.ll | 60 + test/Transforms/InstCombine/fdiv.ll | 25 + .../InstCombine/fold-bin-operand.ll | 15 +- test/Transforms/InstCombine/gep-addrspace.ll | 19 + test/Transforms/InstCombine/icmp.ll | 133 + test/Transforms/InstCombine/intrinsics.ll | 27 + test/Transforms/InstCombine/merge-icmp.ll | 29 + test/Transforms/InstCombine/or.ll | 19 + test/Transforms/InstCombine/phi.ll | 86 +- test/Transforms/InstCombine/select.ll | 35 + test/Transforms/InstCombine/sext.ll | 58 + test/Transforms/InstCombine/shift.ll | 21 + .../InstCombine/sign-test-and-or.ll | 79 + test/Transforms/InstCombine/strcpy_chk-64.ll | 18 + .../InstCombine/udivrem-change-width.ll | 45 +- test/Transforms/InstSimplify/compare.ll | 155 +- test/Transforms/InstSimplify/rem.ll | 17 + .../Internalize/available_externally.ll | 16 + .../2011-04-02-SimplifyDeadBlock.ll | 32 + .../JumpThreading/2011-04-14-InfLoop.ll | 31 + test/Transforms/JumpThreading/pr9331.ll | 50 + .../LCSSA/2006-06-03-IncorrectIDFPhis.ll | 2 - test/Transforms/LCSSA/unused-phis.ll | 38 + .../LICM/2007-10-01-PromoteSafeValue.ll | 4 +- .../LICM/2011-04-06-HoistMissedASTUpdate.ll | 32 + .../2011-04-06-PromoteResultOfPromotion.ll | 37 + test/Transforms/LICM/2011-04-09-RAUW-AST.ll | 49 + test/Transforms/LICM/debug-value.ll | 62 + test/Transforms/LoopIdiom/debug-line.ll | 49 + test/Transforms/LoopRotate/crash.ll | 16 + test/Transforms/LoopSimplify/merge-exits.ll | 2 +- .../invariant_value_first.ll | 3 +- .../invariant_value_first_arg.ll | 3 +- .../LoopStrengthReduce/ops_after_indvar.ll | 4 +- 
.../var_stride_used_by_compare.ll | 2 +- test/Transforms/PhaseOrdering/basic.ll | 117 + test/Transforms/Reassociate/crash.ll | 25 + test/Transforms/Reassociate/secondary.ll | 24 + test/Transforms/SCCP/apint-basictest.ll | 2 +- test/Transforms/SCCP/apint-basictest2.ll | 2 +- test/Transforms/SCCP/apint-basictest3.ll | 2 +- test/Transforms/SCCP/apint-basictest4.ll | 2 +- .../SRETPromotion/2008-03-11-attributes.ll | 7 - .../2008-06-04-function-pointer-passing.ll | 24 - .../SRETPromotion/2008-06-05-non-call-use.ll | 20 - test/Transforms/SRETPromotion/basictest.ll | 33 - test/Transforms/SRETPromotion/dg.exp | 3 - .../ScalarRepl/2008-01-29-PromoteBug.ll | 2 +- .../ScalarRepl/2008-06-05-loadstore-agg.ll | 4 +- test/Transforms/ScalarRepl/dg.exp | 2 +- test/Transforms/ScalarRepl/inline-vector.ll | 53 + .../Transforms/ScalarRepl/only-memcpy-uses.ll | 27 + test/Transforms/ScalarRepl/union-pointer.ll | 2 +- test/Transforms/ScalarRepl/vector_promote.ll | 167 +- .../SimplifyCFG/2006-08-03-Crash.ll | 4 +- .../SimplifyCFG/2008-05-16-PHIBlockMerge.ll | 2 +- .../SimplifyCFG/2011-03-08-UnreachableUse.ll | 31 + test/Transforms/SimplifyCFG/PhiBlockMerge.ll | 1 + test/Transforms/SimplifyCFG/PhiEliminate2.ll | 15 +- test/Transforms/SimplifyCFG/PhiEliminate3.ll | 34 + .../SimplifyCFG/UnreachableEliminate.ll | 56 +- .../Transforms/SimplifyCFG/branch-fold-dbg.ll | 58 + test/Transforms/SimplifyCFG/hoist-dbgvalue.ll | 53 + .../SimplifyCFG/switch-on-const-select.ll | 138 + test/Transforms/SimplifyCFG/trap-debugloc.ll | 19 + .../Transforms/SimplifyLibCalls/debug-line.ll | 24 + test/Transforms/SimplifyLibCalls/half-powr.ll | 46 - test/Transforms/SimplifyLibCalls/iprintf.ll | 71 + tools/CMakeLists.txt | 12 + tools/Makefile | 7 +- tools/bugpoint/BugDriver.cpp | 22 +- tools/bugpoint/ExecutionDriver.cpp | 5 +- tools/bugpoint/Miscompilation.cpp | 34 +- tools/bugpoint/ToolRunner.cpp | 11 +- tools/edis/CMakeLists.txt | 33 +- tools/gold/CMakeLists.txt | 43 + tools/gold/gold-plugin.cpp | 121 +- 
tools/llc/llc.cpp | 30 +- tools/lli/lli.cpp | 1 + tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp | 10 +- tools/llvm-config/CMakeLists.txt | 30 +- tools/llvm-diff/CMakeLists.txt | 2 + tools/llvm-diff/DiffConsumer.cpp | 209 + tools/llvm-diff/DiffConsumer.h | 92 + tools/llvm-diff/DiffLog.cpp | 53 + tools/llvm-diff/DiffLog.h | 80 + tools/llvm-diff/DifferenceEngine.cpp | 27 +- tools/llvm-diff/DifferenceEngine.h | 96 +- tools/llvm-diff/llvm-diff.cpp | 218 +- tools/llvm-dis/CMakeLists.txt | 2 +- tools/llvm-dis/Makefile | 2 +- tools/llvm-dis/llvm-dis.cpp | 53 +- tools/llvm-ld/llvm-ld.cpp | 8 +- tools/llvm-mc/Disassembler.cpp | 9 +- tools/llvm-mc/Disassembler.h | 6 +- tools/llvm-mc/llvm-mc.cpp | 39 +- tools/llvm-objdump/llvm-objdump.cpp | 16 +- tools/llvm-rtdyld/CMakeLists.txt | 5 + tools/llvm-rtdyld/Makefile | 23 + tools/llvm-rtdyld/llvm-rtdyld.cpp | 151 + tools/llvm-stub/llvm-stub.c | 2 +- tools/llvmc/doc/LLVMC-Reference.rst | 443 +- tools/llvmc/doc/LLVMC-Tutorial.rst | 100 +- tools/llvmc/examples/Skeleton/README | 2 +- tools/llvmc/src/Base.td.in | 2 +- tools/lto/CMakeLists.txt | 27 + tools/lto/LTOCodeGenerator.cpp | 202 +- tools/lto/LTOCodeGenerator.h | 8 + tools/lto/LTOModule.cpp | 358 +- tools/lto/LTOModule.h | 21 +- tools/lto/Makefile | 3 +- tools/lto/lto.cpp | 23 +- tools/lto/lto.exports | 5 + tools/macho-dump/macho-dump.cpp | 23 +- tools/opt/GraphPrinters.cpp | 4 +- tools/opt/opt.cpp | 65 +- unittests/ADT/APFloatTest.cpp | 21 + unittests/ADT/APIntTest.cpp | 2 + unittests/Support/MathExtrasTest.cpp | 2 +- unittests/Transforms/Utils/Cloning.cpp | 1 + unittests/Transforms/Utils/Local.cpp | 6 +- unittests/VMCore/DerivedTypesTest.cpp | 26 +- unittests/VMCore/InstructionsTest.cpp | 14 + unittests/VMCore/MetadataTest.cpp | 12 +- unittests/VMCore/PassManagerTest.cpp | 2 +- unittests/VMCore/ValueMapTest.cpp | 1 + utils/CollectDebugInfoUsingLLDB.py | 182 - utils/CompareDebugInfo.py | 182 - utils/DSAextract.py | 2 +- utils/FileCheck/FileCheck.cpp | 24 +- utils/GenLibDeps.pl | 2 +- 
utils/KillTheDoctor/KillTheDoctor.cpp | 10 +- utils/NewNightlyTest.pl | 2 +- utils/TableGen/ARMDecoderEmitter.cpp | 102 +- utils/TableGen/AsmMatcherEmitter.cpp | 14 +- utils/TableGen/AsmMatcherEmitter.h | 2 - utils/TableGen/AsmWriterEmitter.cpp | 415 +- utils/TableGen/AsmWriterEmitter.h | 1 + utils/TableGen/CallingConvEmitter.h | 2 - utils/TableGen/ClangASTNodesEmitter.cpp | 11 +- utils/TableGen/ClangAttrEmitter.cpp | 66 +- utils/TableGen/ClangDiagnosticsEmitter.cpp | 58 +- utils/TableGen/ClangDiagnosticsEmitter.h | 10 +- utils/TableGen/ClangSACheckersEmitter.cpp | 155 +- utils/TableGen/CodeEmitterGen.cpp | 6 +- utils/TableGen/CodeGenDAGPatterns.cpp | 141 +- utils/TableGen/CodeGenDAGPatterns.h | 65 +- utils/TableGen/CodeGenInstruction.cpp | 1 + utils/TableGen/CodeGenInstruction.h | 94 +- utils/TableGen/CodeGenRegisters.h | 11 +- utils/TableGen/CodeGenTarget.cpp | 46 +- utils/TableGen/CodeGenTarget.h | 26 +- utils/TableGen/DAGISelEmitter.cpp | 26 +- utils/TableGen/DAGISelEmitter.h | 1 - utils/TableGen/DAGISelMatcher.cpp | 14 +- utils/TableGen/DAGISelMatcher.h | 18 +- utils/TableGen/DAGISelMatcherEmitter.cpp | 251 +- utils/TableGen/DAGISelMatcherGen.cpp | 15 +- utils/TableGen/DAGISelMatcherOpt.cpp | 1 - utils/TableGen/DisassemblerEmitter.cpp | 12 +- utils/TableGen/EDEmitter.cpp | 10 +- utils/TableGen/FastISelEmitter.cpp | 433 +- utils/TableGen/FixedLenDecoderEmitter.cpp | 22 +- utils/TableGen/InstrInfoEmitter.cpp | 1 + utils/TableGen/LLVMCConfigurationEmitter.cpp | 2 + utils/TableGen/NeonEmitter.cpp | 106 +- utils/TableGen/NeonEmitter.h | 4 - utils/TableGen/OptParserEmitter.cpp | 2 +- utils/TableGen/Record.h | 2 +- utils/TableGen/RegisterInfoEmitter.cpp | 104 +- utils/TableGen/SubtargetEmitter.cpp | 178 +- utils/TableGen/SubtargetEmitter.h | 9 +- utils/TableGen/TGLexer.h | 3 +- utils/TableGen/TGParser.cpp | 1 + utils/TableGen/TGValueTypes.cpp | 1 - utils/TableGen/TableGen.cpp | 10 +- utils/TableGen/X86DisassemblerTables.cpp | 68 +- 
utils/TableGen/X86DisassemblerTables.h | 11 +- utils/TableGen/X86RecognizableInstr.cpp | 243 +- utils/TableGen/X86RecognizableInstr.h | 16 +- utils/buildit/GNUmakefile | 2 +- utils/buildit/build_llvm | 4 +- utils/lit/lit/ProgressBar.py | 21 +- utils/lit/lit/TestRunner.py | 15 +- utils/lit/lit/TestingConfig.py | 2 +- utils/lit/setup.py | 2 +- utils/llvm-lit/Makefile | 9 +- utils/llvmbuild | 15 +- utils/profile.pl | 2 +- utils/release/findRegressions.py | 130 + utils/release/test-release.sh | 10 +- utils/show-diagnostics | 52 + utils/unittest/UnitTestMain/Makefile | 2 + utils/unittest/googletest/gtest-filepath.cc | 2 +- utils/unittest/googletest/gtest.cc | 2 +- .../unittest/googletest/include/gtest/gtest.h | 2 + .../include/gtest/internal/gtest-filepath.h | 2 +- 1574 files changed, 61681 insertions(+), 32596 deletions(-) rename cmake/modules/{LLVMConfig.cmake => LLVM-Config.cmake} (85%) rename cmake/modules/{LLVM.cmake => LLVMConfig.cmake.in} (65%) create mode 100644 cmake/modules/LLVMConfigVersion.cmake.in create mode 100644 include/llvm-c/Disassembler.h create mode 100644 include/llvm-c/Object.h delete mode 100644 include/llvm/Analysis/LiveValues.h create mode 100644 include/llvm/DebugInfoProbe.h create mode 100644 include/llvm/ExecutionEngine/RuntimeDyld.h create mode 100644 include/llvm/IntrinsicsPTX.td delete mode 100644 lib/Analysis/LiveValues.cpp create mode 100644 lib/CodeGen/AsmPrinter/ARMException.cpp create mode 100644 lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp create mode 100644 lib/CodeGen/AsmPrinter/DwarfCompileUnit.h create mode 100644 lib/CodeGen/InterferenceCache.cpp create mode 100644 lib/CodeGen/InterferenceCache.h create mode 100644 lib/ExecutionEngine/MCJIT/Intercept.cpp create mode 100644 lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h create mode 100644 lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt create mode 100644 lib/ExecutionEngine/RuntimeDyld/Makefile create mode 100644 lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp create mode 100644 
lib/MC/ELFObjectWriter.h create mode 100644 lib/MC/MCDisassembler/Disassembler.cpp create mode 100644 lib/MC/MCDisassembler/Disassembler.h create mode 100644 lib/MC/MCELF.cpp create mode 100644 lib/MC/MCELF.h create mode 100644 lib/MC/MCELFStreamer.h create mode 100644 lib/Object/MachOObjectFile.cpp create mode 100644 lib/Object/Object.cpp create mode 100644 lib/Target/MBlaze/MBlazeSchedule3.td create mode 100644 lib/Target/MBlaze/MBlazeSchedule5.td create mode 100644 lib/Target/Mips/MipsExpandPseudo.cpp create mode 100644 lib/Target/PTX/PTXIntrinsicInstrInfo.td delete mode 100644 lib/Transforms/IPO/StructRetPromotion.cpp create mode 100644 lib/Transforms/Instrumentation/GCOVProfiling.cpp delete mode 100644 lib/Transforms/Scalar/GEPSplitter.cpp delete mode 100644 lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp create mode 100644 lib/VMCore/DebugInfoProbe.cpp create mode 100644 runtime/CMakeLists.txt create mode 100644 runtime/libprofile/CMakeLists.txt create mode 100644 runtime/libprofile/GCDAProfiling.c create mode 100644 test/Analysis/BasicAA/intrinsics.ll delete mode 100644 test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll delete mode 100644 test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll delete mode 100644 test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll create mode 100644 test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll create mode 100644 test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll create mode 100644 test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll delete mode 100644 test/CodeGen/ARM/2010-12-13-reloc-pic.ll create mode 100644 test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll create mode 100644 test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll create mode 100644 test/CodeGen/ARM/2011-03-23-PeepholeBug.ll create mode 100644 test/CodeGen/ARM/2011-04-07-schediv.ll create mode 100644 test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll create mode 100644 test/CodeGen/ARM/2011-04-12-AlignBug.ll create mode 
100644 test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll create mode 100644 test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll create mode 100644 test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll create mode 100644 test/CodeGen/ARM/2011-04-26-SchedTweak.ll create mode 100644 test/CodeGen/ARM/2011-04-27-IfCvtBug.ll create mode 100644 test/CodeGen/ARM/avoid-cpsr-rmw.ll create mode 100644 test/CodeGen/ARM/crash-greedy.ll create mode 100644 test/CodeGen/ARM/debug-info-d16-reg.ll create mode 100644 test/CodeGen/ARM/debug-info-qreg.ll create mode 100644 test/CodeGen/ARM/debug-info-s16-reg.ll create mode 100644 test/CodeGen/ARM/divmod.ll create mode 100644 test/CodeGen/ARM/fast-isel-pred.ll create mode 100644 test/CodeGen/ARM/fast-isel-redefinition.ll create mode 100644 test/CodeGen/ARM/fp-arg-shuffle.ll create mode 100644 test/CodeGen/ARM/int-to-fp.ll create mode 100644 test/CodeGen/ARM/neon_shift.ll create mode 100644 test/CodeGen/ARM/peephole-bitcast.ll create mode 100644 test/CodeGen/ARM/shuffle.ll create mode 100644 test/CodeGen/ARM/undef-sext.ll create mode 100644 test/CodeGen/ARM/vbsl-constant.ll create mode 100644 test/CodeGen/Mips/addc.ll create mode 100644 test/CodeGen/Mips/analyzebranch.ll create mode 100644 test/CodeGen/Mips/blockaddr.ll create mode 100644 test/CodeGen/Mips/buildpairextractelementf64.ll create mode 100644 test/CodeGen/Mips/divrem.ll create mode 100644 test/CodeGen/Mips/fpbr.ll create mode 100644 test/CodeGen/Mips/fpcmp.ll create mode 100644 test/CodeGen/Mips/internalfunc.ll create mode 100644 test/CodeGen/Mips/largeimm1.ll create mode 100644 test/CodeGen/Mips/o32_cc_vararg.ll create mode 100644 test/CodeGen/Mips/select.ll create mode 100644 test/CodeGen/PTX/bitwise.ll create mode 100644 test/CodeGen/PTX/bra.ll create mode 100644 test/CodeGen/PTX/fdiv-sm10.ll create mode 100644 test/CodeGen/PTX/fdiv-sm13.ll create mode 100644 test/CodeGen/PTX/intrinsic.ll create mode 100644 test/CodeGen/PTX/llvm-intrinsic.ll create mode 100644 
test/CodeGen/PTX/mad.ll create mode 100644 test/CodeGen/PTX/mul.ll create mode 100644 test/CodeGen/PTX/parameter-order.ll create mode 100644 test/CodeGen/PTX/setp.ll delete mode 100644 test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll create mode 100644 test/CodeGen/Thumb/rev.ll create mode 100644 test/CodeGen/Thumb2/2011-04-21-FILoweringBug.ll delete mode 100644 test/CodeGen/Thumb2/thumb2-ror2.ll delete mode 100644 test/CodeGen/X86/2008-08-05-SpillerBug.ll delete mode 100644 test/CodeGen/X86/2009-03-11-CoalescerBug.ll create mode 100644 test/CodeGen/X86/2011-02-27-Fpextend.ll create mode 100644 test/CodeGen/X86/2011-03-02-DAGCombiner.ll create mode 100644 test/CodeGen/X86/2011-03-08-Sched-crash.ll create mode 100644 test/CodeGen/X86/2011-03-09-Physreg-Coalescing.ll create mode 100644 test/CodeGen/X86/2011-03-30-CreateFixedObjCrash.ll create mode 100644 test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll create mode 100644 test/CodeGen/X86/3dnow-intrinsics.ll create mode 100644 test/CodeGen/X86/SIMD/dg.exp create mode 100644 test/CodeGen/X86/SIMD/notvunpcklpd.ll create mode 100644 test/CodeGen/X86/SIMD/notvunpcklps.ll create mode 100644 test/CodeGen/X86/SIMD/vunpcklpd.ll create mode 100644 test/CodeGen/X86/SIMD/vunpcklps.ll create mode 100644 test/CodeGen/X86/adde-carry.ll create mode 100644 test/CodeGen/X86/bool-zext.ll create mode 100644 test/CodeGen/X86/dbg-declare-arg.ll create mode 100644 test/CodeGen/X86/dbg-file-name.ll delete mode 100644 test/CodeGen/X86/fast-isel-shift-imm.ll create mode 100644 test/CodeGen/X86/fast-isel-x86-64.ll create mode 100644 test/CodeGen/X86/fold-zext-trunc.ll create mode 100644 test/CodeGen/X86/fp-trunc.ll create mode 100644 test/CodeGen/X86/lsr-quadratic-expand.ll create mode 100644 test/CodeGen/X86/lsr-redundant-addressing.ll delete mode 100644 test/CodeGen/X86/mcinst-lowering-cmp0.ll create mode 100644 test/CodeGen/X86/narrow-shl-cst.ll create mode 100644 test/CodeGen/X86/no-cfi.ll rename test/CodeGen/X86/{phi-constants.ll => 
phi-bit-propagation.ll} (64%) create mode 100644 test/CodeGen/X86/pr9743.ll create mode 100644 test/CodeGen/X86/shrink-compare.ll create mode 100644 test/CodeGen/X86/tailcall-returndup-void.ll delete mode 100644 test/CodeGen/X86/umulo-64.ll create mode 100644 test/CodeGen/X86/unreachable-stack-protector.ll create mode 100644 test/CodeGen/X86/vec_uint_to_fp.ll create mode 100644 test/CodeGen/X86/win64_alloca_dynalloca.ll create mode 100644 test/CodeGen/XCore/ps-intrinsics.ll create mode 100644 test/CodeGen/XCore/scavenging.ll create mode 100644 test/CodeGen/XCore/sr-intrinsics.ll create mode 100644 test/CodeGen/XCore/threads.ll create mode 100644 test/DebugInfo/array.ll delete mode 100644 test/FrontendC++/2009-07-15-LineNumbers.cpp create mode 100644 test/FrontendC/2011-03-02-UnionInitializer.c create mode 100644 test/FrontendC/2011-03-08-ZeroFieldUnionInitializer.c create mode 100644 test/FrontendC/2011-03-31-ArrayRefFolding.c delete mode 100644 test/FrontendC/cstring-align.c create mode 100644 test/FrontendC/mmx-inline-asm.c create mode 100644 test/FrontendC/vla-3.c create mode 100644 test/FrontendObjC/2011-03-02-ConstCFStringLiteralAlign.m create mode 100644 test/FrontendObjC/2011-03-08-IVarLookup.m create mode 100644 test/MC/ARM/arm_addrmode2.s create mode 100644 test/MC/ARM/arm_addrmode3.s create mode 100644 test/MC/AsmParser/dot-symbol.s create mode 100644 test/MC/COFF/diff.s create mode 100644 test/MC/Disassembler/ARM/invalid-BFI-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-Bcc-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-CPS2p-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-CPS3p-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-DMB-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-DSB-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDC-form-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDRB_POST-arm.txt create mode 100644 
test/MC/Disassembler/ARM/invalid-LDRD_PRE-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDRT-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDR_POST-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDR_PRE-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LDRrs-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-LSL-regform.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MCR-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVTi16-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVr-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVs-LSL-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MOVs-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-MSRi-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-RFEorLDMIA-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-RSC-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SBFX-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SMLAD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SRS-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SSAT-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-STMIA_UPD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-STRBrs-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-SXTB-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-UMAAL-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-UQADD8-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VLD1DUPq8_UPD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VLD3DUPd32_UPD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VLDMSDB_UPD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VQADD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-VST2b32_UPD-arm.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2Bcc-thumb.txt create mode 100644 
test/MC/Disassembler/ARM/invalid-t2LDRBT-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2LDREXD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2LDRSHi12-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2LDRSHi8-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STRD_PRE-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STREXB-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STREXD-thumb.txt create mode 100644 test/MC/Disassembler/ARM/invalid-t2STR_POST-thumb.txt create mode 100644 test/MC/Disassembler/ARM/thumb-printf.txt create mode 100644 test/MC/ELF/cfi-adjust-cfa-offset.s create mode 100644 test/MC/ELF/cfi-rel-offset.s create mode 100644 test/MC/ELF/cfi-rel-offset2.s create mode 100644 test/MC/ELF/cfi-same-value.s create mode 100644 test/MC/ELF/section-quoting.s create mode 100644 test/MC/ELF/weak-relocation.s create mode 100644 test/MC/MachO/section-attributes.s create mode 100644 test/MC/MachO/temp-labels.s create mode 100644 test/MC/MachO/variable-errors.s create mode 100644 test/MC/MachO/variable-exprs.s create mode 100644 test/MC/X86/padlock.s create mode 100644 test/Transforms/DeadStoreElimination/2011-03-25-DSEMiscompile.ll delete mode 100644 test/Transforms/GVN/invariant-simple.ll create mode 100644 test/Transforms/GlobalOpt/2011-04-09-EmptyGlobalCtors.ll create mode 100644 test/Transforms/GlobalOpt/cxx-dtor.ll create mode 100644 test/Transforms/InstCombine/2011-03-08-SRemMinusOneBadOpt.ll create mode 100644 test/Transforms/InstCombine/ExtractCast.ll create mode 100644 test/Transforms/InstCombine/debuginfo.ll create mode 100644 test/Transforms/InstCombine/fcmp.ll create mode 100644 test/Transforms/InstCombine/fdiv.ll create mode 100644 test/Transforms/InstCombine/gep-addrspace.ll create mode 100644 test/Transforms/InstCombine/merge-icmp.ll create mode 100644 test/Transforms/InstCombine/sign-test-and-or.ll create mode 100644 
test/Transforms/InstCombine/strcpy_chk-64.ll create mode 100644 test/Transforms/InstSimplify/rem.ll create mode 100644 test/Transforms/Internalize/available_externally.ll create mode 100644 test/Transforms/JumpThreading/2011-04-02-SimplifyDeadBlock.ll create mode 100644 test/Transforms/JumpThreading/2011-04-14-InfLoop.ll create mode 100644 test/Transforms/JumpThreading/pr9331.ll create mode 100644 test/Transforms/LCSSA/unused-phis.ll create mode 100644 test/Transforms/LICM/2011-04-06-HoistMissedASTUpdate.ll create mode 100644 test/Transforms/LICM/2011-04-06-PromoteResultOfPromotion.ll create mode 100644 test/Transforms/LICM/2011-04-09-RAUW-AST.ll create mode 100644 test/Transforms/LICM/debug-value.ll create mode 100644 test/Transforms/LoopIdiom/debug-line.ll create mode 100644 test/Transforms/PhaseOrdering/basic.ll create mode 100644 test/Transforms/Reassociate/secondary.ll delete mode 100644 test/Transforms/SRETPromotion/2008-03-11-attributes.ll delete mode 100644 test/Transforms/SRETPromotion/2008-06-04-function-pointer-passing.ll delete mode 100644 test/Transforms/SRETPromotion/2008-06-05-non-call-use.ll delete mode 100644 test/Transforms/SRETPromotion/basictest.ll delete mode 100644 test/Transforms/SRETPromotion/dg.exp create mode 100644 test/Transforms/ScalarRepl/inline-vector.ll create mode 100644 test/Transforms/ScalarRepl/only-memcpy-uses.ll create mode 100644 test/Transforms/SimplifyCFG/2011-03-08-UnreachableUse.ll create mode 100644 test/Transforms/SimplifyCFG/PhiEliminate3.ll create mode 100644 test/Transforms/SimplifyCFG/branch-fold-dbg.ll create mode 100644 test/Transforms/SimplifyCFG/hoist-dbgvalue.ll create mode 100644 test/Transforms/SimplifyCFG/switch-on-const-select.ll create mode 100644 test/Transforms/SimplifyCFG/trap-debugloc.ll create mode 100644 test/Transforms/SimplifyLibCalls/debug-line.ll delete mode 100644 test/Transforms/SimplifyLibCalls/half-powr.ll create mode 100644 test/Transforms/SimplifyLibCalls/iprintf.ll create mode 100644 
tools/gold/CMakeLists.txt create mode 100644 tools/llvm-diff/DiffConsumer.cpp create mode 100644 tools/llvm-diff/DiffConsumer.h create mode 100644 tools/llvm-diff/DiffLog.cpp create mode 100644 tools/llvm-diff/DiffLog.h create mode 100644 tools/llvm-rtdyld/CMakeLists.txt create mode 100644 tools/llvm-rtdyld/Makefile create mode 100644 tools/llvm-rtdyld/llvm-rtdyld.cpp create mode 100644 tools/lto/CMakeLists.txt delete mode 100755 utils/CollectDebugInfoUsingLLDB.py delete mode 100755 utils/CompareDebugInfo.py create mode 100755 utils/release/findRegressions.py create mode 100755 utils/show-diagnostics diff --git a/CMakeLists.txt b/CMakeLists.txt index b357478bbc3a..e0404cf4b8bf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" ) -set(PACKAGE_VERSION "2.9") +set(PACKAGE_VERSION "3.0") set_property(GLOBAL PROPERTY USE_FOLDERS ON) @@ -81,6 +81,9 @@ set(LLVM_ALL_TARGETS XCore ) +# List of targets with JIT support: +set(LLVM_TARGETS_WITH_JIT X86 PowerPC ARM) + if( MSVC ) set(LLVM_TARGETS_TO_BUILD X86 CACHE STRING "Semicolon-separated list of targets to build, or \"all\".") @@ -135,9 +138,15 @@ include(AddLLVMDefinitions) option(LLVM_ENABLE_PIC "Build Position-Independent Code" ON) -include(config-ix) +# MSVC has a gazillion warnings with this. +if( MSVC ) + option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF) +else( MSVC ) + option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON) +endif() -include(HandleLLVMOptions) +option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) +option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." 
OFF) if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" ) option(LLVM_ENABLE_ASSERTIONS "Enable assertions" OFF) @@ -145,6 +154,12 @@ else() option(LLVM_ENABLE_ASSERTIONS "Enable assertions" ON) endif() +# All options referred to from HandleLLVMOptions have to be specified +# BEFORE this include, otherwise options will not be correctly set on +# first cmake run +include(config-ix) +include(HandleLLVMOptions) + configure_file( ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/config.h.cmake ${LLVM_BINARY_DIR}/include/llvm/Config/config.h) @@ -161,16 +176,6 @@ set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LLVM_TOOLS_BINARY_DIR} ) set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib ) set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib ) -# MSVC has a gazillion warnings with this. -if( MSVC ) - option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF) -else( MSVC ) - option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON) -endif() - -option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON) -option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF) - set(CMAKE_INCLUDE_CURRENT_DIR ON) include_directories( ${LLVM_BINARY_DIR}/include ${LLVM_MAIN_INCLUDE_DIR}) @@ -182,15 +187,6 @@ endif( ${CMAKE_SYSTEM_NAME} MATCHES SunOS ) include(AddLLVM) include(TableGen) -if( MINGW ) - get_system_libs(LLVM_SYSTEM_LIBS_LIST) - foreach(l ${LLVM_SYSTEM_LIBS_LIST}) - set(LLVM_SYSTEM_LIBS "${LLVM_SYSTEM_LIBS} -l${l}") - endforeach() - set(CMAKE_CXX_STANDARD_LIBRARIES "${CMAKE_CXX_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}") - set(CMAKE_C_STANDARD_LIBRARIES "${CMAKE_C_STANDARD_LIBRARIES}${LLVM_SYSTEM_LIBS}") -endif() - if( MINGW ) # People report that -O3 is unreliable on MinGW. The traditional # build also uses -O2 for that reason: @@ -231,6 +227,13 @@ if( LLVM_INCLUDE_TOOLS ) add_subdirectory(tools) endif() +option(LLVM_BUILD_RUNTIME + "Build the LLVM runtime libraries. If OFF, just generate build targets." 
ON) +option(LLVM_INCLUDE_RUNTIME "Generate build targets for the LLVM runtimes" ON) +if( LLVM_INCLUDE_RUNTIME ) + add_subdirectory(runtime) +endif() + option(LLVM_BUILD_EXAMPLES "Build the LLVM example programs. If OFF, just generate build targets." OFF) option(LLVM_INCLUDE_EXAMPLES "Generate build targets for the LLVM examples" ON) @@ -239,7 +242,7 @@ if( LLVM_INCLUDE_EXAMPLES ) endif() option(LLVM_BUILD_TESTS - "Build LLVM unit tests. If OFF, just generate build targes." OFF) + "Build LLVM unit tests. If OFF, just generate build targets." OFF) if( LLVM_INCLUDE_TESTS ) add_subdirectory(test) add_subdirectory(utils/unittest) @@ -260,6 +263,7 @@ install(DIRECTORY include/ PATTERN "*.h" PATTERN "*.td" PATTERN "*.inc" + PATTERN "LICENSE.TXT" PATTERN ".svn" EXCLUDE ) diff --git a/Makefile b/Makefile index dbb759dd5fce..7dad07b6f053 100644 --- a/Makefile +++ b/Makefile @@ -168,6 +168,15 @@ install-clang: install install-clang-c: install install-libs: install +# If SHOW_DIAGNOSTICS is enabled, clear the diagnostics file first. +ifeq ($(SHOW_DIAGNOSTICS),1) +clean-diagnostics: + $(Verb) rm -f $(LLVM_OBJ_ROOT)/$(BuildMode)/diags +.PHONY: clean-diagnostics + +all-local:: clean-diagnostics +endif + #------------------------------------------------------------------------ # Make sure the generated headers are up-to-date. This must be kept in # sync with the AC_CONFIG_HEADER invocations in autoconf/configure.ac @@ -198,6 +207,12 @@ ifneq ($(ENABLE_OPTIMIZED),1) $(Echo) '*****' optimized build. Use 'make ENABLE_OPTIMIZED=1' to $(Echo) '*****' make an optimized build. Alternatively you can $(Echo) '*****' configure with --enable-optimized. 
+ifeq ($(SHOW_DIAGNOSTICS),1) + $(Verb) if test -s $(LLVM_OBJ_ROOT)/$(BuildMode)/diags; then \ + $(LLVM_SRC_ROOT)/utils/show-diagnostics \ + $(LLVM_OBJ_ROOT)/$(BuildMode)/diags; \ + fi +endif endif endif diff --git a/Makefile.rules b/Makefile.rules index c0a9112c31be..71d4307abae0 100644 --- a/Makefile.rules +++ b/Makefile.rules @@ -444,11 +444,11 @@ endif # LLVM Capable Compiler #-------------------------------------------------------------------- -ifeq ($(LLVMCC_OPTION),llvm-gcc) +ifneq ($(findstring llvm-gcc,$(LLVMCC_OPTION)),) LLVMCC := $(LLVMGCC) LLVMCXX := $(LLVMGXX) else - ifeq ($(LLVMCC_OPTION),clang) + ifneq ($(findstring clang,$(LLVMCC_OPTION)),) ifneq ($(CLANGPATH),) LLVMCC := $(CLANGPATH) LLVMCXX := $(CLANGXXPATH) @@ -646,26 +646,42 @@ CPP.Flags += $(sort -I$(PROJ_OBJ_DIR) -I$(PROJ_SRC_DIR) \ $(LLVM_OBJ_ROOT) $(LLVM_SRC_ROOT))) \ $(CPP.BaseFlags) +# SHOW_DIAGNOSTICS support. +ifeq ($(SHOW_DIAGNOSTICS),1) + Compile.Wrapper := env CC_LOG_DIAGNOSTICS=1 \ + CC_LOG_DIAGNOSTICS_FILE="$(LLVM_OBJ_ROOT)/$(BuildMode)/diags" +else + Compile.Wrapper := +endif + ifeq ($(BUILD_COMPONENT), 1) - Compile.C = $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \ + Compile.C = $(Compile.Wrapper) \ + $(BUILD_CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Compile.CXX = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \ + Compile.CXX = $(Compile.Wrapper) \ + $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \ $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Preprocess.CXX= $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \ + Preprocess.CXX= $(Compile.Wrapper) \ + $(BUILD_CXX) $(CPP.Flags) $(CPPFLAGS) $(TargetCommonOpts) \ $(CompileCommonOpts) $(CXX.Flags) -E - Link = $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \ - $(LDFLAGS) \ - $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip) + Link = $(Compile.Wrapper) \ + $(BUILD_CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) \ + $(LD.Flags) 
$(LDFLAGS) \ + $(TargetCommonOpts) $(CompileCommonOpts) $(Strip) else - Compile.C = $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \ + Compile.C = $(Compile.Wrapper) \ + $(CC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Compile.CXX = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \ + Compile.CXX = $(Compile.Wrapper) \ + $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(CPPFLAGS) \ $(TargetCommonOpts) $(CompileCommonOpts) -c - Preprocess.CXX= $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \ + Preprocess.CXX= $(Compile.Wrapper) \ + $(CXX) $(CPP.Flags) $(TargetCommonOpts) $(CPPFLAGS) \ $(CompileCommonOpts) $(CXX.Flags) -E - Link = $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LDFLAGS) \ - $(TargetCommonOpts) $(CompileCommonOpts) $(LD.Flags) $(Strip) + Link = $(Compile.Wrapper) \ + $(CXX) $(CPP.Flags) $(CXX.Flags) $(CXXFLAGS) $(LD.Flags) \ + $(LDFLAGS) $(TargetCommonOpts) $(CompileCommonOpts) $(Strip) endif BCCompile.C = $(LLVMCC) $(CPP.Flags) $(C.Flags) $(CFLAGS) $(CPPFLAGS) \ @@ -719,6 +735,24 @@ BaseNameSources := $(sort $(basename $(Sources))) ObjectsO := $(BaseNameSources:%=$(ObjDir)/%.o) ObjectsBC := $(BaseNameSources:%=$(ObjDir)/%.bc) +#---------------------------------------------------------- +# For Mingw MSYS bash and Python/w32: +# +# $(ECHOPATH) prints DOSish pathstring. +# ex) $(ECHOPATH) /include/sys/types.h +# --> C:/mingw/include/sys/types.h +# built-in "echo" does not transform path to DOSish path. +# +# FIXME: It would not be needed when MSYS's python +# were provided. 
+#---------------------------------------------------------- + +ifeq (-mingw32,$(findstring -mingw32,$(BUILD_TRIPLE))) + ECHOPATH := $(Verb)python -u -c "import sys;print ' '.join(sys.argv[1:])" +else + ECHOPATH := $(Verb)$(ECHO) +endif + ############################################################################### # DIRECTORIES: Handle recursive descent of directory structure ############################################################################### @@ -1510,31 +1544,31 @@ BC_DEPEND_MOVEFILE = then $(MV) -f "$(ObjDir)/$*.bc.d.tmp" "$(ObjDir)/$*.bc.d"; $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) $(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)" $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \ - $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \ $(BC_DEPEND_MOVEFILE) $(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) $(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)" $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \ - $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \ $(BC_DEPEND_MOVEFILE) $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) $(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)" $(Verb) if $(BCCompile.CXX) $(BC_DEPEND_OPTIONS) \ - $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \ $(BC_DEPEND_MOVEFILE) $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) $(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)" $(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \ - $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) ; \ + $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \ $(BC_DEPEND_MOVEFILE) $(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) $(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)" $(Verb) if $(BCCompile.C) $(BC_DEPEND_OPTIONS) \ - $< -o $(ObjDir)/$*.ll -S -$(LLVMCC_EMITIR_FLAG) 
; \ + $< -o $(ObjDir)/$*.ll -S $(LLVMCC_EMITIR_FLAG) ; \ $(BC_DEPEND_MOVEFILE) # Provide alternate rule sets if dependencies are disabled @@ -1562,23 +1596,23 @@ $(ObjDir)/%.o: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(ObjDir)/%.ll: %.cpp $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) $(Echo) "Compiling $*.cpp for $(BuildMode) build (bytecode)" - $(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + $(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG) $(ObjDir)/%.ll: %.mm $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) $(Echo) "Compiling $*.mm for $(BuildMode) build (bytecode)" - $(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + $(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG) $(ObjDir)/%.ll: %.cc $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCXX) $(Echo) "Compiling $*.cc for $(BuildMode) build (bytecode)" - $(BCCompile.CXX) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + $(BCCompile.CXX) $< -o $@ -S $(LLVMCC_EMITIR_FLAG) $(ObjDir)/%.ll: %.c $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) $(Echo) "Compiling $*.c for $(BuildMode) build (bytecode)" - $(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + $(BCCompile.C) $< -o $@ -S $(LLVMCC_EMITIR_FLAG) $(ObjDir)/%.ll: %.m $(ObjDir)/.dir $(BUILT_SOURCES) $(LLVMCC) $(Echo) "Compiling $*.m for $(BuildMode) build (bytecode)" - $(BCCompile.C) $< -o $@ -S -$(LLVMCC_EMITIR_FLAG) + $(BCCompile.C) $< -o $@ -S $(LLVMCC_EMITIR_FLAG) endif @@ -1990,7 +2024,7 @@ $(DistZip) : $(TopDistDir)/.makedistdir $(Verb) cd $(PROJ_OBJ_ROOT) ; $(ZIP) -rq $(DistZip) $(DistName) dist :: $(DistTarGZip) $(DistTarBZ2) $(DistZip) - $(Echo) ===== DISTRIBUTION PACKAGING SUCESSFUL ===== + $(Echo) ===== DISTRIBUTION PACKAGING SUCCESSFUL ===== DistCheckDir := $(PROJ_OBJ_ROOT)/_distcheckdir @@ -2139,8 +2173,13 @@ install-local:: $(Verb) $(MKDIR) $(DESTDIR)$(PROJ_includedir) $(Verb) if test -d "$(PROJ_SRC_ROOT)/include" ; then \ cd $(PROJ_SRC_ROOT)/include && \ - for hdr in `find . -type f '!' '(' -name '*~' \ - -o -name '.#*' -o -name '*.in' ')' -print | grep -v CVS | \ + for hdr in `find . 
-type f \ + '(' -name LICENSE.TXT \ + -o -name '*.def' \ + -o -name '*.h' \ + -o -name '*.inc' \ + -o -name '*.td' \ + ')' -print | grep -v CVS | \ grep -v .svn` ; do \ instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \ if test \! -d "$$instdir" ; then \ @@ -2153,7 +2192,19 @@ install-local:: ifneq ($(PROJ_SRC_ROOT),$(PROJ_OBJ_ROOT)) $(Verb) if test -d "$(PROJ_OBJ_ROOT)/include" ; then \ cd $(PROJ_OBJ_ROOT)/include && \ - for hdr in `find . -type f -print | grep -v CVS` ; do \ + for hdr in `find . -type f \ + '(' -name LICENSE.TXT \ + -o -name '*.def' \ + -o -name '*.h' \ + -o -name '*.inc' \ + -o -name '*.td' \ + ')' -print | grep -v CVS | \ + grep -v .svn` ; do \ + instdir=`dirname "$(DESTDIR)$(PROJ_includedir)/$$hdr"` ; \ + if test \! -d "$$instdir" ; then \ + $(EchoCmd) Making install directory $$instdir ; \ + $(MKDIR) $$instdir ;\ + fi ; \ $(DataInstall) $$hdr $(DESTDIR)$(PROJ_includedir)/$$hdr ; \ done ; \ fi diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 9259633de94a..b55f56444faf 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -31,7 +31,7 @@ dnl=== dnl===-----------------------------------------------------------------------=== dnl Initialize autoconf and define the package name, version number and dnl email address for reporting bugs. -AC_INIT([[llvm]],[[2.9svn]],[llvmbugs@cs.uiuc.edu]) +AC_INIT([[llvm]],[[3.0svn]],[llvmbugs@cs.uiuc.edu]) dnl Provide a copyright substitution and ensure the copyright notice is included dnl in the output of --version option of the generated configure script. 
@@ -657,12 +657,19 @@ for a_target in $TARGETS_TO_BUILD; do LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target" LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo" LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter" + if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then + LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser" + fi AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGET, $LLVM_NATIVE_TARGET, [LLVM name for the native Target init function, if available]) AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETINFO, $LLVM_NATIVE_TARGETINFO, [LLVM name for the native TargetInfo init function, if available]) AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPRINTER, $LLVM_NATIVE_ASMPRINTER, [LLVM name for the native AsmPrinter init function, if available]) + if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then + AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPARSER, $LLVM_NATIVE_ASMPARSER, + [LLVM name for the native AsmParser init function, if available]) + fi fi done @@ -1422,6 +1429,24 @@ if test "$llvm_cv_os_type" = "MingW" ; then AC_CHECK_LIB(gcc,__cmpdi2,AC_DEFINE([HAVE___CMPDI2],[1],[Have host's __cmpdi2])) fi +dnl Check Win32 API EnumerateLoadedModules. +if test "$llvm_cv_os_type" = "MingW" ; then + AC_MSG_CHECKING([whether EnumerateLoadedModules() accepts new decl]) + AC_COMPILE_IFELSE([[#include +#include +extern void foo(PENUMLOADED_MODULES_CALLBACK); +extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID));]], +[ + AC_MSG_RESULT([yes]) + llvm_cv_win32_elmcb_pcstr="PCSTR" +], +[ + AC_MSG_RESULT([no]) + llvm_cv_win32_elmcb_pcstr="PSTR" +]) + AC_DEFINE_UNQUOTED([WIN32_ELMCB_PCSTR],$llvm_cv_win32_elmcb_pcstr,[Type of 1st arg on ELM Callback]) +fi + dnl Check for variations in the Standard C++ library and STL. These macros are dnl provided by LLVM in the autoconf/m4 directory. 
AC_FUNC_ISNAN diff --git a/autoconf/m4/libtool.m4 b/autoconf/m4/libtool.m4 index a8b5e6a94fc5..e89738cc9129 100644 --- a/autoconf/m4/libtool.m4 +++ b/autoconf/m4/libtool.m4 @@ -1118,7 +1118,7 @@ if test -n "$_LT_AC_TAGVAR(hardcode_libdir_flag_spec, $1)" || \ test -n "$_LT_AC_TAGVAR(runpath_var, $1)" || \ test "X$_LT_AC_TAGVAR(hardcode_automatic, $1)" = "Xyes" ; then - # We can hardcode non-existant directories. + # We can hardcode non-existent directories. if test "$_LT_AC_TAGVAR(hardcode_direct, $1)" != no && # If the only mechanism to avoid hardcoding is shlibpath_var, we # have to relink, otherwise we might link with an installed library diff --git a/autoconf/m4/ltdl.m4 b/autoconf/m4/ltdl.m4 index bc9e2ad24193..407a16e2d694 100644 --- a/autoconf/m4/ltdl.m4 +++ b/autoconf/m4/ltdl.m4 @@ -156,7 +156,7 @@ AC_CACHE_CHECK([whether deplibs are loaded by dlopen], osf[[1234]]*) # dlopen did load deplibs (at least at 4.x), but until the 5.x series, # it did *not* use an RPATH in a shared library to find objects the - # library depends on, so we explictly say `no'. + # library depends on, so we explicitly say `no'. libltdl_cv_sys_dlopen_deplibs=no ;; osf5.0|osf5.0a|osf5.1) diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index c2fe4317b53a..c1b22d4eef58 100755 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -224,6 +224,7 @@ check_type_exists(error_t errno.h HAVE_ERROR_T) # available programs checks function(llvm_find_program name) string(TOUPPER ${name} NAME) + string(REGEX REPLACE "\\." 
"_" NAME ${NAME}) find_program(LLVM_PATH_${NAME} ${name}) mark_as_advanced(LLVM_PATH_${NAME}) if(LLVM_PATH_${NAME}) @@ -241,6 +242,7 @@ llvm_find_program(neato) llvm_find_program(fdp) llvm_find_program(dot) llvm_find_program(dotty) +llvm_find_program(xdot.py) if( LLVM_ENABLE_FFI ) find_path(FFI_INCLUDE_PATH ffi.h PATHS ${FFI_INCLUDE_DIR}) @@ -269,6 +271,10 @@ if( LLVM_ENABLE_FFI ) check_symbol_exists(ffi_call ${FFI_HEADER} HAVE_FFI_CALL) list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH}) list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH}) +else() + unset(HAVE_FFI_FFI_H CACHE) + unset(HAVE_FFI_H CACHE) + unset(HAVE_FFI_CALL CACHE) endif( LLVM_ENABLE_FFI ) # Define LLVM_MULTITHREADED if gcc atomic builtins exists. @@ -319,24 +325,19 @@ elseif (LLVM_NATIVE_ARCH MATCHES "xcore") elseif (LLVM_NATIVE_ARCH MATCHES "msp430") set(LLVM_NATIVE_ARCH MSP430) else () - message(STATUS - "Unknown architecture ${LLVM_NATIVE_ARCH}; lli will not JIT code") - set(LLVM_NATIVE_ARCH) + message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") endif () -if (LLVM_NATIVE_ARCH) - list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX) - if (NATIVE_ARCH_IDX EQUAL -1) - message(STATUS - "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code") - set(LLVM_NATIVE_ARCH) - else () - message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}") - set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target) - set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo) - set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter) - endif () -endif() +list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX) +if (NATIVE_ARCH_IDX EQUAL -1) + message(STATUS + "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code") +else () + message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}") + set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target) + 
set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo) + set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter) +endif () if( MINGW ) set(HAVE_LIBIMAGEHLP 1) @@ -365,6 +366,21 @@ else( MSVC ) set(LTDL_DLOPEN_DEPLIBS 0) # TODO endif( MSVC ) +if( PURE_WINDOWS ) + CHECK_CXX_SOURCE_COMPILES(" + #include + #include + extern \"C\" void foo(PENUMLOADED_MODULES_CALLBACK); + extern \"C\" void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID)); + int main(){return 0;}" + HAVE_ELMCB_PCSTR) + if( HAVE_ELMCB_PCSTR ) + set(WIN32_ELMCB_PCSTR "PCSTR") + else() + set(WIN32_ELMCB_PCSTR "PSTR") + endif() +endif( PURE_WINDOWS ) + # FIXME: Signal handler return type, currently hardcoded to 'void' set(RETSIGTYPE void) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 764c6591c457..c13143bb0d47 100755 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1,5 +1,5 @@ include(LLVMProcessSources) -include(LLVMConfig) +include(LLVM-Config) macro(add_llvm_library name) llvm_process_sources( ALL_FILES ${ARGN} ) @@ -10,13 +10,20 @@ macro(add_llvm_library name) endif( LLVM_COMMON_DEPENDS ) if( BUILD_SHARED_LIBS ) - get_system_libs(sl) - target_link_libraries( ${name} ${sl} ) + llvm_config( ${name} ${LLVM_LINK_COMPONENTS} ) endif() - install(TARGETS ${name} - LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} - ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}) + # Ensure that the system libraries always comes last on the + # list. Without this, linking the unit tests on MinGW fails. + link_system_libs( ${name} ) + + if( EXCLUDE_FROM_ALL ) + set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON) + else() + install(TARGETS ${name} + LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}) + endif() # The LLVM Target library shall be built before its sublibraries # (asmprinter, etc) because those may use tablegenned files which # generation is triggered by the main LLVM target library. 
Necessary @@ -45,15 +52,22 @@ ${name} ignored.") add_library( ${name} ${libkind} ${ALL_FILES} ) set_target_properties( ${name} PROPERTIES PREFIX "" ) + llvm_config( ${name} ${LLVM_LINK_COMPONENTS} ) + link_system_libs( ${name} ) + if (APPLE) # Darwin-specific linker flags for loadable modules. set_target_properties(${name} PROPERTIES LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress") endif() - install(TARGETS ${name} - LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} - ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}) + if( EXCLUDE_FROM_ALL ) + set_target_properties( ${name} PROPERTIES EXCLUDE_FROM_ALL ON) + else() + install(TARGETS ${name} + LIBRARY DESTINATION lib${LLVM_LIBDIR_SUFFIX} + ARCHIVE DESTINATION lib${LLVM_LIBDIR_SUFFIX}) + endif() endif() set_target_properties(${name} PROPERTIES FOLDER "Loadable modules") @@ -68,23 +82,12 @@ macro(add_llvm_executable name) add_executable(${name} ${ALL_FILES}) endif() set(EXCLUDE_FROM_ALL OFF) - if( LLVM_USED_LIBS ) - foreach(lib ${LLVM_USED_LIBS}) - target_link_libraries( ${name} ${lib} ) - endforeach(lib) - endif( LLVM_USED_LIBS ) - if( LLVM_LINK_COMPONENTS ) - llvm_config(${name} ${LLVM_LINK_COMPONENTS}) - endif( LLVM_LINK_COMPONENTS ) + target_link_libraries( ${name} ${LLVM_USED_LIBS} ) + llvm_config( ${name} ${LLVM_LINK_COMPONENTS} ) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) endif( LLVM_COMMON_DEPENDS ) - if( NOT MINGW ) - get_system_libs(llvm_system_libs) - if( llvm_system_libs ) - target_link_libraries(${name} ${llvm_system_libs}) - endif() - endif() + link_system_libs( ${name} ) endmacro(add_llvm_executable name) diff --git a/cmake/modules/CMakeLists.txt b/cmake/modules/CMakeLists.txt index 1ab94749f156..257deb6d4c7a 100644 --- a/cmake/modules/CMakeLists.txt +++ b/cmake/modules/CMakeLists.txt @@ -1,15 +1,22 @@ set(llvm_cmake_builddir "${LLVM_BINARY_DIR}/share/llvm/cmake") +set(LLVM_INSTALL_PREFIX ${CMAKE_INSTALL_PREFIX}) get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS) 
configure_file( - LLVM.cmake - ${llvm_cmake_builddir}/LLVM.cmake + LLVMConfig.cmake.in + ${llvm_cmake_builddir}/LLVMConfig.cmake + @ONLY) + +configure_file( + LLVMConfigVersion.cmake.in + ${llvm_cmake_builddir}/LLVMConfigVersion.cmake @ONLY) install(FILES - ${llvm_cmake_builddir}/LLVM.cmake - LLVMConfig.cmake + ${llvm_cmake_builddir}/LLVMConfig.cmake + ${llvm_cmake_builddir}/LLVMConfigVersion.cmake + LLVM-Config.cmake LLVMLibDeps.cmake DESTINATION share/llvm/cmake) @@ -17,16 +24,11 @@ install(DIRECTORY . DESTINATION share/llvm/cmake FILES_MATCHING PATTERN *.cmake PATTERN .svn EXCLUDE - PATTERN LLVM.cmake EXCLUDE PATTERN LLVMConfig.cmake EXCLUDE + PATTERN LLVMConfigVersion.cmake EXCLUDE + PATTERN LLVM-Config.cmake EXCLUDE PATTERN LLVMLibDeps.cmake EXCLUDE PATTERN FindBison.cmake EXCLUDE PATTERN GetTargetTriple.cmake EXCLUDE PATTERN VersionFromVCS.cmake EXCLUDE PATTERN CheckAtomic.cmake EXCLUDE) - -install(FILES - ${llvm_cmake_builddir}/LLVM.cmake - LLVMConfig.cmake - LLVMLibDeps.cmake - DESTINATION share/llvm/cmake) diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index f62e86ae494b..0633ac9e3a5b 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -84,7 +84,7 @@ if( LLVM_ENABLE_PIC ) if( SUPPORTS_FPIC_FLAG ) message(STATUS "Building with -fPIC") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") - set(CMAKE_C_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") else( SUPPORTS_FPIC_FLAG ) message(WARNING "-fPIC not supported.") endif() @@ -102,6 +102,29 @@ if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) endif( LLVM_BUILD_32_BITS ) endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) +if( MSVC_IDE AND ( MSVC90 OR MSVC10 ) ) + # Only Visual Studio 2008 and 2010 officially supports /MP. + # Visual Studio 2005 do support it but it's experimental there. + set(LLVM_COMPILER_JOBS "0" CACHE STRING + "Number of parallel compiler jobs. 0 means use all processors. 
Default is 0.") + if( NOT LLVM_COMPILER_JOBS STREQUAL "1" ) + if( LLVM_COMPILER_JOBS STREQUAL "0" ) + add_llvm_definitions( /MP ) + else() + if (MSVC10) + message(FATAL_ERROR + "Due to a bug in CMake only 0 and 1 is supported for " + "LLVM_COMPILER_JOBS when generating for Visual Studio 2010") + else() + message(STATUS "Number of parallel compiler jobs set to " ${LLVM_COMPILER_JOBS}) + add_llvm_definitions( /MP${LLVM_COMPILER_JOBS} ) + endif() + endif() + else() + message(STATUS "Parallel compilation disabled") + endif() +endif() + if( MSVC ) include(ChooseMSVCCRT) @@ -130,7 +153,7 @@ if( MSVC ) -wd4715 # Suppress ''function' : not all control paths return a value' -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)' -wd4065 # Suppress 'switch statement contains 'default' but no 'case' labels' - + -wd4181 # Suppress 'qualifier applied to reference type; ignored' -w14062 # Promote "enumerator in switch of enum is not handled" to level 1 warning. ) diff --git a/cmake/modules/LLVMConfig.cmake b/cmake/modules/LLVM-Config.cmake similarity index 85% rename from cmake/modules/LLVMConfig.cmake rename to cmake/modules/LLVM-Config.cmake index 349544edc335..a6286fee6856 100755 --- a/cmake/modules/LLVMConfig.cmake +++ b/cmake/modules/LLVM-Config.cmake @@ -16,6 +16,12 @@ function(get_system_libs return_var) endfunction(get_system_libs) +function(link_system_libs target) + get_system_libs(llvm_system_libs) + target_link_libraries(${target} ${llvm_system_libs}) +endfunction(link_system_libs) + + function(is_llvm_target_library library return_var) # Sets variable `return_var' to ON if `library' corresponds to a # LLVM supported target. To OFF if it doesn't. 
@@ -23,7 +29,8 @@ function(is_llvm_target_library library return_var) string(TOUPPER "${library}" capitalized_lib) string(TOUPPER "${LLVM_ALL_TARGETS}" targets) foreach(t ${targets}) - if( capitalized_lib STREQUAL "LLVM${t}" OR + if( capitalized_lib STREQUAL t OR + capitalized_lib STREQUAL "LLVM${t}" OR capitalized_lib STREQUAL "LLVM${t}CODEGEN" OR capitalized_lib STREQUAL "LLVM${t}ASMPARSER" OR capitalized_lib STREQUAL "LLVM${t}ASMPRINTER" OR @@ -61,6 +68,26 @@ function(explicit_map_components_to_libraries out_libs) set( link_components ${ARGN} ) get_property(llvm_libs GLOBAL PROPERTY LLVM_LIBS) string(TOUPPER "${llvm_libs}" capitalized_libs) + + # Expand some keywords: + list(FIND LLVM_TARGETS_TO_BUILD "${LLVM_NATIVE_ARCH}" have_native_backend) + list(FIND link_components "engine" engine_required) + if( NOT engine_required EQUAL -1 ) + list(FIND LLVM_TARGETS_WITH_JIT "${LLVM_NATIVE_ARCH}" have_jit) + if( NOT have_native_backend EQUAL -1 AND NOT have_jit EQUAL -1 ) + list(APPEND link_components "jit") + list(APPEND link_components "native") + else() + list(APPEND link_components "interpreter") + endif() + endif() + list(FIND link_components "native" native_required) + if( NOT native_required EQUAL -1 ) + if( NOT have_native_backend EQUAL -1 ) + list(APPEND link_components ${LLVM_NATIVE_ARCH}) + endif() + endif() + # Translate symbolic component names to real libraries: foreach(c ${link_components}) # add codegen, asmprinter, asmparser, disassembler @@ -94,14 +121,13 @@ function(explicit_map_components_to_libraries out_libs) list(APPEND expanded_components "LLVM${c}Disassembler") endif() elseif( c STREQUAL "native" ) - list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen") + # already processed elseif( c STREQUAL "nativecodegen" ) list(APPEND expanded_components "LLVM${LLVM_NATIVE_ARCH}CodeGen") elseif( c STREQUAL "backend" ) # same case as in `native'. elseif( c STREQUAL "engine" ) - # TODO: as we assume we are on X86, this is `jit'. 
- list(APPEND expanded_components "LLVMJIT") + # already processed elseif( c STREQUAL "all" ) list(APPEND expanded_components ${llvm_libs}) else( NOT idx LESS 0 ) @@ -109,7 +135,7 @@ function(explicit_map_components_to_libraries out_libs) string(TOUPPER "${c}" capitalized) list(FIND capitalized_libs LLVM${capitalized} lib_idx) if( lib_idx LESS 0 ) - # The component is unkown. Maybe is an ommitted target? + # The component is unknown. Maybe is an omitted target? is_llvm_target_library(${c} iltl_result) if( NOT iltl_result ) message(FATAL_ERROR "Library `${c}' not found in list of llvm libraries.") diff --git a/cmake/modules/LLVM.cmake b/cmake/modules/LLVMConfig.cmake.in similarity index 65% rename from cmake/modules/LLVM.cmake rename to cmake/modules/LLVMConfig.cmake.in index 9182afdf2758..5a048b714b57 100644 --- a/cmake/modules/LLVM.cmake +++ b/cmake/modules/LLVMConfig.cmake.in @@ -10,6 +10,8 @@ set(LLVM_ALL_TARGETS @LLVM_ALL_TARGETS@) set(LLVM_TARGETS_TO_BUILD @LLVM_TARGETS_TO_BUILD@) +set(LLVM_TARGETS_WITH_JIT @LLVM_TARGETS_WITH_JIT@) + set(TARGET_TRIPLE "@TARGET_TRIPLE@") set(LLVM_TOOLS_BINARY_DIR @LLVM_TOOLS_BINARY_DIR@) @@ -20,21 +22,26 @@ set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@) set(LLVM_ENABLE_PIC @LLVM_ENABLE_PIC@) -set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS) - set(HAVE_LIBDL @HAVE_LIBDL@) -set(HAVE_LIBPTHREAD @HAVE_LIBPTHREAD) +set(HAVE_LIBPTHREAD @HAVE_LIBPTHREAD@) +set(LLVM_ON_UNIX @LLVM_ON_UNIX@) +set(LLVM_ON_WIN32 @LLVM_ON_WIN32@) + +set(LLVM_INSTALL_PREFIX @LLVM_INSTALL_PREFIX@) +set(LLVM_INCLUDE_DIRS ${LLVM_INSTALL_PREFIX}/include) +set(LLVM_LIBRARY_DIRS ${LLVM_INSTALL_PREFIX}/lib) +set(LLVM_DEFINITIONS "-D__STDC_LIMIT_MACROS" "-D__STDC_CONSTANT_MACROS") # We try to include using the current setting of CMAKE_MODULE_PATH, # which suppossedly was filled by the user with the directory where # this file was installed: -include( LLVMConfig OPTIONAL RESULT_VARIABLE LLVMCONFIG_INCLUDED ) +include( LLVM-Config OPTIONAL RESULT_VARIABLE LLVMCONFIG_INCLUDED ) 
# If failed, we assume that this is an un-installed build: if( NOT LLVMCONFIG_INCLUDED ) set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "@LLVM_SOURCE_DIR@/cmake/modules") - include( LLVMConfig ) + include( LLVM-Config ) endif() diff --git a/cmake/modules/LLVMConfigVersion.cmake.in b/cmake/modules/LLVMConfigVersion.cmake.in new file mode 100644 index 000000000000..add5aa9a1cbb --- /dev/null +++ b/cmake/modules/LLVMConfigVersion.cmake.in @@ -0,0 +1 @@ +set(PACKAGE_VERSION "@PACKAGE_VERSION@") \ No newline at end of file diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake index afba85e45aa3..509ac527cf4c 100644 --- a/cmake/modules/LLVMLibDeps.cmake +++ b/cmake/modules/LLVMLibDeps.cmake @@ -30,11 +30,11 @@ set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVM set(MSVC_LIB_DEPS_LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget) set(MSVC_LIB_DEPS_LLVMMBlazeAsmPrinter LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeAsmPrinter LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget) -set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC) set(MSVC_LIB_DEPS_LLVMMBlazeInfo LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMMC LLVMSupport) -set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsCodeGen LLVMMipsInfo LLVMPTXCodeGen LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZInfo LLVMX86AsmParser 
LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreInfo) -set(MSVC_LIB_DEPS_LLVMMCJIT LLVMExecutionEngine LLVMSupport LLVMTarget) +set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsCodeGen LLVMMipsInfo LLVMPTXCodeGen LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreInfo) +set(MSVC_LIB_DEPS_LLVMMCJIT LLVMCore LLVMExecutionEngine LLVMRuntimeDyld LLVMSupport LLVMTarget) set(MSVC_LIB_DEPS_LLVMMCParser LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget) @@ -47,6 +47,7 @@ set(MSVC_LIB_DEPS_LLVMPTXInfo LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCAsmPrinter LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMTarget) set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMMC LLVMSupport) +set(MSVC_LIB_DEPS_LLVMRuntimeDyld LLVMObject LLVMSupport) set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMTarget LLVMTransformUtils) set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMTransformUtils) set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMTarget) @@ -61,7 +62,7 @@ 
set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMMC LLVMSupport LLVMX86Utils) set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMX86AsmPrinter LLVMX86Info LLVMX86Utils) set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info) set(MSVC_LIB_DEPS_LLVMX86Info LLVMMC LLVMSupport) -set(MSVC_LIB_DEPS_LLVMX86Utils LLVMSupport) +set(MSVC_LIB_DEPS_LLVMX86Utils LLVMCore LLVMSupport) set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo) set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMMC LLVMSupport) set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport) diff --git a/configure b/configure index 959822fa0e15..f1f4dd3c7715 100755 --- a/configure +++ b/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.60 for llvm 2.9svn. +# Generated by GNU Autoconf 2.60 for llvm 3.0svn. # # Report bugs to . # @@ -561,8 +561,8 @@ SHELL=${CONFIG_SHELL-/bin/sh} # Identity of this package. PACKAGE_NAME='llvm' PACKAGE_TARNAME='-llvm-' -PACKAGE_VERSION='2.9svn' -PACKAGE_STRING='llvm 2.9svn' +PACKAGE_VERSION='3.0svn' +PACKAGE_STRING='llvm 3.0svn' PACKAGE_BUGREPORT='llvmbugs@cs.uiuc.edu' ac_unique_file="lib/VMCore/Module.cpp" @@ -1328,7 +1328,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures llvm 2.9svn to adapt to many kinds of systems. +\`configure' configures llvm 3.0svn to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... 
@@ -1394,7 +1394,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of llvm 2.9svn:";; + short | recursive ) echo "Configuration of llvm 3.0svn:";; esac cat <<\_ACEOF @@ -1551,7 +1551,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -llvm configure 2.9svn +llvm configure 3.0svn generated by GNU Autoconf 2.60 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001, @@ -1567,7 +1567,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by llvm $as_me 2.9svn, which was +It was created by llvm $as_me 3.0svn, which was generated by GNU Autoconf 2.60. Invocation command line was $ $0 $@ @@ -5117,6 +5117,9 @@ _ACEOF LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target" LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo" LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter" + if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then + LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser" + fi cat >>confdefs.h <<_ACEOF #define LLVM_NATIVE_TARGET $LLVM_NATIVE_TARGET @@ -5132,6 +5135,13 @@ cat >>confdefs.h <<_ACEOF #define LLVM_NATIVE_ASMPRINTER $LLVM_NATIVE_ASMPRINTER _ACEOF + if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then + +cat >>confdefs.h <<_ACEOF +#define LLVM_NATIVE_ASMPARSER $LLVM_NATIVE_ASMPARSER +_ACEOF + + fi fi done @@ -11561,7 +11571,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <&5 +echo $ECHO_N "checking whether EnumerateLoadedModules() accepts new decl... 
$ECHO_C" >&6; } + cat >conftest.$ac_ext <<_ACEOF +#include +#include +extern void foo(PENUMLOADED_MODULES_CALLBACK); +extern void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID)); +_ACEOF +rm -f conftest.$ac_objext +if { (ac_try="$ac_compile" +case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_compile") 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (case "(($ac_try" in + *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; + *) ac_try_echo=$ac_try;; +esac +eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5 + (eval "$ac_try") 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + + { echo "$as_me:$LINENO: result: yes" >&5 +echo "${ECHO_T}yes" >&6; } + llvm_cv_win32_elmcb_pcstr="PCSTR" + +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + + + { echo "$as_me:$LINENO: result: no" >&5 +echo "${ECHO_T}no" >&6; } + llvm_cv_win32_elmcb_pcstr="PSTR" + +fi + +rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + +cat >>confdefs.h <<_ACEOF +#define WIN32_ELMCB_PCSTR $llvm_cv_win32_elmcb_pcstr +_ACEOF + +fi + { echo "$as_me:$LINENO: checking for isnan in " >&5 echo $ECHO_N "checking for isnan in ... $ECHO_C" >&6; } @@ -22942,7 +23019,7 @@ exec 6>&1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. 
ac_log=" -This file was extended by llvm $as_me 2.9svn, which was +This file was extended by llvm $as_me 3.0svn, which was generated by GNU Autoconf 2.60. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -22995,7 +23072,7 @@ Report bugs to ." _ACEOF cat >>$CONFIG_STATUS <<_ACEOF ac_cs_version="\\ -llvm config.status 2.9svn +llvm config.status 3.0svn configured by $0, generated by GNU Autoconf 2.60, with options \\"`echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`\\" diff --git a/docs/AliasAnalysis.html b/docs/AliasAnalysis.html index 20b7e96460f3..b93cff07bc4d 100644 --- a/docs/AliasAnalysis.html +++ b/docs/AliasAnalysis.html @@ -7,9 +7,9 @@ -
+

LLVM Alias Analysis Infrastructure -

+
  1. Introduction
  2. @@ -59,12 +59,12 @@ - + -
    +

    Alias Analysis (aka Pointer Analysis) is a class of techniques which attempt to determine whether or not two pointers ever can point to the same object in @@ -96,12 +96,12 @@ know.

    - + -
    +

    The AliasAnalysis @@ -122,14 +122,12 @@ multiple values, values which are not constants are all defined within the same function.

    -
    - - + -
    +

    Most importantly, the AliasAnalysis class provides several methods which are used to query whether or not two memory objects alias, whether @@ -181,11 +179,11 @@ that the accesses alias.

    - + -
    +

    The alias method is the primary interface used to determine whether or not two memory objects alias each other. It takes two memory objects as input and returns MustAlias, PartialAlias, MayAlias, or NoAlias as @@ -194,14 +192,13 @@ appropriate.

    Like all AliasAnalysis interfaces, the alias method requires that either the two pointer values be defined within the same function, or at least one of the values is a constant.

    -
    - + -
    +

    The NoAlias response may be used when there is never an immediate dependence between any memory reference based on one pointer and any memory reference based on the other. The most obvious example is when the two @@ -227,14 +224,14 @@ implies that the pointers compare equal.

    - - -
    + +

    + The getModRefInfo methods +

    + +

    The getModRefInfo methods return information about whether the execution of an instruction can read or modify a memory location. Mod/Ref @@ -250,25 +249,23 @@ memory written to by CS2. Note that this relation is not commutative.

    - + -
    +

    Several other tidbits of information are often collected by various alias analysis implementations and can be put to good use by various clients.

    -
    - -
    +

    The pointsToConstantMemory method -

    + -
    +

    The pointsToConstantMemory method returns true if and only if the analysis can prove that the pointer only points to unchanging memory locations @@ -279,12 +276,12 @@ memory location to be modified.

    - + -
    +

    These methods are used to provide very simple mod/ref information for function calls. The doesNotAccessMemory method returns true for a @@ -307,13 +304,17 @@ functions that satisfy the doesNotAccessMemory method also satisfies

    - - + +
    + + +

    + Writing a new AliasAnalysis Implementation +

    -
    +

    Writing a new alias analysis implementation for LLVM is quite straight-forward. There are already several implementations that you can use @@ -321,14 +322,12 @@ for examples, and the following information should help fill in any details. For a examples, take a look at the various alias analysis implementations included with LLVM.

    -
    - - + -
    +

    The first step to determining what type of LLVM pass you need to use for your Alias @@ -352,11 +351,11 @@ solve:

    - + -
    +

    Your subclass of AliasAnalysis is required to invoke two methods on the AliasAnalysis base class: getAnalysisUsage and @@ -393,11 +392,11 @@ bool run(Module &M) {

    - + -
    +

    All of the AliasAnalysis @@ -412,11 +411,11 @@ implementing, you just override the interfaces you can improve.

    - + -
    +

    With only two special exceptions (the basicaa and no-aa @@ -451,11 +450,11 @@ updated.

    - + -
    +

    Alias analysis information is initially computed for a static snapshot of the program, but clients will use this information to make transformations to the @@ -471,12 +470,11 @@ their internal data structures are kept up-to-date as the program changes (for example, when an instruction is deleted), and clients of alias analysis must be sure to call these interfaces appropriately.

    -
    -
    The deleteValue method
    +

    The deleteValue method

    -
    +
    The deleteValue method is called by transformations when they remove an instruction or any other value from the program (including values that do not use pointers). Typically alias analyses keep data structures that have entries @@ -485,9 +483,9 @@ any entries for the specified value, if they exist.
    -
    The copyValue method
    +

    The copyValue method

    -
    +
    The copyValue method is used when a new value is introduced into the program. There is no way to introduce a value into the program that did not exist before (this doesn't make sense for a safe compiler transformation), so @@ -496,9 +494,9 @@ new value has exactly the same properties as the value being copied.
    -
    The replaceWithNewValue method
    +

    The replaceWithNewValue method

    -
    +
    This method is a simple helper method that is provided to make clients easier to use. It is implemented by copying the old analysis information to the new value, then deleting the old value. This method cannot be overridden by alias @@ -506,9 +504,9 @@ analysis implementations.
    -
    The addEscapingUse method
    +

    The addEscapingUse method

    -
    +

    The addEscapingUse method is used when the uses of a pointer value have changed in ways that may invalidate precomputed analysis information. Implementations may either use this callback to provide conservative responses @@ -527,12 +525,14 @@ uses below:

    - - -
    + +

    + Efficiency Issues +

    + +

    From the LLVM perspective, the only thing you need to do to provide an efficient alias analysis is to make sure that alias analysis queries are @@ -544,11 +544,11 @@ method as possible (within reason).

    - + -
    +

    The AliasAnalysis infrastructure has several limitations which make writing a new AliasAnalysis implementation difficult.

    @@ -567,7 +567,7 @@ which are intended to allow a pass to keep an AliasAnalysis consistent, however there's no way for a pass to declare in its getAnalysisUsage that it does so. Some passes attempt to use AU.addPreserved<AliasAnalysis>, however this doesn't -actually have any effect. +actually have any effect.

    AliasAnalysisCounter (-count-aa) and AliasDebugger (-debug-aa) are implemented as ModulePass classes, so if your @@ -616,25 +616,25 @@ from itself.

    - - + + +

    + Using alias analysis results +

    -
    +

    There are several different ways to use alias analysis results. In order of preference, these are...

    -
    - - + -
    +

    The memdep pass uses alias analysis to provide high-level dependence information about memory-using instructions. This will tell you which store @@ -645,11 +645,11 @@ efficient, and is used by Dead Store Elimination, GVN, and memcpy optimizations.

    - + -
    +

    Many transformations need information about alias sets that are active in some scope, rather than information about pairwise aliasing. The -

    - -
    +

    The AliasSetTracker implementation -

    + -
    +

    The AliasSetTracker class is implemented to be as efficient as possible. It uses the union-find algorithm to efficiently merge AliasSets when a pointer is @@ -706,12 +704,14 @@ are.

    - -
    -
    + +

    + Using the AliasAnalysis interface directly +

    + +

    If neither of these utility class are what your pass needs, you should use the interfaces exposed by the AliasAnalysis class directly. Try to use @@ -721,13 +721,15 @@ best precision and efficiency.

    - - + + +

    + Existing alias analysis implementations and clients +

    -
    +

    If you're going to be working with the LLVM alias analysis infrastructure, you should know what clients and implementations of alias analysis are @@ -735,28 +737,24 @@ available. In particular, if you are implementing an alias analysis, you should be aware of the the clients that are useful for monitoring and evaluating different implementations.

    -
    - - + -
    +

    This section lists the various implementations of the AliasAnalysis interface. With the exception of the -no-aa and -basicaa implementations, all of these chain to other alias analysis implementations.

    -
    - - + -
    +

    The -no-aa pass is just like what it sounds: an alias analysis that never returns any useful information. This pass can be useful if you think that @@ -766,11 +764,11 @@ problem.

    - + -
    +

    The -basicaa pass is an aggressive local analysis that "knows" many important facts:

    @@ -794,11 +792,11 @@ many important facts:

    - + -
    +

    This pass implements a simple context-sensitive mod/ref and alias analysis for internal global variables that don't "have their address taken". If a @@ -818,11 +816,11 @@ non-address taken globals), but is very quick analysis.

    - + -
    +

    The -steens-aa pass implements a variation on the well-known "Steensgaard's algorithm" for interprocedural alias analysis. Steensgaard's @@ -841,11 +839,11 @@ module, it is not part of the LLVM core.

    - + -
    +

    The -ds-aa pass implements the full Data Structure Analysis algorithm. Data Structure Analysis is a modular unification-based, @@ -864,11 +862,11 @@ module, it is not part of the LLVM core.

    - + -
    +

    The -scev-aa pass implements AliasAnalysis queries by translating them into ScalarEvolution queries. This gives it a @@ -877,22 +875,23 @@ and loop induction variables than other alias analyses have.

    - - -
    + +

    + Alias analysis driven transformations +

    + +
    LLVM includes several alias-analysis driven transformations which can be used with any of the implementations above. -
    - + -
    +

    The -adce pass, which implements Aggressive Dead Code Elimination uses the AliasAnalysis interface to delete calls to functions that do @@ -902,11 +901,11 @@ not have side-effects and are not used.

    - + -
    +

    The -licm pass implements various Loop Invariant Code Motion related transformations. It uses the AliasAnalysis interface for several @@ -927,11 +926,11 @@ no may aliases to the loaded/stored memory location.

    - + -
    +

    The -argpromotion pass promotes by-reference arguments to be passed in by-value instead. In particular, if pointer arguments are only loaded from it @@ -942,38 +941,38 @@ pointer.

    - + -
    +

    These passes use AliasAnalysis information to reason about loads and stores.

    - - -
    + +

    + Clients for debugging and evaluation of + implementations +

    + +

    These passes are useful for evaluating the various alias analysis implementations. You can use them with commands like 'opt -ds-aa -aa-eval foo.bc -disable-output -stats'.

    -
    - - + -
    +

    The -print-alias-sets pass is exposed as part of the opt tool to print out the Alias Sets formed by the AliasSetTracker class. To use it, use something like:

    - + -
    +

    The -count-aa pass is useful to see how many queries a particular pass is making and what responses are returned by the alias analysis. As an @@ -1014,11 +1013,11 @@ when debugging a transformation or an alias analysis implementation.

    - + -
    +

    The -aa-eval pass simply iterates through all pairs of pointers in a function and asks an alias analysis whether or not the pointers alias. This @@ -1028,13 +1027,17 @@ algorithm will have a lower number of may aliases).

    - - + +
    + + +

    + Memory Dependence Analysis +

    -
    +

    If you're just looking to be a client of alias analysis information, consider using the Memory Dependence Analysis interface instead. MemDep is a lazy, @@ -1056,8 +1059,8 @@ analysis directly.

    src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
    - LLVM Compiler Infrastructure
    - Last modified: $Date: 2011-01-03 22:38:41 +0100 (Mon, 03 Jan 2011) $ + LLVM Compiler Infrastructure
    + Last modified: $Date: 2011-04-21 03:52:00 +0200 (Thu, 21 Apr 2011) $ diff --git a/docs/BitCodeFormat.html b/docs/BitCodeFormat.html index 8d3d382da7a9..9a042a0dbaa8 100644 --- a/docs/BitCodeFormat.html +++ b/docs/BitCodeFormat.html @@ -7,7 +7,7 @@ -
    LLVM Bitcode File Format
    +

    LLVM Bitcode File Format

    1. Abstract
    2. Overview
    3. @@ -47,10 +47,10 @@
    - +

    Abstract

    -
    +

    This document describes the LLVM bitstream file format and the encoding of the LLVM IR into it.

    @@ -58,10 +58,10 @@ the LLVM IR into it.

    - +

    Overview

    -
    +

    What is commonly known as the LLVM bitcode file format (also, sometimes @@ -88,10 +88,10 @@ wrapper format, then describes the record structure used by LLVM IR files.

    - +

    Bitstream Format

    -
    +

    The bitstream format is literally a stream of bits, with a very simple @@ -114,13 +114,12 @@ href="CommandGuide/html/llvm-bcanalyzer.html">llvm-bcanalyzer tool can be used to dump and inspect arbitrary bitstreams, which is very useful for understanding the encoding.

    -
    - - +

    + Magic Numbers +

    -
    +

    The first two bytes of a bitcode file are 'BC' (0x42, 0x43). The second two bytes are an application-specific magic number. Generic @@ -130,10 +129,11 @@ bitcode, while application-specific programs will want to look at all four.

    - +

    + Primitives +

    -
    +

    A bitstream literally consists of a stream of bits, which are read in order @@ -144,13 +144,12 @@ Width Integers or as Variable Width Integers.

    -
    - - +

    + Fixed Width Integers +

    -
    +

    Fixed-width integer values have their low bits emitted directly to the file. For example, a 3-bit integer value encodes 1 as 001. Fixed width integers @@ -161,10 +160,11 @@ Integers.

    - +

    + Variable Width Integers +

    -
    +

    Variable-width integer (VBR) values encode values of arbitrary size, optimizing for the case where the values are small. Given a 4-bit VBR field, @@ -182,9 +182,9 @@ value of 24 (011 << 3) with no continuation. The sum (3+24) yields the value

    - +

    6-bit characters

    -
    +

    6-bit characters encode common characters into a fixed 6-bit field. They represent the following characters with the following 6-bit values:

    @@ -206,9 +206,9 @@ characters not in the set.

    - +

    Word Alignment

    -
    +

    Occasionally, it is useful to emit zero bits until the bitstream is a multiple of 32 bits. This ensures that the bit position in the stream can be @@ -216,12 +216,14 @@ represented as a multiple of 32-bit words.

    - - - -
    + +

    + Abbreviation IDs +

    + +

    A bitstream is a sequential series of Blocks and @@ -253,10 +255,11 @@ an abbreviated record encoding.

    - +

    + Blocks +

    -
    +

    Blocks in a bitstream denote nested regions of the stream, and are identified by @@ -297,13 +300,10 @@ its own set of abbreviations, and its own abbrev id width. When a sub-block is popped, the saved values are restored.

    -
    - - +

    ENTER_SUBBLOCK Encoding

    -
    +

    [ENTER_SUBBLOCK, blockidvbr8, newabbrevlenvbr4, <align32bits>, blocklen32]

    @@ -322,10 +322,9 @@ reader to skip over the entire block in one jump.
    - +

    END_BLOCK Encoding

    -
    +

    [END_BLOCK, <align32bits>]

    @@ -337,13 +336,14 @@ an even multiple of 32-bits.
    - - - - -
    + +

    + Data Records +

    + +

    Data records consist of a record code and a number of (up to) 64-bit integer values. The interpretation of the code and values is @@ -355,13 +355,10 @@ which encodes the target triple of a module. The code is ASCII codes for the characters in the string.

    -
    - - +

    UNABBREV_RECORD Encoding

    -
    +

    [UNABBREV_RECORD, codevbr6, numopsvbr6, op0vbr6, op1vbr6, ...]

    @@ -385,10 +382,9 @@ bits. This is not an efficient encoding, but it is fully general.
    - +

    Abbreviated Record Encoding

    -
    +

    [<abbrevid>, fields...]

    @@ -409,11 +405,14 @@ operand value).

    - - -
    + +

    + Abbreviations +

    + +

    Abbreviations are an important form of compression for bitstreams. The idea is to specify a dense encoding for a class of records once, then use that encoding @@ -431,13 +430,11 @@ As a concrete example, LLVM IR files usually emit an abbreviation for binary operators. If a specific LLVM module contained no or few binary operators, the abbreviation does not need to be emitted.

    -
    - +

    DEFINE_ABBREV Encoding

    -
    +

    [DEFINE_ABBREV, numabbrevopsvbr5, abbrevop0, abbrevop1, ...]

    @@ -552,11 +549,14 @@ used for any other string value.
    - - -
    + +

    + Standard Blocks +

    + +

    In addition to the basic block structure and record encodings, the bitstream @@ -565,13 +565,10 @@ stream is to be decoded or other metadata. In the future, new standard blocks may be added. Block IDs 0-7 are reserved for standard blocks.

    -
    - - +

    #0 - BLOCKINFO Block

    -
    +

    The BLOCKINFO block allows the description of metadata for other @@ -620,11 +617,15 @@ from the corresponding blocks. It is not safe to skip them.

    +
    + +
    + - +

    Bitcode Wrapper Format

    -
    +

    Bitcode files for LLVM IR may optionally be wrapped in a simple wrapper @@ -652,10 +653,10 @@ value that can be used to encode the CPU of the target.

    - +

    LLVM IR Encoding

    -
    +

    LLVM IR is encoded into a bitstream by defining blocks and records. It uses @@ -666,16 +667,17 @@ that the writer uses, as these are fully self-described in the file, and the reader is not allowed to build in any knowledge of this.

    -
    - - +

    + Basics +

    + +
    - +

    LLVM IR Magic Number

    -
    +

    The magic number for LLVM IR files is: @@ -695,9 +697,9 @@ When combined with the bitcode magic number and viewed as bytes, this is

    - +

    Signed VBRs

    -
    +

    Variable Width Integer encoding is an efficient way to @@ -728,9 +730,9 @@ within CONSTANTS_BLOCK blocks. -

    +

    LLVM IR Blocks

    -
    +

    LLVM IR is defined with the following blocks: @@ -758,11 +760,14 @@ LLVM IR is defined with the following blocks:

    - - -
    + +

    + MODULE_BLOCK Contents +

    + +

    The MODULE_BLOCK block (id 8) is the top-level block for LLVM bitcode files, and each bitcode file must contain exactly one. In @@ -782,13 +787,10 @@ following sub-blocks:

  3. METADATA_BLOCK
  4. -
    - - +

    MODULE_CODE_VERSION Record

    -
    +

    [VERSION, version#]

    @@ -798,10 +800,9 @@ time.

    - +

    MODULE_CODE_TRIPLE Record

    -
    +

    [TRIPLE, ...string...]

    The TRIPLE record (code 2) contains a variable number of @@ -810,10 +811,9 @@ specification string.

    - +

    MODULE_CODE_DATALAYOUT Record

    -
    +

    [DATALAYOUT, ...string...]

    The DATALAYOUT record (code 3) contains a variable number of @@ -822,10 +822,9 @@ specification string.

    - +

    MODULE_CODE_ASM Record

    -
    +

    [ASM, ...string...]

    The ASM record (code 4) contains a variable number of @@ -834,10 +833,9 @@ individual assembly blocks separated by newline (ASCII 10) characters.

    - +

    MODULE_CODE_SECTIONNAME Record

    -
    +

    [SECTIONNAME, ...string...]

    The SECTIONNAME record (code 5) contains a variable number @@ -850,10 +848,9 @@ referenced by the 1-based index in the section fields of

    - +

    MODULE_CODE_DEPLIB Record

    -
    +

    [DEPLIB, ...string...]

    The DEPLIB record (code 6) contains a variable number of @@ -864,10 +861,9 @@ library name referenced.

    - +

    MODULE_CODE_GLOBALVAR Record

    -
    +

    [GLOBALVAR, pointer type, isconst, initid, linkage, alignment, section, visibility, threadlocal]

    The GLOBALVAR record (code 7) marks the declaration or @@ -923,16 +919,15 @@ encoding of the visibility of this variable: is thread_local

  5. unnamed_addr: If present and non-zero, indicates that the variable -has unnamed_addr
  6. +has unnamed_addr
    - +

    MODULE_CODE_FUNCTION Record

    -
    +

    [FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc]

    @@ -980,16 +975,15 @@ index in the table of MODULE_CODE_GCNAME entries.
  7. unnamed_addr: If present and non-zero, indicates that the function -has unnamed_addr
  8. +has unnamed_addr
    - +

    MODULE_CODE_ALIAS Record

    -
    +

    [ALIAS, alias type, aliasee val#, linkage, visibility]

    @@ -1011,10 +1005,9 @@ for this alias
    - +

    MODULE_CODE_PURGEVALS Record

    -
    +

    [PURGEVALS, numvals]

    The PURGEVALS record (code 10) resets the module-level @@ -1025,10 +1018,9 @@ new value indices will start from the given numvals value.

    - +

    MODULE_CODE_GCNAME Record

    -
    +

    [GCNAME, ...string...]

    The GCNAME record (code 11) contains a variable number of @@ -1039,11 +1031,14 @@ the module. These records can be referenced by 1-based index in the gc fields of FUNCTION records.

    - - -
    + +

    + PARAMATTR_BLOCK Contents +

    + +

    The PARAMATTR_BLOCK block (id 9) contains a table of entries describing the attributes of function parameters. These @@ -1057,14 +1052,10 @@ href="#FUNC_CODE_INST_CALL">INST_CALL records.

    that each is unique (i.e., no two indicies represent equivalent attribute lists).

    -
    - - - +

    PARAMATTR_CODE_ENTRY Record

    -
    +

    [ENTRY, paramidx0, attr0, paramidx1, attr1...]

    @@ -1105,11 +1096,14 @@ the logarithm base 2 of the requested alignment, plus 1
    - - -
    + +

    + TYPE_BLOCK Contents +

    + +

    The TYPE_BLOCK block (id 10) contains records which constitute a table of type operator entries used to represent types @@ -1124,13 +1118,10 @@ type operator records. each entry is unique (i.e., no two indicies represent structurally equivalent types).

    -
    - - +

    TYPE_CODE_NUMENTRY Record

    -
    +

    [NUMENTRY, numentries]

    @@ -1142,10 +1133,9 @@ in the block.
    - +

    TYPE_CODE_VOID Record

    -
    +

    [VOID]

    @@ -1155,10 +1145,9 @@ type table.
    - +

    TYPE_CODE_FLOAT Record

    -
    +

    [FLOAT]

    @@ -1168,10 +1157,9 @@ floating point) type to the type table.
    - +

    TYPE_CODE_DOUBLE Record

    -
    +

    [DOUBLE]

    @@ -1181,10 +1169,9 @@ floating point) type to the type table.
    - +

    TYPE_CODE_LABEL Record

    -
    +

    [LABEL]

    @@ -1194,10 +1181,9 @@ the type table.
    - +

    TYPE_CODE_OPAQUE Record

    -
    +

    [OPAQUE]

    @@ -1208,10 +1194,9 @@ unified.
    - +

    TYPE_CODE_INTEGER Record

    -
    +

    [INTEGER, width]

    @@ -1222,10 +1207,9 @@ integer type.
    - +

    TYPE_CODE_POINTER Record

    -
    +

    [POINTER, pointee type, address space]

    @@ -1243,10 +1227,9 @@ default address space is zero.
    - +

    TYPE_CODE_FUNCTION Record

    -
    +

    [FUNCTION, vararg, ignored, retty, ...paramty... ]

    @@ -1268,10 +1251,9 @@ parameter types of the function
    - +

    TYPE_CODE_STRUCT Record

    -
    +

    [STRUCT, ispacked, ...eltty...]

    @@ -1287,10 +1269,9 @@ types of the structure
    - +

    TYPE_CODE_ARRAY Record

    -
    +

    [ARRAY, numelts, eltty]

    @@ -1305,10 +1286,9 @@ table. The operand fields are

    - +

    TYPE_CODE_VECTOR Record

    -
    +

    [VECTOR, numelts, eltty]

    @@ -1323,10 +1303,9 @@ table. The operand fields are

    - +

    TYPE_CODE_X86_FP80 Record

    -
    +

    [X86_FP80]

    @@ -1336,10 +1315,9 @@ floating point) type to the type table.
    - +

    TYPE_CODE_FP128 Record

    -
    +

    [FP128]

    @@ -1349,10 +1327,9 @@ floating point) type to the type table.
    - +

    TYPE_CODE_PPC_FP128 Record

    -
    +

    [PPC_FP128]

    @@ -1362,10 +1339,9 @@ floating point) type to the type table.
    - +

    TYPE_CODE_METADATA Record

    -
    +

    [METADATA]

    @@ -1374,11 +1350,14 @@ type to the type table.

    - - -
    + +

    + CONSTANTS_BLOCK Contents +

    + +

    The CONSTANTS_BLOCK block (id 11) ...

    @@ -1387,10 +1366,11 @@ type to the type table. - +

    + FUNCTION_BLOCK Contents +

    -
    +

    The FUNCTION_BLOCK block (id 12) ...

    @@ -1409,23 +1389,21 @@ type to the type table. - +

    + TYPE_SYMTAB_BLOCK Contents +

    -
    +

    The TYPE_SYMTAB_BLOCK block (id 13) contains entries which map between module-level named types and their corresponding type indices.

    -
    - - +

    TST_CODE_ENTRY Record

    -
    +

    [ENTRY, typeid, ...string...]

    @@ -1436,12 +1414,14 @@ name. Each entry corresponds to a single named type.

    - - - -
    + +

    + VALUE_SYMTAB_BLOCK Contents +

    + +

    The VALUE_SYMTAB_BLOCK block (id 14) ...

    @@ -1450,10 +1430,11 @@ name. Each entry corresponds to a single named type. - +

    + METADATA_BLOCK Contents +

    -
    +

    The METADATA_BLOCK block (id 15) ...

    @@ -1462,16 +1443,18 @@ name. Each entry corresponds to a single named type. - +

    + METADATA_ATTACHMENT Contents +

    -
    +

    The METADATA_ATTACHMENT block (id 16) ...

    +

    @@ -1480,8 +1463,8 @@ name. Each entry corresponds to a single named type. Valid HTML 4.01 Chris Lattner
    -The LLVM Compiler Infrastructure
    -Last modified: $Date: 2011-01-08 17:42:36 +0100 (Sat, 08 Jan 2011) $ +The LLVM Compiler Infrastructure
    +Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/Bugpoint.html b/docs/Bugpoint.html index cbd71aab91a7..05c867bcb08c 100644 --- a/docs/Bugpoint.html +++ b/docs/Bugpoint.html @@ -6,9 +6,9 @@ -
    +

    LLVM bugpoint tool: design and usage -

    +
    - + -
    +

    bugpoint narrows down the source of problems in LLVM tools and passes. It can be used to debug three types of failures: optimizer crashes, @@ -50,12 +50,12 @@ href="HowToSubmitABug.html">How To Submit a Bug Report document.

    - + -
    +

    bugpoint is designed to be a useful tool without requiring any hooks into the LLVM infrastructure at all. It works with any and all LLVM @@ -68,14 +68,12 @@ is still worth it. Note that bugpoint is generally very quick unless debugging a miscompilation where each test of the program (which requires executing it) takes a long time.

    -
    - - + -
    +

    bugpoint reads each .bc or .ll file specified on the command line and links them together into a single module, called the test @@ -104,11 +102,11 @@ Otherwise, there is no problem bugpoint can debug.

    - + -
    +

    If an optimizer or code generator crashes, bugpoint will try as hard as it can to reduce the list of passes (for optimizer crashes) and the size of @@ -129,11 +127,11 @@ reproduce the failure with opt or llc.

    - + -
    +

    The code generator debugger attempts to narrow down the amount of code that is being miscompiled by the selected code generator. To do this, it takes the @@ -150,11 +148,11 @@ good code.

    - + -
    +

    The miscompilation debugger works similarly to the code generator debugger. It works by splitting the test program into two pieces, running the @@ -166,13 +164,15 @@ assumes that the selected code generator is working properly.

    - - + + +

    + Advice for using bugpoint +

    -
    +
    bugpoint can be a remarkably useful tool, but it sometimes works in non-obvious ways. Here are some hints and tips:

    @@ -242,8 +242,8 @@ non-obvious ways. Here are some hints and tips:

    src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
    - LLVM Compiler Infrastructure
    - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + LLVM Compiler Infrastructure
    + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/CFEBuildInstrs.html b/docs/CFEBuildInstrs.html index ed2f295b7aff..ab10844a8e0e 100644 --- a/docs/CFEBuildInstrs.html +++ b/docs/CFEBuildInstrs.html @@ -21,7 +21,7 @@ This page has moved here. Valid HTML 4.01 - LLVM Compiler Infrastructure
    + LLVM Compiler Infrastructure
    Last modified: $Date: 2008-02-13 17:46:10 +0100 (Wed, 13 Feb 2008) $ diff --git a/docs/CMake.html b/docs/CMake.html index e303d132b590..0d8cf62e33c4 100644 --- a/docs/CMake.html +++ b/docs/CMake.html @@ -6,9 +6,9 @@ -

    +

    Building LLVM with CMake -

    +
    - + -
    +

    CMake is a cross-platform build-generator tool. CMake does not build the project, it generates @@ -56,12 +59,12 @@

    - + -
    +

    We use here the command-line, non-interactive CMake interface

    @@ -109,12 +112,12 @@
    - + -
    +

    This section explains basic aspects of CMake, mostly for explaining those options which you may need on your day-to-day @@ -157,12 +160,12 @@

    - + -
    +

    Variables customize how the build will be generated. Options are boolean variables, with possible values ON/OFF. Options and @@ -191,14 +194,12 @@

    cmake -DVARIABLE:TYPE=value path/to/llvm/source

    -
    - - + -
    +

    Here are listed some of the CMake variables that are used often, along with a brief explanation and LLVM-specific notes. For full @@ -237,11 +238,11 @@

    - + -
    +
    LLVM_TARGETS_TO_BUILD:STRING
    @@ -342,7 +343,7 @@
    LLVM_LIT_TOOLS_DIR:STRING
    The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to "", then Lit seeks tools according to %PATH%. - Lit can find tools(eg. grep, sort, &c) on LLVM_LIT_TOOLS_DIR at first, + Lit can find tools(eg. grep, sort, &c) on LLVM_LIT_TOOLS_DIR at first, without specifying GnuWin32 to %PATH%.
    LLVM_ENABLE_FFI:BOOL
    @@ -354,13 +355,15 @@
    - - + + +

    + Executing the test suite +

    -
    +

    Testing is performed when the check target is built. For instance, if you are using makefiles, execute this command while on @@ -375,12 +378,12 @@

    - + -
    +

    See this wiki page for generic instructions on how to cross-compile @@ -396,12 +399,12 @@

    - + -
    +

    The most difficult part of adding LLVM to the build of a project is to determine the set of LLVM libraries corresponding to the set @@ -418,7 +421,7 @@ endif() # We incorporate the CMake features provided by LLVM: set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${LLVM_ROOT}/share/llvm/cmake") - include(LLVM) + include(LLVMConfig) # Now set the header and library paths: include_directories( ${LLVM_ROOT}/include ) link_directories( ${LLVM_ROOT}/lib ) @@ -436,20 +439,111 @@ headers on the LLVM source directory (if we are building out-of-source.)

    +

    Alternatively, you can utilize CMake's find_package + functionality. Here is an equivalent variant of the snippet shown above:

    + +
    +
    +    find_package(LLVM)
    +
    +    if( NOT LLVM_FOUND )
    +      message(FATAL_ERROR "LLVM package can't be found. Set CMAKE_PREFIX_PATH variable to LLVM's installation prefix.")
    +    endif()
    +
    +    include_directories( ${LLVM_INCLUDE_DIRS} )
    +    link_directories( ${LLVM_LIBRARY_DIRS} )
    +
    +    llvm_map_components_to_libraries(REQ_LLVM_LIBRARIES jit native)
    +
    +    target_link_libraries(mycompiler ${REQ_LLVM_LIBRARIES})
    +    
    +
    + + +

    + Developing LLVM pass out of source +

    + +
    + +

    It is possible to develop LLVM passes against an installed LLVM. + An example project layout is provided below:

    + +
    +
    +      <project dir>/
    +          |
    +          CMakeLists.txt
    +          <pass name>/
    +              |
    +              CMakeLists.txt
    +              Pass.cpp
    +              ...
    +    
    +
    + +

    Contents of <project dir>/CMakeLists.txt:

    + +
    +
    +    find_package(LLVM)
    +
    +    # Define add_llvm_* macros.
    +    include(AddLLVM)
    +
    +    add_definitions(${LLVM_DEFINITIONS})
    +    include_directories(${LLVM_INCLUDE_DIRS})
    +    link_directories(${LLVM_LIBRARY_DIRS})
    +
    +    add_subdirectory(<pass name>)
    +    
    +
    + +

    Contents of <project dir>/<pass name>/CMakeLists.txt:

    + +
    +
    +    add_llvm_loadable_module(LLVMPassname
    +      Pass.cpp
    +      )
    +    
    +
    + +

    When you are done developing your pass, you may wish to integrate it + into the LLVM source tree. You can achieve this in two easy steps:
    + 1. Copy the <pass name> folder into the <LLVM root>/lib/Transforms directory.
    + 2. Add an "add_subdirectory(<pass name>)" line to <LLVM root>/lib/Transforms/CMakeLists.txt

    +
    + +
    - - - + -
    +

    Notes for specific compilers and/or platforms.

    +

    + Microsoft Visual C++ +

    + +
    + +
    +
    LLVM_COMPILER_JOBS:STRING
    +
    Specifies the maximum number of parallel compiler jobs to use + per project when building with msbuild or Visual Studio. Only supported for + Visual Studio 2008 and Visual Studio 2010 CMake generators. 0 means use all + processors. Default is 0.
    +
    + +
    +
    @@ -462,7 +556,7 @@ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Oscar Fuentes
    - LLVM Compiler Infrastructure
    + LLVM Compiler Infrastructure
    Last modified: $Date: 2010-08-09 03:59:36 +0100 (Mon, 9 Aug 2010) $ diff --git a/docs/CodeGenerator.html b/docs/CodeGenerator.html index 925156ff0787..d082acc57dc1 100644 --- a/docs/CodeGenerator.html +++ b/docs/CodeGenerator.html @@ -19,9 +19,9 @@ -
    +

    The LLVM Target-Independent Code Generator -

    +
    1. Introduction @@ -127,12 +127,12 @@
    - + -
    +

    The LLVM target-independent code generator is a framework that provides a suite of reusable components for translating the LLVM internal representation @@ -188,14 +188,12 @@ depend on the target-description and machine code representation classes, ensuring that it is portable.

    -
    - - + -
    +

    The two pieces of the LLVM code generator are the high-level interface to the code generator and the set of reusable components that can be used to build @@ -223,11 +221,11 @@

    - + -
    +

    The LLVM target-independent code generator is designed to support efficient and quality code generation for standard register-based microprocessors. @@ -297,11 +295,11 @@

    - + -
    +

    The target description classes require a detailed description of the target architecture. These target descriptions often have a large amount of common @@ -324,13 +322,15 @@

    - - + + +

    + Target description classes +

    -
    +

    The LLVM target description classes (located in the include/llvm/Target directory) provide an abstract description of @@ -346,14 +346,12 @@ TargetMachine class provides accessors that should be implemented by the target.

    -
    - - + -
    +

    The TargetMachine class provides virtual methods that are used to access the target-specific implementations of the various target description @@ -369,11 +367,11 @@

    - + -
    +

    The TargetData class is the only required target description class, and it is the only class that is not extensible (you cannot derive a new @@ -385,11 +383,11 @@

    - + -
    +

    The TargetLowering class is used by SelectionDAG based instruction selectors primarily to describe how LLVM code should be lowered to @@ -411,11 +409,11 @@

    - + -
    +

    The TargetRegisterInfo class is used to describe the register file of the target and any interactions between the registers.

    @@ -445,11 +443,11 @@
    - + -
    +

    The TargetInstrInfo class is used to describe the machine instructions supported by the target. It is essentially an array of @@ -463,11 +461,11 @@

    - + -
    +

    The TargetFrameInfo class is used to provide information about the stack frame layout of the target. It holds the direction of stack growth, the @@ -479,11 +477,11 @@

    - + -
    +

    The TargetSubtarget class is used to provide information about the specific chip set being targeted. A sub-target informs code generation of @@ -495,11 +493,11 @@ -

    + -
    +

    The TargetJITInfo class exposes an abstract interface used by the Just-In-Time code generator to perform target-specific activities, such as @@ -509,13 +507,15 @@

    - - + + +

    + Machine code description classes +

    -
    +

    At the high-level, LLVM code is translated to a machine specific representation formed out of @@ -528,14 +528,12 @@ SSA representation for machine code, as well as a register allocated, non-SSA form.

    -
    - - + -
    +

    Target machine instructions are represented as instances of the MachineInstr class. This class is an extremely abstract way of @@ -576,14 +574,12 @@

    Also if the first operand is a def, it is easier to create instructions whose only def is the first operand.

    -
    - - + -
    +

    Machine instructions are created by using the BuildMI functions, located in the include/llvm/CodeGen/MachineInstrBuilder.h file. The @@ -630,11 +626,11 @@ MI.addReg(Reg, RegState::Define);

    - + -
    +

    One important issue that the code generator needs to be aware of is the presence of fixed registers. In particular, there are often places in the @@ -702,11 +698,11 @@ ret

    - + -
    +

    MachineInstr's are initially selected in SSA-form, and are maintained in SSA-form until register allocation happens. For the most part, @@ -719,12 +715,14 @@ ret

    - - -
    + +

    + The MachineBasicBlock class +

    + +

    The MachineBasicBlock class contains a list of machine instructions (MachineInstr instances). It roughly @@ -737,11 +735,11 @@ ret

    - + -
    +

    The MachineFunction class contains a list of machine basic blocks (MachineBasicBlock instances). It @@ -754,14 +752,15 @@ ret

    - - - + + +

    + The "MC" Layer +

    -
    +

    The MC Layer is used to represent and process code at the raw machine code @@ -770,7 +769,7 @@ level, devoid of "high level" information like "constant pools", "jump tables", like label names, machine instructions, and sections in the object file. The code in this layer is used for a number of important purposes: the tail end of the code generator uses it to write a .s or .o file, and it is also used by the -llvm-mc tool to implement standalone machine codeassemblers and disassemblers. +llvm-mc tool to implement standalone machine code assemblers and disassemblers.

    @@ -779,15 +778,12 @@ of important subsystems that interact at this layer, they are described later in this manual.

    -
    - - - + -
    +

    MCStreamer is best thought of as an assembler API. It is an abstract API which @@ -817,11 +813,11 @@ MCObjectStreamer implements a full assembler.

    - + -
    +

    The MCContext class is the owner of a variety of uniqued data structures at the @@ -832,11 +828,11 @@ interact with to create symbols and sections. This class can not be subclassed.

    - + -
    +

    The MCSymbol class represents a symbol (aka label) in the assembly file. There @@ -864,11 +860,11 @@ like this to the .s file:

    - + -
    +

    The MCSection class represents an object-file specific section. It is subclassed @@ -882,11 +878,11 @@ directive in a .s file).

    - + -
    +

    The MCInst class is a target-independent representation of an instruction. It @@ -904,27 +900,26 @@ printer, and the type generated by the assembly parser and disassembler.

    - - - + + +

    + Target-independent code generation algorithms +

    -
    +

    This section documents the phases described in the high-level design of the code generator. It explains how they work and some of the rationale behind their design.

    -
    - - + -
    +

    Instruction Selection is the process of translating LLVM code presented to the code generator into target-specific machine instructions. There are @@ -936,14 +931,12 @@ printer, and the type generated by the assembly parser and disassembler. selector to be generated from these .td files, though currently there are still things that require custom C++ code.

    -
    - - + -
    +

    The SelectionDAG provides an abstraction for code representation in a way that is amenable to instruction selection using automatic techniques @@ -1001,11 +994,11 @@ printer, and the type generated by the assembly parser and disassembler.

    - + -
    +

    SelectionDAG-based instruction selection consists of the following steps:

    @@ -1082,11 +1075,11 @@ printer, and the type generated by the assembly parser and disassembler.
    - + -
    +

    The initial SelectionDAG is naïvely peephole expanded from the LLVM input by the SelectionDAGLowering class in the @@ -1102,11 +1095,11 @@ printer, and the type generated by the assembly parser and disassembler.

    - + -
    +

    The Legalize phase is in charge of converting a DAG to only use the types that are natively supported by the target.

    @@ -1135,11 +1128,11 @@ printer, and the type generated by the assembly parser and disassembler.
    - + -
    +

    The Legalize phase is in charge of converting a DAG to only use the operations that are natively supported by the target.

    @@ -1167,12 +1160,13 @@ printer, and the type generated by the assembly parser and disassembler.
    - +

    + + SelectionDAG Optimization Phase: the DAG Combiner + +

    -
    +

    The SelectionDAG optimization phase is run multiple times for code generation, immediately after the DAG is built and once after each @@ -1202,11 +1196,11 @@ printer, and the type generated by the assembly parser and disassembler.

    - + -
    +

    The Select phase is the bulk of the target-specific code for instruction selection. This phase takes a legal SelectionDAG as input, pattern matches @@ -1363,11 +1357,11 @@ def : Pat<(i32 imm:$imm),

    - + -
    +

    The scheduling phase takes the DAG of target instructions from the selection phase and assigns an order. The scheduler can pick an order depending on @@ -1384,11 +1378,11 @@ def : Pat<(i32 imm:$imm),

    - + -
    +
    1. Optional function-at-a-time selection.
    2. @@ -1398,18 +1392,20 @@ def : Pat<(i32 imm:$imm),
    +
    + - -

    To Be Written

    + +

    To Be Written

    - + -
    +

    Live Intervals are the ranges (intervals) where a variable is live. They are used by some register allocator passes to @@ -1417,14 +1413,12 @@ def : Pat<(i32 imm:$imm), register are live at the same point in the program (i.e., they conflict). When this situation occurs, one virtual register must be spilled.

    -
    - - + -
    +

    The first step in determining the live intervals of variables is to calculate the set of registers that are immediately dead after the instruction (i.e., @@ -1466,11 +1460,11 @@ def : Pat<(i32 imm:$imm),

    - + -
    +

    We now have the information available to perform the live intervals analysis and build the live intervals themselves. We start off by numbering the basic @@ -1485,12 +1479,14 @@ def : Pat<(i32 imm:$imm),

    - - -
    + +

    + Register Allocation +

    + +

    The Register Allocation problem consists in mapping a program Pv, that can use an unbounded number of virtual registers, @@ -1500,15 +1496,13 @@ def : Pat<(i32 imm:$imm), accommodate all the virtual registers, some of them will have to be mapped into memory. These virtuals are called spilled virtuals.

    -
    - - + -
    +

    In LLVM, physical registers are denoted by integer numbers that normally range from 1 to 1023. To see how this numbering is defined for a particular @@ -1617,11 +1611,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf, -

    + -
    +

    There are two ways to map virtual registers to physical registers (or to memory slots). The first way, that we will call direct mapping, is @@ -1667,11 +1661,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf,

    - + -
    +

    With very rare exceptions (e.g., function calls), the LLVM machine code instructions are three address instructions. That is, each instruction is @@ -1703,11 +1697,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf,

    - + -
    +

    An important transformation that happens during register allocation is called the SSA Deconstruction Phase. The SSA form simplifies many analyses @@ -1727,11 +1721,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf,

    - + -
    +

    Instruction folding is an optimization performed during register allocation that removes unnecessary copy instructions. For instance, a @@ -1764,11 +1758,11 @@ bool RegMapping_Fer::compatible_class(MachineFunction &mf, -

    + -
    +

    The LLVM infrastructure provides the application developer with three different register allocators:

    @@ -1805,23 +1799,25 @@ $ llc -regalloc=pbqp file.bc -o pbqp.s;
    +
    + - -

    To Be Written

    + +

    To Be Written

    - -

    To Be Written

    + +

    To Be Written

    - + -
    +

    The code emission step of code generation is responsible for lowering from the code generator abstractions (like

    - - - + + +

    + Implementing a Native Assembler +

    -
    +

    Though you're probably reading this because you want to write or maintain a compiler backend, LLVM also fully supports building native assemblers too. @@ -1896,20 +1893,18 @@ We've tried hard to automate the generation of the assembler from the .td files part of the manual and repetitive data entry can be factored and shared with the compiler.

    -
    - -
    Instruction Parsing
    +

    Instruction Parsing

    -

    To Be Written

    +

    To Be Written

    -
    +

    Instruction Alias Processing -

    + -
    +

    Once the instruction is parsed, it enters the MatchInstructionImpl function. The MatchInstructionImpl function performs alias processing and then does actual matching.

    @@ -1922,12 +1917,10 @@ complex/powerful). Generally you want to use the first alias mechanism that meets the needs of your instruction, because it will allow a more concise description.

    -
    - -
    Mnemonic Aliases
    +

    Mnemonic Aliases

    -
    +

    The first phase of alias processing is simple instruction mnemonic remapping for classes of instructions which are allowed with two different @@ -1965,9 +1958,9 @@ on the current instruction set.

    -
    Instruction Aliases
    +

    Instruction Aliases

    -
    +

    The most general phase of alias processing occurs while matching is happening: it provides new forms for the matcher to match along with a specific @@ -2028,36 +2021,33 @@ subtarget specific.

    - +
    -
    Instruction Matching
    +

    Instruction Matching

    -

    To Be Written

    +

    To Be Written

    - - - - - + + +

    + Target-specific Implementation Notes +

    -
    +

    This section of the document explains features or design decisions that are specific to the code generator for a particular target. First we start with a table that summarizes what features are supported by each target.

    -
    - - + -
    +

    Note that this table does not include the C backend or Cpp backends, since they do not use the target independent code generator infrastructure. It also @@ -2228,12 +2218,10 @@ is the key:

    -
    - -
    Is Generally Reliable
    +

    Is Generally Reliable

    -
    +

    This box indicates whether the target is considered to be production quality. This indicates that the target has been used as a static compiler to compile large amounts of code by a variety of different people and is in @@ -2241,9 +2229,9 @@ continuous use.

    -
    Assembly Parser
    +

    Assembly Parser

    -
    +

    This box indicates whether the target supports parsing target specific .s files by implementing the MCAsmParser interface. This is required for llvm-mc to be able to act as a native assembler and is required for inline assembly @@ -2253,18 +2241,18 @@ support in the native .o file writer.

    -
    Disassembler
    +

    Disassembler

    -
    +

    This box indicates whether the target supports the MCDisassembler API for disassembling machine opcode bytes into MCInst's.

    -
    Inline Asm
    +

    Inline Asm

    -
    +

    This box indicates whether the target supports most popular inline assembly constraints and modifiers.

    @@ -2274,9 +2262,9 @@ constraints relating to the X86 floating point stack.

    -
    JIT Support
    +

    JIT Support

    -
    +

    This box indicates whether the target supports the JIT compiler through the ExecutionEngine interface.

    @@ -2286,9 +2274,9 @@ in ARM codegen mode, but lacks NEON and full Thumb support.

    -
    .o File Writing
    +

    .o File Writing

    -
    +

    This box indicates whether the target supports writing .o files (e.g. MachO, ELF, and/or COFF) directly from the target. Note that the target also @@ -2302,9 +2290,9 @@ file to a .o file (as is the case for many C compilers).

    -
    Tail Calls
    +

    Tail Calls

    -
    +

    This box indicates whether the target supports guaranteed tail calls. These are calls marked "tail" and use the fastcc @@ -2313,15 +2301,14 @@ more more details.

    - - - - - -
    + +

    + Tail call optimization +

    + +

    Tail call optimization, callee reusing the stack of the caller, is currently supported on x86/x86-64 and PowerPC. It is performed if:

    @@ -2383,11 +2370,11 @@ define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
    - + -
    +

    Sibling call optimization is a restricted form of tail call optimization. Unlike tail call optimization described in the previous section, it can be @@ -2427,24 +2414,22 @@ entry:

    - + -
    +

    The X86 code generator lives in the lib/Target/X86 directory. This code generator is capable of targeting a variety of x86-32 and x86-64 processors, and includes support for ISA extensions such as MMX and SSE.

    -
    - - + -
    +

    The following are the known target triples that are supported by the X86 backend. This is not an exhaustive list, and it would be useful to add those @@ -2469,12 +2454,12 @@ entry:

    - + -
    +

    The following target-specific calling conventions are known to the backend:

    @@ -2489,11 +2474,11 @@ entry:
    - + -
    +

    The x86 has a very flexible way of accessing memory. It is capable of forming memory addresses of the following expression directly in integer @@ -2526,13 +2511,13 @@ OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm PhysReg

    - + -
    +
    -

    x86 has an experimental feature which provides +

    x86 has a feature which provides the ability to perform loads and stores to different address spaces via the x86 segment registers. A segment override prefix byte on an instruction causes the instruction's memory access to go to the specified @@ -2571,11 +2556,11 @@ OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm PhysReg

    - + -
    +

    An instruction name consists of the base name, a default operand size, and a character per operand with an optional special size. For example:

    @@ -2591,25 +2576,25 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory
    - - -
    + +

    + The PowerPC backend +

    + +

    The PowerPC code generator lives in the lib/Target/PowerPC directory. The code generation is retargetable to several variations or subtargets of the PowerPC ISA; including ppc32, ppc64 and altivec.

    -
    - - + -
    +

    LLVM follows the AIX PowerPC ABI, with two deviations. LLVM uses a PC relative (PIC) or static addressing for accessing global values, so no TOC @@ -2625,11 +2610,11 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory

    - + -
    +

    The size of a PowerPC frame is usually fixed for the duration of a function's invocation. Since the frame is fixed size, all references @@ -2772,11 +2757,11 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory

    - + -
    +

    The llvm prolog and epilog are the same as described in the PowerPC ABI, with the following exceptions. Callee saved registers are spilled after the frame @@ -2789,16 +2774,19 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory

    - + -
    +

    TODO - More to come.

    +
    + +

    @@ -2809,8 +2797,8 @@ MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
    - The LLVM Compiler Infrastructure
    - Last modified: $Date: 2011-01-09 00:10:59 +0100 (Sun, 09 Jan 2011) $ + The LLVM Compiler Infrastructure
    + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/CodingStandards.html b/docs/CodingStandards.html index 4a9ab7d857a8..139bbdb24902 100644 --- a/docs/CodingStandards.html +++ b/docs/CodingStandards.html @@ -7,9 +7,9 @@ -
    +

    LLVM Coding Standards -

    +
    1. Introduction
    2. @@ -83,12 +83,12 @@ - + -
      +

      This document attempts to describe a few coding standards that are being used in the LLVM source tree. Although no coding standards should be regarded as @@ -117,22 +117,26 @@ href="mailto:sabre@nondot.org">Chris.

      - + +
      + - + + +
      - + -
      +

      Comments are one critical part of readability and maintainability. Everyone knows they should comment, so should you. When writing comments, write them as @@ -141,7 +145,9 @@ etc. Although we all should probably comment our code more than we do, there are a few very critical places that documentation is very useful:

      -File Headers +
      File Headers
      + +

      Every source file should have a header on it that describes the basic purpose of the file. If a file does not have a header, it should not be @@ -184,7 +190,9 @@ Here it's only two lines. If an algorithm is being implemented or something tricky is going on, a reference to the paper where it is published should be included, as well as any notes or "gotchas" in the code to watch out for.

      -Class overviews +
      + +
      Class overviews

      Classes are one fundamental part of a good object oriented design. As such, a class definition should have a comment block that explains what the class is @@ -193,7 +201,9 @@ could figure it out, it's probably safe to leave it out. Naming classes something sane goes a long ways towards avoiding writing documentation.

      -Method information +
      Method information
      + +

      Methods defined in a class (as well as any global functions) should also be documented properly. A quick note about what it does and a description of the @@ -207,12 +217,14 @@ happens: does the method return null? Abort? Format your hard disk?

      - - -
      + +

      + Comment Formatting +

      + +

      In general, prefer C++ style (//) comments. They take less space, require less typing, don't have nesting problems, etc. There are a few cases @@ -233,11 +245,11 @@ These nest properly and are better behaved in general than C style comments.

      - + -
      +

      Immediately after the header file comment (and include guards if working on a header file), the

      - + -
      +

      Write your code to fit within 80 columns of text. This helps those of us who like to print out code and look at your code in an xterm without resizing @@ -298,11 +310,11 @@ for debate.

      - + -
      +

      In all cases, prefer spaces to tabs in source files. People have different preferred indentation levels, and different styles of indentation that they @@ -319,11 +331,11 @@ makes for incredible diffs that are absolutely worthless.

      - + -
      +

      Okay, in your first year of programming you were told that indentation is important. If you didn't believe and internalize this then, now is the time. @@ -331,19 +343,21 @@ Just do it.

      +
      - + +
      - + -
      +

      If your code has compiler warnings in it, something is wrong — you aren't casting values correctly, you have "questionable" constructs in your @@ -393,11 +407,11 @@ be fixed by massaging the code appropriately.

      - + -
      +

      In almost all cases, it is possible and within reason to write completely portable code. If there are cases where it isn't possible to write portable @@ -412,10 +426,10 @@ libSystem.

      - -
      + +

      In an effort to reduce code and executable size, LLVM does not use RTTI (e.g. dynamic_cast<>) or exceptions. These two language features @@ -433,10 +447,10 @@ than dynamic_cast<>.

      - -
      + +

      In C++, the class and struct keywords can be used almost interchangeably. The only difference is when they are used to declare a class: @@ -454,26 +468,32 @@ which case struct is allowed.

      +
      + +
      + - + +
      - + +
      - + -
      +

      C++ doesn't do too well in the modularity department. There is no real encapsulation or data hiding (unless you use expensive protocol classes), but it @@ -499,11 +519,11 @@ translation unit.

      - + -
      +

      #include hurts compile time performance. Don't do it unless you have to, especially in header files.

      @@ -528,11 +548,11 @@ dependencies that you'll find out about later.

      - + -
      +

      Many modules have a complex implementation that causes them to use more than one implementation (.cpp) file. It is often tempting to put the @@ -549,11 +569,11 @@ class itself. Just make them private (or protected) and all is well.

      - + -
      +

      When reading code, keep in mind how much state and how many previous decisions have to be remembered by the reader to understand a block of code. @@ -658,11 +678,11 @@ can be a big understandability win.

      - + -
      +

      For similar reasons above (reduction of indentation and easier reading), please do not use 'else' or 'else if' after something that @@ -741,11 +761,11 @@ track of when reading the code.

      - + -
      +

      It is very common to write small loops that just compute a boolean value. There are a number of ways that people commonly write these, but an example of @@ -802,20 +822,24 @@ locality.

      - - - + + +

      + The Low-Level Issues +

      +
      - +

      + + Name Types, Functions, Variables, and Enumerators Properly + +

      -
      +

      Poorly-chosen names can mislead the reader and cause bugs. We cannot stress enough how important it is to use descriptive names. Pick names that @@ -894,11 +918,11 @@ Vehicle MakeVehicle(VehicleType Type) { -

      + -
      +

      Use the "assert" macro to its fullest. Check all of your preconditions and assumptions, you never know when a bug (not necessarily even @@ -997,11 +1021,11 @@ assert(NewToSet && "The value shouldn't be in the set yet");

      - + -
      +

      In LLVM, we prefer to explicitly prefix all identifiers from the standard namespace with an "std::" prefix, rather than rely on @@ -1035,12 +1059,13 @@ use any others.

      - +

      + + Provide a Virtual Method Anchor for Classes in Headers + +

      -
      +

      If a class is defined in a header file and has a v-table (either it has virtual methods or it derives from classes with virtual methods), it must @@ -1052,11 +1077,11 @@ increasing link times.

      - + -
      +

      Because C++ doesn't have a standard "foreach" loop (though it can be emulated with macros and may be coming in C++'0x) we end up writing a lot of @@ -1114,11 +1139,11 @@ prefer it.

      - + -
      +

      The use of #include <iostream> in library files is hereby forbidden. The primary reason for doing this is to @@ -1149,11 +1174,11 @@ the llvm::MemoryBuffer API for reading files.

      - + -
      +

      LLVM includes a lightweight, simple, and efficient stream implementation in llvm/Support/raw_ostream.h, which provides all of the common @@ -1169,11 +1194,11 @@ declarations and constant references to raw_ostream instances.

      - + -
      +

      The std::endl modifier, when used with iostreams outputs a newline to the output stream specified. In addition to doing this, however, it @@ -1191,22 +1216,25 @@ it's better to use a literal '\n'.

      +
      - + +
      +

      This section describes preferred low-level formatting guidelines along with reasoning on why we prefer them.

      - + -
      +

      We prefer to put a space before an open parenthesis only in control flow statements, but not in normal function call expressions and function-like @@ -1260,11 +1288,11 @@ this misinterpretation.

      - + -
      +

      Hard fast rule: Preincrement (++X) may be no slower than postincrement (X++) and could very well be a lot faster than it. Use @@ -1280,11 +1308,11 @@ get in the habit of always using preincrement, and you won't have a problem.

      - + -
      +

      In general, we strive to reduce indentation wherever possible. This is useful @@ -1368,11 +1396,11 @@ the contents of the namespace.

      - + -
      +

      After talking about namespaces in general, you may be wondering about anonymous namespaces in particular. @@ -1452,15 +1480,17 @@ namespace just because it was declared there.

      - - - - + +
      + + +

      + See Also +

      -
      +

      A lot of these comments and recommendations have been culled from other sources. Two particularly important books for our work are:

      @@ -1491,8 +1521,8 @@ something.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
      - LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-20 03:03:04 +0100 (Sun, 20 Feb 2011) $ + LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/CommandGuide/FileCheck.pod b/docs/CommandGuide/FileCheck.pod index 3ccaa63e176b..dbd626c7cb7f 100644 --- a/docs/CommandGuide/FileCheck.pod +++ b/docs/CommandGuide/FileCheck.pod @@ -240,6 +240,6 @@ define two separate CHECK lines that match on the same line. =head1 AUTHORS -Maintained by The LLVM Team (L). +Maintained by The LLVM Team (L). =cut diff --git a/docs/CommandGuide/bugpoint.pod b/docs/CommandGuide/bugpoint.pod index 1870a0d84b60..31db62fe2d6b 100644 --- a/docs/CommandGuide/bugpoint.pod +++ b/docs/CommandGuide/bugpoint.pod @@ -21,7 +21,7 @@ distribution. =head1 OPTIONS -=over +=over =item B<--additional-so> F @@ -87,7 +87,7 @@ mis-management. =item B<-find-bugs> Continually randomize the specified passes and run them on the test program -until a bug is found or the user kills B. +until a bug is found or the user kills B. =item B<-help> @@ -147,6 +147,21 @@ This option defines the command to use with the B<--run-custom> and B<--safe-custom> options to execute the bitcode testcase. This can be useful for cross-compilation. +=item B<--compile-command> I + +This option defines the command to use with the B<--compile-custom> +option to compile the bitcode testcase. This can be useful for +testing compiler output without running any link or execute stages. To +generate a reduced unit test, you may add CHECK directives to the +testcase and pass the name of an executable compile-command script in this form: + + #!/bin/sh + llc "$@" + not FileCheck [bugpoint input file].ll < bugpoint-test-program.s + +This script will "fail" as long as FileCheck passes. So the result +will be the minimum bitcode that passes FileCheck. + =item B<--safe-path> I This option defines the path to the command to execute with the @@ -166,6 +181,6 @@ L =head1 AUTHOR -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). 
=cut diff --git a/docs/CommandGuide/index.html b/docs/CommandGuide/index.html index 3c1a9f9ed4f0..cb5438f58baa 100644 --- a/docs/CommandGuide/index.html +++ b/docs/CommandGuide/index.html @@ -3,15 +3,15 @@ LLVM Command Guide - + -
      +

      LLVM Command Guide -

      + -
      +

      These documents are HTML versions of the man pages for all of the LLVM tools. These pages describe how to use the LLVM commands @@ -23,12 +23,12 @@ options) arguments to the tool you are interested in.

      - + -
      +
        @@ -80,12 +80,12 @@ options) arguments to the tool you are interested in.

      - + -
      +
      • llvm-gcc - @@ -99,13 +99,13 @@ options) arguments to the tool you are interested in.

      - + -
      +
        @@ -123,12 +123,12 @@ options) arguments to the tool you are interested in.

      - + -
      +
      • FileCheck - @@ -150,8 +150,8 @@ options) arguments to the tool you are interested in.

        Valid HTML 4.01 - LLVM Compiler Infrastructure
        - Last modified: $Date: 2010-09-08 01:10:21 +0200 (Wed, 08 Sep 2010) $ + LLVM Compiler Infrastructure
        + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/CommandGuide/lit.pod b/docs/CommandGuide/lit.pod index 989a5d7acb51..faf4811dd18c 100644 --- a/docs/CommandGuide/lit.pod +++ b/docs/CommandGuide/lit.pod @@ -349,6 +349,6 @@ L =head1 AUTHOR -Written by Daniel Dunbar and maintained by the LLVM Team (L). +Written by Daniel Dunbar and maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llc.pod b/docs/CommandGuide/llc.pod index eb26ec00fd76..50b45c8d5a2a 100644 --- a/docs/CommandGuide/llc.pod +++ b/docs/CommandGuide/llc.pod @@ -196,6 +196,6 @@ L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/lli.pod b/docs/CommandGuide/lli.pod index 52a2721e7d70..a313a31718c4 100644 --- a/docs/CommandGuide/lli.pod +++ b/docs/CommandGuide/lli.pod @@ -214,6 +214,6 @@ L =head1 AUTHOR -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-ar.pod b/docs/CommandGuide/llvm-ar.pod index 63ba43f6f6f8..a8f01b0319a0 100644 --- a/docs/CommandGuide/llvm-ar.pod +++ b/docs/CommandGuide/llvm-ar.pod @@ -401,6 +401,6 @@ L, ar(1) =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-as.pod b/docs/CommandGuide/llvm-as.pod index 185c009698f3..cc8188708a53 100644 --- a/docs/CommandGuide/llvm-as.pod +++ b/docs/CommandGuide/llvm-as.pod @@ -72,6 +72,6 @@ L, L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-bcanalyzer.pod b/docs/CommandGuide/llvm-bcanalyzer.pod index b0bc0cddba82..9c5021b639f1 100644 --- a/docs/CommandGuide/llvm-bcanalyzer.pod +++ b/docs/CommandGuide/llvm-bcanalyzer.pod @@ -268,7 +268,7 @@ The number of bytes consumed by instructions in the function. =item B -The average number of bytes consumed by the instructions in the funtion. 
This +The average number of bytes consumed by the instructions in the function. This value is computed by dividing Instruction Size by Instructions. =item B @@ -310,6 +310,6 @@ L, L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-config.pod b/docs/CommandGuide/llvm-config.pod index 4e38dae3df63..7d68564a6d75 100644 --- a/docs/CommandGuide/llvm-config.pod +++ b/docs/CommandGuide/llvm-config.pod @@ -126,6 +126,6 @@ occurs, it will exit with a non-zero value. =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-diff.pod b/docs/CommandGuide/llvm-diff.pod index c8cfdb3be94d..ffe0b4896fdd 100644 --- a/docs/CommandGuide/llvm-diff.pod +++ b/docs/CommandGuide/llvm-diff.pod @@ -48,6 +48,6 @@ massive detected differences in blocks. =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-dis.pod b/docs/CommandGuide/llvm-dis.pod index 5b2f4ef4e924..9f4026c27fe7 100644 --- a/docs/CommandGuide/llvm-dis.pod +++ b/docs/CommandGuide/llvm-dis.pod @@ -55,6 +55,6 @@ L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-extract.pod b/docs/CommandGuide/llvm-extract.pod index d4baab73991a..797e79d128d4 100644 --- a/docs/CommandGuide/llvm-extract.pod +++ b/docs/CommandGuide/llvm-extract.pod @@ -68,6 +68,6 @@ L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-ld.pod b/docs/CommandGuide/llvm-ld.pod index 536ab0fa43d5..efa9ebd06c53 100644 --- a/docs/CommandGuide/llvm-ld.pod +++ b/docs/CommandGuide/llvm-ld.pod @@ -229,6 +229,6 @@ L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). 
=cut diff --git a/docs/CommandGuide/llvm-link.pod b/docs/CommandGuide/llvm-link.pod index 8d06cc9d9d9f..1e466a56f4ec 100644 --- a/docs/CommandGuide/llvm-link.pod +++ b/docs/CommandGuide/llvm-link.pod @@ -74,6 +74,6 @@ L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-nm.pod b/docs/CommandGuide/llvm-nm.pod index a580d3f5ca70..a6dc4901dd61 100644 --- a/docs/CommandGuide/llvm-nm.pod +++ b/docs/CommandGuide/llvm-nm.pod @@ -117,6 +117,6 @@ L, ar(1), nm(1) =head1 AUTHOR -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-prof.pod b/docs/CommandGuide/llvm-prof.pod index 9541b05dcafc..4b2e09d70470 100644 --- a/docs/CommandGuide/llvm-prof.pod +++ b/docs/CommandGuide/llvm-prof.pod @@ -52,6 +52,6 @@ information. Otherwise, it exits with zero. =head1 AUTHOR -B is maintained by the LLVM Team (L). +B is maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvm-ranlib.pod b/docs/CommandGuide/llvm-ranlib.pod index 53cd34bbb5c4..431bc551048a 100644 --- a/docs/CommandGuide/llvm-ranlib.pod +++ b/docs/CommandGuide/llvm-ranlib.pod @@ -47,6 +47,6 @@ L, ranlib(1) =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvmc.pod b/docs/CommandGuide/llvmc.pod index d237ca4c14fa..95a9e5ef3611 100644 --- a/docs/CommandGuide/llvmc.pod +++ b/docs/CommandGuide/llvmc.pod @@ -185,6 +185,6 @@ L, L, L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/llvmgcc.pod b/docs/CommandGuide/llvmgcc.pod index 9892ca71861e..30af0a06e06f 100644 --- a/docs/CommandGuide/llvmgcc.pod +++ b/docs/CommandGuide/llvmgcc.pod @@ -70,7 +70,7 @@ L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). 
=cut diff --git a/docs/CommandGuide/llvmgxx.pod b/docs/CommandGuide/llvmgxx.pod index 64b670ebe069..1ea3d4967006 100644 --- a/docs/CommandGuide/llvmgxx.pod +++ b/docs/CommandGuide/llvmgxx.pod @@ -79,7 +79,7 @@ L =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/opt.pod b/docs/CommandGuide/opt.pod index d1d1db5ef67e..f5f496817eb8 100644 --- a/docs/CommandGuide/opt.pod +++ b/docs/CommandGuide/opt.pod @@ -138,6 +138,6 @@ occurs, it will exit with a non-zero value. =head1 AUTHORS -Maintained by the LLVM Team (L). +Maintained by the LLVM Team (L). =cut diff --git a/docs/CommandGuide/tblgen.pod b/docs/CommandGuide/tblgen.pod index d127492a91bb..fe1be5ecfa3c 100644 --- a/docs/CommandGuide/tblgen.pod +++ b/docs/CommandGuide/tblgen.pod @@ -110,6 +110,6 @@ occurs, it will exit with a non-zero value. =head1 AUTHORS -Maintained by The LLVM Team (L). +Maintained by The LLVM Team (L). =cut diff --git a/docs/CommandLine.html b/docs/CommandLine.html index 2e5b3a2c4dcb..7535ca498400 100644 --- a/docs/CommandLine.html +++ b/docs/CommandLine.html @@ -8,9 +8,9 @@ -
        +

        CommandLine 2.0 Library Manual -

        +
        1. Introduction
        2. @@ -100,12 +100,12 @@
      - + -
      +

      This document describes the CommandLine argument processing library. It will show you how to use it, and what it can do. The CommandLine library uses a @@ -184,12 +184,12 @@ href="mailto:sabre@nondot.org">Chris Lattner.

      - + -
      +

      This section of the manual runs through a simple CommandLine'ification of a basic compiler tool. This is intended to show you how to jump into using the @@ -231,11 +231,11 @@ represented like this:

      cl::opt<string> OutputFilename("o", cl::desc("Specify output filename"), cl::value_desc("filename"));
      -

      This declares a global variable "OutputFilename" that is used to -capture the result of the "o" argument (first parameter). We specify -that this is a simple scalar option by using the "cl::opt" template (as opposed to the "cl::list template), and tell the CommandLine library +

      This declares a global variable "OutputFilename" that is used to +capture the result of the "o" argument (first parameter). We specify +that this is a simple scalar option by using the "cl::opt" template (as opposed to the "cl::list" template), and tell the CommandLine library that the data type that we are parsing is a string.

      The second and third parameters (which are optional) are used to specify what @@ -321,14 +321,12 @@ OPTIONS:

      ... indicating that an input filename is expected.

      -
      - - + -
      +

      In addition to input and output filenames, we would like the compiler example to support three boolean flags: "-f" to force writing binary output to @@ -406,11 +404,11 @@ and lists of options.

      - + -
      +

      So far, the example works well, except for the fact that we need to check the quiet condition like this now:

      @@ -456,12 +454,12 @@ uses.

      - + -
      +

      So far we have seen how the CommandLine library handles builtin types like std::string, bool and int, but how does it handle @@ -567,11 +565,11 @@ which is when you would use it.

      - + -
      +

      Another useful argument form is a named alternative style. We shall use this style in our compiler to specify different debug levels that can be used. @@ -629,11 +627,11 @@ that you can choose the form most appropriate for your application.

      - + -
      +

      Now that we have the standard run-of-the-mill argument types out of the way, let's get a little wild and crazy. Let's say that we want our optimizer to accept @@ -699,11 +697,11 @@ checking we have to do.

      - + -
      +

      Instead of collecting sets of options in a list, it is also possible to gather information for enum values in a bit vector. The representation used by @@ -758,11 +756,11 @@ href="#list"> cl::list option.

      - + -
      +

      As our program grows and becomes more mature, we may decide to put summary information about what it does into the help output. The help output is styled @@ -800,28 +798,27 @@ OPTIONS:

      - - - + + +

      + Reference Guide +

      -
      +

      Now that you know the basics of how to use the CommandLine library, this section will give you the detailed information you need to tune how command line options work, as well as information on more "advanced" command line option processing capabilities.

      -
      - - + -
      +

      Positional arguments are those arguments that are not named, and are not specified with a hyphen. Positional arguments should be used when an option is @@ -854,15 +851,12 @@ that command line options will be ordered according to how they are listed in a are defined in multiple .cpp files. The fix for this problem is simply to define all of your positional arguments in one .cpp file.

      -
      - - - + -
      +

      Sometimes you may want to specify a value to your positional argument that starts with a hyphen (for example, searching for '-foo' in a file). At @@ -895,10 +889,10 @@ can use it like this:

      - -
      + +

      Sometimes an option can affect or modify the meaning of another option. For example, consider gcc's -x LANG option. This tells gcc to ignore the suffix of subsequent positional arguments and force @@ -954,11 +948,11 @@ can use it like this:

      - + -
      +

      The cl::ConsumeAfter formatting option is used to construct programs that use "interpreter style" option processing. With @@ -1006,12 +1000,14 @@ href="#cl::list">cl::list option.

      - - -
      + +

      + Internal vs External Storage +

      + +

      By default, all command line options automatically hold the value that they parse from the command line. This is very convenient in the common case, @@ -1076,11 +1072,11 @@ that DebugFlag is automatically set.

      - + -
      +

      This section describes the basic attributes that you can specify on options.

      @@ -1166,11 +1162,11 @@ obviously).
      - + -
      +

      Option modifiers are the flags and expressions that you pass into the constructors for cl::opt and -

      - - + -
      +

      The cl::NotHidden, cl::Hidden, and cl::ReallyHidden modifiers are used to control whether or not an option @@ -1230,12 +1224,12 @@ indicates that the option should not appear in any help output.

      - + -
      +

      This group of options is used to control how many times an option is allowed (or required) to be specified on the command line of your program. Specifying a @@ -1279,11 +1273,11 @@ retained.

      - + -
      +

      This group of options is used to control whether or not the option allows a value to be present. In the case of the CommandLine library, a value is either @@ -1328,11 +1322,11 @@ when extending the library.

      - + -
      +

      The formatting option group is used to specify that the command line option has special abilities and is otherwise different from other command line @@ -1409,11 +1403,11 @@ strategy basically looks like this:

      - + -
      +

      The miscellaneous option modifiers are the only flags where you can specify more than one flag from the set: they are not mutually exclusive. These flags @@ -1453,11 +1447,11 @@ only makes sense with a cl::list option.

      - + -
      +

      Some systems, such as certain variants of Microsoft Windows and some older Unices have a relatively low limit on command-line @@ -1474,13 +1468,14 @@ and

      - - - -
      + +

      + Top-Level Classes and Functions +

      + +

      Despite all of the built-in flexibility, the CommandLine option library really only consists of one function (cl::list, and cl::alias. This section describes these three classes in detail.

      -
      - - + -
      +

      The cl::ParseCommandLineOptions function is designed to be called directly from main, and is used to fill in the values of all of the @@ -1514,12 +1507,12 @@ which holds additional extra text to emit when the

      - + -
      +

      The cl::ParseEnvironmentOptions function has mostly the same effects as

      - + -
      +

      The cl::SetVersionPrinter function is designed to be called directly from main and before @@ -1572,11 +1565,11 @@ called when the --version option is given by the user.

      - + -
      +

      The cl::opt class is the class used to represent scalar command line options, and is the one used most of the time. It is a templated class which @@ -1607,11 +1600,11 @@ href="#customparser">custom parser.

      - + -
      +

      The cl::list class is the class used to represent a list of command line options. It too is a templated class which can take up to three @@ -1634,11 +1627,11 @@ be used.

      - + -
      +

      The cl::bits class is the class used to represent a list of command line options in the form of a bit vector. It is also a templated class which @@ -1659,11 +1652,11 @@ must be of type unsigned if external storage is used.

      - + -
      +

      The cl::alias class is a nontemplated class that is used to form aliases for other arguments.

      @@ -1682,11 +1675,11 @@ the conversion from string to data.

      - + -
      +

      The cl::extrahelp class is a nontemplated class that allows extra help text to be printed out for the -help option.

      @@ -1709,12 +1702,14 @@ single cl::extrahelp instance.

      - - -
      + +

      + Builtin parsers +

      + +

      Parsers control how the string value taken from the command line is translated into a typed value, suitable for use in a C++ program. By default, @@ -1773,27 +1768,27 @@ exponential notation (ex: 1.7e15) and properly supports locales.

      - - + + +

      + Extension Guide +

      -
      +

      Although the CommandLine library has a lot of functionality built into it already (as discussed previously), one of its true strengths lies in its extensibility. This section discusses how the CommandLine library works under the covers and illustrates how to do some simple, common extensions.

      -
      - - + -
      +

      One of the simplest and most common extensions is the use of a custom parser. As discussed previously, parsers are the portion @@ -1932,11 +1927,11 @@ tutorial.

      - + -
      +

      Several of the LLVM libraries define static cl::opt instances that will automatically be included in any program that links with that library. This is a feature. However, sometimes it is necessary to know the value of the @@ -1951,16 +1946,18 @@ tutorial.

      - + -
      +

      TODO: fill in this section

      +
      +
      @@ -1971,8 +1968,8 @@ tutorial.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
      - LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html index c63e1747fa69..03db3a09bde7 100644 --- a/docs/CompilerDriver.html +++ b/docs/CompilerDriver.html @@ -3,7 +3,7 @@ - + Customizing LLVMC: Reference Manual @@ -17,28 +17,23 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->

      Contents

        -
      • Introduction
      • -
      • Compiling with LLVMC
      • -
      • Predefined options
      • -
      • Compiling LLVMC plugins
      • -
      • Compiling standalone LLVMC-based drivers
      • -
      • Customizing LLVMC: the compilation graph
      • -
      • Describing options
          -
        • External options
        • +
        • Introduction
        • +
        • Compiling with llvmc
        • +
        • Predefined options
        • +
        • Compiling LLVMC-based drivers
        • +
        • Customizing LLVMC: the compilation graph
        • +
        • Describing options
        • +
        • Conditional evaluation
        • +
        • Writing a tool description
        • -
        • Conditional evaluation
        • -
        • Writing a tool description -
        • -
        • Language map
        • -
        • Option preprocessor
        • -
        • More advanced topics @@ -46,25 +41,24 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->

          Written by Mikhail Glushenkov

          -

          Introduction

          +

          Introduction

          LLVMC is a generic compiler driver, designed to be customizable and -extensible. It plays the same role for LLVM as the gcc program -does for GCC - LLVMC's job is essentially to transform a set of input -files into a set of targets depending on configuration rules and user -options. What makes LLVMC different is that these transformation rules -are completely customizable - in fact, LLVMC knows nothing about the -specifics of transformation (even the command-line options are mostly -not hard-coded) and regards the transformation structure as an -abstract graph. The structure of this graph is completely determined -by plugins, which can be either statically or dynamically linked. This -makes it possible to easily adapt LLVMC for other purposes - for -example, as a build tool for game resources.

          +extensible. It plays the same role for LLVM as the gcc program does for +GCC - LLVMC's job is essentially to transform a set of input files into a set of +targets depending on configuration rules and user options. What makes LLVMC +different is that these transformation rules are completely customizable - in +fact, LLVMC knows nothing about the specifics of transformation (even the +command-line options are mostly not hard-coded) and regards the transformation +structure as an abstract graph. The structure of this graph is described in +high-level TableGen code, from which an efficient C++ representation is +automatically derived. This makes it possible to adapt LLVMC for other +purposes - for example, as a build tool for game resources.

          Because LLVMC employs TableGen as its configuration language, you need to be familiar with it to customize LLVMC.

          -

          Compiling with LLVMC

          -

          LLVMC tries hard to be as compatible with gcc as possible, +

          Compiling with llvmc

          +

          LLVMC tries hard to be as compatible with gcc as possible, although there are some small differences. Most of the time, however, you shouldn't be able to notice them:

          @@ -74,11 +68,11 @@ $ ./a.out
           hello
           

          One nice feature of LLVMC is that one doesn't have to distinguish between -different compilers for different languages (think g++ vs. gcc) - the +different compilers for different languages (think g++ vs. gcc) - the right toolchain is chosen automatically based on input language names (which are, in turn, determined from file extensions). If you want to force files ending with ".c" to compile as C++, use the -x option, just like you would -do it with gcc:

          +do it with gcc:

           $ # hello.c is really a C++ file
           $ llvmc -x c++ hello.c
          @@ -97,138 +91,100 @@ $ ./a.out
           hello
           

          By default, LLVMC uses llvm-gcc to compile the source code. It is also -possible to choose the clang compiler with the -clang option.

          +possible to choose the clang compiler with the -clang option.

          -

          Predefined options

          -

          LLVMC has some built-in options that can't be overridden in the -configuration libraries:

          +

          Predefined options

          +

          LLVMC has some built-in options that can't be overridden in the TableGen code:

            -
          • -o FILE - Output file name.
          • -
          • -x LANGUAGE - Specify the language of the following input files +
          • -o FILE - Output file name.
          • +
          • -x LANGUAGE - Specify the language of the following input files until the next -x option.
          • -
          • -load PLUGIN_NAME - Load the specified plugin DLL. Example: --load $LLVM_DIR/Release/lib/LLVMCSimple.so.
          • -v - Enable verbose mode, i.e. print out all executed commands.
          • --save-temps - Write temporary files to the current directory and do not delete them on exit. This option can also take an argument: the --save-temps=obj switch will write files into the directory specified with the -o option. The --save-temps=cwd and --save-temps switches are both synonyms for the default behaviour.
          • -
          • --temp-dir DIRECTORY - Store temporary files in the given directory. This +
          • --temp-dir DIRECTORY - Store temporary files in the given directory. This directory is deleted on exit unless --save-temps is specified. If --save-temps=obj is also specified, --temp-dir is given the precedence.
          • --check-graph - Check the compilation for common errors like mismatched -output/input language names, multiple default edges and cycles. Because of -plugins, these checks can't be performed at compile-time. Exit with code zero -if no errors were found, and return the number of found errors -otherwise. Hidden option, useful for debugging LLVMC plugins.
          • +output/input language names, multiple default edges and cycles. Exit with code +zero if no errors were found, and return the number of found errors +otherwise. Hidden option, useful for debugging.
          • --view-graph - Show a graphical representation of the compilation graph -and exit. Requires that you have dot and gv programs installed. Hidden -option, useful for debugging LLVMC plugins.
          • +and exit. Requires that you have dot and gv programs installed. Hidden +option, useful for debugging.
          • --write-graph - Write a compilation-graph.dot file in the current directory with the compilation graph description in Graphviz format (identical to the file used by the --view-graph option). The -o option can be -used to set the output file name. Hidden option, useful for debugging LLVMC -plugins.
          • -
          • -help, -help-hidden, --version - These options have +used to set the output file name. Hidden option, useful for debugging.
          • +
          • --help, --help-hidden, --version - These options have their standard meaning.
          -
          -

          Compiling LLVMC plugins

          -

          It's easiest to start working on your own LLVMC plugin by copying the -skeleton project which lives under $LLVMC_DIR/plugins/Simple:

          +
          +

          Compiling LLVMC-based drivers

          +

          It's easiest to start working on your own LLVMC driver by copying the skeleton +project which lives under $LLVMC_DIR/examples/Skeleton:

          -$ cd $LLVMC_DIR/plugins
          -$ cp -r Simple MyPlugin
          -$ cd MyPlugin
          +$ cd $LLVMC_DIR/examples
          +$ cp -r Skeleton MyDriver
          +$ cd MyDriver
           $ ls
          -Makefile PluginMain.cpp Simple.td
          +AutoGenerated.td  Hooks.cpp  Main.cpp  Makefile
           
          -

          As you can see, our basic plugin consists of only two files (not -counting the build script). Simple.td contains TableGen -description of the compilation graph; its format is documented in the -following sections. PluginMain.cpp is just a helper file used to -compile the auto-generated C++ code produced from TableGen source. It -can also contain hook definitions (see below).

          -

          The first thing that you should do is to change the LLVMC_PLUGIN -variable in the Makefile to avoid conflicts (since this variable -is used to name the resulting library):

          +

          As you can see, our basic driver consists of only three files (not counting the +build script). AutoGenerated.td contains TableGen description of the +compilation graph; its format is documented in the following +sections. Hooks.cpp is an empty file that should be used for hook +definitions (see below). Main.cpp is just a helper used to compile the +auto-generated C++ code produced from TableGen source.

          +

          The first thing that you should do is to change the LLVMC_BASED_DRIVER +variable in the Makefile:

          -LLVMC_PLUGIN=MyPlugin
          +LLVMC_BASED_DRIVER=MyDriver
           
          -

          It is also a good idea to rename Simple.td to something less -generic:

          +

          It can also be a good idea to put your TableGen code into a file with a less +generic name:

          -$ mv Simple.td MyPlugin.td
          -
          -

          To build your plugin as a dynamic library, just cd to its source -directory and run make. The resulting file will be called -plugin_llvmc_$(LLVMC_PLUGIN).$(DLL_EXTENSION) (in our case, -plugin_llvmc_MyPlugin.so). This library can be then loaded in with the --load option. Example:

          -
          -$ cd $LLVMC_DIR/plugins/Simple
          -$ make
          -$ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
          -
          -
          -
          -

          Compiling standalone LLVMC-based drivers

          -

          By default, the llvmc executable consists of a driver core plus several -statically linked plugins (Base and Clang at the moment). You can -produce a standalone LLVMC-based driver executable by linking the core with your -own plugins. The recommended way to do this is by starting with the provided -Skeleton example ($LLVMC_DIR/example/Skeleton):

          -
          -$ cd $LLVMC_DIR/example/
          -$ cp -r Skeleton mydriver
          -$ cd mydriver
          -$ vim Makefile
          +$ touch MyDriver.td
          +$ vim AutoGenerated.td
           [...]
          -$ make
          +include "MyDriver.td"
           
          +

          If you have more than one TableGen source file, they all should be included from +AutoGenerated.td, since this file is used by the build system to generate +C++ code.

          +

          To build your driver, just cd to its source directory and run make. The +resulting executable will be put into $LLVM_OBJ_DIR/$(BuildMode)/bin.

          If you're compiling LLVM with different source and object directories, then you -must perform the following additional steps before running make:

          +must perform the following additional steps before running make:

           # LLVMC_SRC_DIR = $LLVM_SRC_DIR/tools/llvmc/
           # LLVMC_OBJ_DIR = $LLVM_OBJ_DIR/tools/llvmc/
          -$ cp $LLVMC_SRC_DIR/example/mydriver/Makefile \
          -  $LLVMC_OBJ_DIR/example/mydriver/
          -$ cd $LLVMC_OBJ_DIR/example/mydriver
          +$ mkdir $LLVMC_OBJ_DIR/examples/MyDriver/
          +$ cp $LLVMC_SRC_DIR/examples/MyDriver/Makefile \
          +  $LLVMC_OBJ_DIR/examples/MyDriver/
          +$ cd $LLVMC_OBJ_DIR/examples/MyDriver
           $ make
           
          -

          Another way to do the same thing is by using the following command:

          -
          -$ cd $LLVMC_DIR
          -$ make LLVMC_BUILTIN_PLUGINS=MyPlugin LLVMC_BASED_DRIVER_NAME=mydriver
          -
          -

          This works with both srcdir == objdir and srcdir != objdir, but assumes that the -plugin source directory was placed under $LLVMC_DIR/plugins.

          -

          Sometimes, you will want a 'bare-bones' version of LLVMC that has no -built-in plugins. It can be compiled with the following command:

          -
          -$ cd $LLVMC_DIR
          -$ make LLVMC_BUILTIN_PLUGINS=""
          -
          -

          Customizing LLVMC: the compilation graph

          -

          Each TableGen configuration file should include the common -definitions:

          +

          Customizing LLVMC: the compilation graph

          +

          Each TableGen configuration file should include the common definitions:

           include "llvm/CompilerDriver/Common.td"
           
          -

          Internally, LLVMC stores information about possible source -transformations in form of a graph. Nodes in this graph represent -tools, and edges between two nodes represent a transformation path. A -special "root" node is used to mark entry points for the -transformations. LLVMC also assigns a weight to each edge (more on -this later) to choose between several alternative edges.

          -

          The definition of the compilation graph (see file -plugins/Base/Base.td for an example) is just a list of edges:

          +

          Internally, LLVMC stores information about possible source transformations in the +form of a graph. Nodes in this graph represent tools, and edges between two +nodes represent a transformation path. A special "root" node is used to mark +entry points for the transformations. LLVMC also assigns a weight to each edge +(more on this later) to choose between several alternative edges.

          +

          The definition of the compilation graph (see file llvmc/src/Base.td for an +example) is just a list of edges:

           def CompilationGraph : CompilationGraph<[
               Edge<"root", "llvm_gcc_c">,
          @@ -253,39 +209,33 @@ def CompilationGraph : CompilationGraph<[
           
               ]>;
           
          -

          As you can see, the edges can be either default or optional, where -optional edges are differentiated by an additional case expression -used to calculate the weight of this edge. Notice also that we refer -to tools via their names (as strings). This makes it possible to add -edges to an existing compilation graph in plugins without having to -know about all tool definitions used in the graph.

          -

          The default edges are assigned a weight of 1, and optional edges get a -weight of 0 + 2*N where N is the number of tests that evaluated to -true in the case expression. It is also possible to provide an -integer parameter to inc_weight and dec_weight - in this case, -the weight is increased (or decreased) by the provided value instead -of the default 2. It is also possible to change the default weight of -an optional edge by using the default clause of the case +

          As you can see, the edges can be either default or optional, where optional +edges are differentiated by an additional case expression used to calculate +the weight of this edge. Notice also that we refer to tools via their names (as +strings). This makes it possible to add edges to an existing compilation graph +without having to know about all tool definitions used in the graph.

          +

          The default edges are assigned a weight of 1, and optional edges get a weight of +0 + 2*N where N is the number of tests that evaluated to true in the case +expression. It is also possible to provide an integer parameter to +inc_weight and dec_weight - in this case, the weight is increased (or +decreased) by the provided value instead of the default 2. Default weight of an +optional edge can be changed by using the default clause of the case construct.

          -

          When passing an input file through the graph, LLVMC picks the edge -with the maximum weight. To avoid ambiguity, there should be only one -default edge between two nodes (with the exception of the root node, -which gets a special treatment - there you are allowed to specify one -default edge per language).

          -

          When multiple plugins are loaded, their compilation graphs are merged -together. Since multiple edges that have the same end nodes are not -allowed (i.e. the graph is not a multigraph), an edge defined in -several plugins will be replaced by the definition from the plugin -that was loaded last. Plugin load order can be controlled by using the -plugin priority feature described above.

          -

          To get a visual representation of the compilation graph (useful for -debugging), run llvmc --view-graph. You will need dot and -gsview installed for this to work properly.

          +

          When passing an input file through the graph, LLVMC picks the edge with the +maximum weight. To avoid ambiguity, there should be only one default edge +between two nodes (with the exception of the root node, which gets a special +treatment - there you are allowed to specify one default edge per language).

          +

          When multiple compilation graphs are defined, they are merged together. Multiple +edges with the same end nodes are not allowed (i.e. the graph is not a +multigraph), and will lead to a compile-time error.

          +

          To get a visual representation of the compilation graph (useful for debugging), +run llvmc --view-graph. You will need dot and gsview installed for +this to work properly.

          -

          Describing options

          -

          Command-line options that the plugin supports are defined by using an -OptionList:

          +

          Describing options

          +

          Command-line options supported by the driver are defined by using an +OptionList:

           def Options : OptionList<[
           (switch_option "E", (help "Help string")),
          @@ -293,101 +243,95 @@ def Options : OptionList<[
           ...
           ]>;
           
          -

          As you can see, the option list is just a list of DAGs, where each DAG -is an option description consisting of the option name and some -properties. A plugin can define more than one option list (they are -all merged together in the end), which can be handy if one wants to -separate option groups syntactically.

          +

          As you can see, the option list is just a list of DAGs, where each DAG is an +option description consisting of the option name and some properties. More than +one option list can be defined (they are all merged together in the end), which +can be handy if one wants to separate option groups syntactically.

          • Possible option types:

              -
            • switch_option - a simple boolean switch without arguments, for example --O2 or -time. At most one occurrence is allowed.
            • -
            • parameter_option - option that takes one argument, for example +
            • switch_option - a simple boolean switch without arguments, for example +-O2 or -time. At most one occurrence is allowed by default.
            • +
            • parameter_option - option that takes one argument, for example -std=c99. It is also allowed to use spaces instead of the equality -sign: -std c99. At most one occurrence is allowed.
            • -
            • parameter_list_option - same as the above, but more than one option -occurence is allowed.
            • -
            • prefix_option - same as the parameter_option, but the option name and +sign: -std c99. At most one occurrence is allowed.
            • +
            • parameter_list_option - same as the above, but more than one option +occurrence is allowed.
            • +
            • prefix_option - same as the parameter_option, but the option name and argument do not have to be separated. Example: -ofile. This can be also -specified as -o file; however, -o=file will be parsed incorrectly -(=file will be interpreted as option value). At most one occurrence is +specified as -o file; however, -o=file will be parsed incorrectly +(=file will be interpreted as option value). At most one occurrence is allowed.
            • -
            • prefix_list_option - same as the above, but more than one occurence of +
            • prefix_list_option - same as the above, but more than one occurrence of the option is allowed; example: -lm -lpthread.
            • -
            • alias_option - a special option type for creating aliases. Unlike other +
            • alias_option - a special option type for creating aliases. Unlike other option types, aliases are not allowed to have any properties besides the -aliased option name. Usage example: (alias_option "preprocess", "E")
            • +aliased option name. +Usage example: (alias_option "preprocess", "E") +
            • switch_list_option - like switch_option with the zero_or_more +property, but remembers how many times the switch was turned on. Useful +mostly for forwarding. Example: when -foo is a switch option (with the +zero_or_more property), the command driver -foo -foo is forwarded +as some-tool -foo, but when -foo is a switch list, the same command +is forwarded as some-tool -foo -foo.
          • Possible option properties:

              -
            • help - help string associated with this option. Used for -help +
            • help - help string associated with this option. Used for --help output.
            • -
            • required - this option must be specified exactly once (or, in case of -the list options without the multi_val property, at least -once). Incompatible with zero_or_one and one_or_more.
            • -
            • one_or_more - the option must be specified at least one time. Useful -only for list options in conjunction with multi_val; for ordinary lists -it is synonymous with required. Incompatible with required and -zero_or_one.
            • -
            • optional - the option can be specified zero or one times. Useful only -for list options in conjunction with multi_val. Incompatible with -required and one_or_more.
            • -
            • hidden - the description of this option will not appear in -the -help output (but will appear in the -help-hidden +
            • required - this option must be specified exactly once (or, in case of +the list options without the multi_val property, at least +once). Incompatible with optional and one_or_more.
            • +
            • optional - the option can be specified either zero times or exactly +once. The default for switch options. Useful only for list options in +conjunction with multi_val. Incompatible with required, +zero_or_more and one_or_more.
            • +
            • one_or_more - the option must be specified at least once. Can be useful +to allow switch options to be both obligatory and specified multiple +times. For list options it is useful only in conjunction with multi_val; +for ordinary lists it is synonymous with required. Incompatible with +required, optional and zero_or_more.
            • +
            • zero_or_more - the option can be specified zero or more times. Useful +to allow a single switch option to be specified more than +once. Incompatible with required, optional and one_or_more.
            • +
            • hidden - the description of this option will not appear in +the --help output (but will appear in the --help-hidden output).
            • -
            • really_hidden - the option will not be mentioned in any help +
            • really_hidden - the option will not be mentioned in any help output.
            • -
            • comma_separated - Indicates that any commas specified for an option's +
            • comma_separated - Indicates that any commas specified for an option's value should be used to split the value up into multiple values for the option. This property is valid only for list options. In conjunction with -forward_value can be used to implement option forwarding in style of +forward_value can be used to implement option forwarding in style of gcc's -Wa,.
            • -
            • multi_val n - this option takes n arguments (can be useful in some -special cases). Usage example: (parameter_list_option "foo", (multi_val -3)); the command-line syntax is '-foo a b c'. Only list options can have -this attribute; you can, however, use the one_or_more, optional -and required properties.
            • -
            • init - this option has a default value, either a string (if it is a +
            • multi_val n - this option takes n arguments (can be useful in some +special cases). Usage example: (parameter_list_option "foo", (multi_val +3)); the command-line syntax is '-foo a b c'. Only list options can have +this attribute; you can, however, use the one_or_more, optional +and required properties.
            • +
            • init - this option has a default value, either a string (if it is a parameter), or a boolean (if it is a switch; as in C++, boolean constants -are called true and false). List options can't have init +are called true and false). List options can't have init attribute. -Usage examples: (switch_option "foo", (init true)); (prefix_option -"bar", (init "baz")).
            • -
            • extern - this option is defined in some other plugin, see below.
            • +Usage examples: (switch_option "foo", (init true)); (prefix_option +"bar", (init "baz")).
          -
          -

          External options

          -

          Sometimes, when linking several plugins together, one plugin needs to -access options defined in some other plugin. Because of the way -options are implemented, such options must be marked as -extern. This is what the extern option property is -for. Example:

          -
          -...
          -(switch_option "E", (extern))
          -...
          -
          -

          If an external option has additional attributes besides 'extern', they are -ignored. See also the section on plugin priorities.

          -
          -

          Conditional evaluation

          -

          The 'case' construct is the main means by which programmability is -achieved in LLVMC. It can be used to calculate edge weights, program -actions and modify the shell commands to be executed. The 'case' -expression is designed after the similarly-named construct in -functional languages and takes the form (case (test_1), statement_1, -(test_2), statement_2, ... (test_N), statement_N). The statements -are evaluated only if the corresponding tests evaluate to true.

          +

          Conditional evaluation

          +

          The 'case' construct is the main means by which programmability is achieved in +LLVMC. It can be used to calculate edge weights, program actions and modify the +shell commands to be executed. The 'case' expression is designed after the +similarly-named construct in functional languages and takes the form (case +(test_1), statement_1, (test_2), statement_2, ... (test_N), statement_N). The +statements are evaluated only if the corresponding tests evaluate to true.

          Examples:

           // Edge weight calculation
          @@ -410,129 +354,139 @@ are evaluated only if the corresponding tests evaluate to true.

          (switch_on "B"), "cmdline2", (default), "cmdline3")
          -

          Note the slight difference in 'case' expression handling in contexts -of edge weights and command line specification - in the second example -the value of the "B" switch is never checked when switch "A" is -enabled, and the whole expression always evaluates to "cmdline1" in -that case.

          +

          Note the slight difference in 'case' expression handling in contexts of edge +weights and command line specification - in the second example the value of the +"B" switch is never checked when switch "A" is enabled, and the whole +expression always evaluates to "cmdline1" in that case.

          Case expressions can also be nested, i.e. the following is legal:

           (case (switch_on "E"), (case (switch_on "o"), ..., (default), ...)
                 (default), ...)
           
          -

          You should, however, try to avoid doing that because it hurts -readability. It is usually better to split tool descriptions and/or -use TableGen inheritance instead.

          +

          You should, however, try to avoid doing that because it hurts readability. It is +usually better to split tool descriptions and/or use TableGen inheritance +instead.

          • Possible tests are:
              -
            • switch_on - Returns true if a given command-line switch is provided by -the user. Can be given a list as argument, in that case (switch_on ["foo", -"bar", "baz"]) is equivalent to (and (switch_on "foo"), (switch_on -"bar"), (switch_on "baz")). -Example: (switch_on "opt").
            • -
            • any_switch_on - Given a list of switch options, returns true if any of +
            • switch_on - Returns true if a given command-line switch is provided by +the user. Can be given multiple arguments, in that case (switch_on "foo", +"bar", "baz") is equivalent to (and (switch_on "foo"), (switch_on +"bar"), (switch_on "baz")). +Example: (switch_on "opt").
            • +
            • any_switch_on - Given a number of switch options, returns true if any of the switches is turned on. -Example: (any_switch_on ["foo", "bar", "baz"]) is equivalent to (or -(switch_on "foo"), (switch_on "bar"), (switch_on "baz")).
            • -
            • parameter_equals - Returns true if a command-line parameter equals -a given value. -Example: (parameter_equals "W", "all").
            • -
            • element_in_list - Returns true if a command-line parameter -list contains a given value. -Example: (element_in_list "l", "pthread").
            • -
            • input_languages_contain - Returns true if a given language +Example: (any_switch_on "foo", "bar", "baz") is equivalent to (or +(switch_on "foo"), (switch_on "bar"), (switch_on "baz")).
            • +
            • parameter_equals - Returns true if a command-line parameter (first +argument) equals a given value (second argument). +Example: (parameter_equals "W", "all").
            • +
            • element_in_list - Returns true if a command-line parameter list (first +argument) contains a given value (second argument). +Example: (element_in_list "l", "pthread").
            • +
            • input_languages_contain - Returns true if a given language belongs to the current input language set. -Example: (input_languages_contain "c++").
            • -
            • in_language - Evaluates to true if the input file language is equal to -the argument. At the moment works only with cmd_line and actions (on +Example: (input_languages_contain "c++").
            • +
            • in_language - Evaluates to true if the input file language is equal to +the argument. At the moment works only with command and actions (on non-join nodes). -Example: (in_language "c++").
            • -
            • not_empty - Returns true if a given option (which should be either a -parameter or a parameter list) is set by the user. Like switch_on, can -be also given a list as argument. -Example: (not_empty "o").
            • -
            • any_not_empty - Returns true if not_empty returns true for any of -the options in the list. -Example: (any_not_empty ["foo", "bar", "baz"]) is equivalent to (or -(not_empty "foo"), (not_empty "bar"), (not_empty "baz")).
            • -
            • empty - The opposite of not_empty. Equivalent to (not (not_empty -X)). Provided for convenience. Can be given a list as argument.
            • -
            • any_not_empty - Returns true if not_empty returns true for any of -the options in the list. -Example: (any_empty ["foo", "bar", "baz"]) is equivalent to (not (and -(not_empty "foo"), (not_empty "bar"), (not_empty "baz"))).
            • -
            • single_input_file - Returns true if there was only one input file +Example: (in_language "c++").
            • +
            • not_empty - Returns true if a given option (which should be either a +parameter or a parameter list) is set by the user. Like switch_on, can +be also given multiple arguments. +Examples: (not_empty "o"), (not_empty "o", "l").
            • +
            • any_not_empty - Returns true if not_empty returns true for any of +the provided options. +Example: (any_not_empty "foo", "bar", "baz") is equivalent to (or +(not_empty "foo"), (not_empty "bar"), (not_empty "baz")).
            • +
            • empty - The opposite of not_empty. Equivalent to (not (not_empty +X)). Can be given multiple arguments.
            • +
            • any_empty - Returns true if empty returns true for any of +the provided options. +Example: (any_empty "foo", "bar", "baz") is equivalent to (or +(empty "foo"), (empty "bar"), (empty "baz")).
            • +
            • single_input_file - Returns true if there was only one input file provided on the command-line. Used without arguments: -(single_input_file).
            • -
            • multiple_input_files - Equivalent to (not (single_input_file)) (the +(single_input_file).
            • +
            • multiple_input_files - Equivalent to (not (single_input_file)) (the case of zero input files is considered an error).
            • -
            • default - Always evaluates to true. Should always be the last -test in the case expression.
            • -
            • and - A standard binary logical combinator that returns true iff all of -its arguments return true. Used like this: (and (test1), (test2), -... (testN)). Nesting of and and or is allowed, but not +
            • default - Always evaluates to true. Should always be the last +test in the case expression.
            • +
            • and - A standard logical combinator that returns true iff all of +its arguments return true. Used like this: (and (test1), (test2), +... (testN)). Nesting of and and or is allowed, but not encouraged.
            • -
            • or - A binary logical combinator that returns true iff any of its -arguments returns true. Example: (or (test1), (test2), ... (testN)).
            • -
            • not - Standard unary logical combinator that negates its -argument. Example: (not (or (test1), (test2), ... (testN))).
            • +
            • or - A logical combinator that returns true iff any of its arguments +return true. +Example: (or (test1), (test2), ... (testN)).
            • +
            • not - Standard unary logical combinator that negates its +argument. +Example: (not (or (test1), (test2), ... (testN))).
          -

          Writing a tool description

          -

          As was said earlier, nodes in the compilation graph represent tools, -which are described separately. A tool definition looks like this -(taken from the include/llvm/CompilerDriver/Tools.td file):

          +

          Writing a tool description

          +

          As was said earlier, nodes in the compilation graph represent tools, which are +described separately. A tool definition looks like this (taken from the +llvmc/src/Base.td file):

           def llvm_gcc_cpp : Tool<[
               (in_language "c++"),
               (out_language "llvm-assembler"),
               (output_suffix "bc"),
          -    (cmd_line "llvm-g++ -c $INFILE -o $OUTFILE -emit-llvm"),
          +    (command "llvm-g++ -c -emit-llvm"),
               (sink)
               ]>;
           
          -

          This defines a new tool called llvm_gcc_cpp, which is an alias for -llvm-g++. As you can see, a tool definition is just a list of -properties; most of them should be self-explanatory. The sink -property means that this tool should be passed all command-line -options that aren't mentioned in the option list.

          +

          This defines a new tool called llvm_gcc_cpp, which is an alias for +llvm-g++. As you can see, a tool definition is just a list of properties; +most of them should be self-explanatory. The sink property means that this +tool should be passed all command-line options that aren't mentioned in the +option list.

          The complete list of all currently implemented tool properties follows.

          • Possible tool properties:
              -
            • in_language - input language name. Can be either a string or a -list, in case the tool supports multiple input languages.
            • -
            • out_language - output language name. Multiple output languages are not -allowed.
            • -
            • output_suffix - output file suffix. Can also be changed -dynamically, see documentation on actions.
            • -
            • cmd_line - the actual command used to run the tool. You can -use $INFILE and $OUTFILE variables, output redirection -with >, hook invocations ($CALL), environment variables -(via $ENV) and the case construct.
            • -
            • join - this tool is a "join node" in the graph, i.e. it gets a -list of input files and joins them together. Used for linkers.
            • -
            • sink - all command-line options that are not handled by other -tools are passed to this tool.
            • -
            • actions - A single big case expression that specifies how -this tool reacts on command-line options (described in more detail -below).
            • +
            • in_language - input language name. Can be given multiple arguments, in +case the tool supports multiple input languages. Used for typechecking and +mapping file extensions to tools.
            • +
            • out_language - output language name. Multiple output languages are +allowed. Used for typechecking the compilation graph.
            • +
            • output_suffix - output file suffix. Can also be changed dynamically, see +documentation on actions.
          -
          -

          Actions

          -

          A tool often needs to react to command-line options, and this is -precisely what the actions property is for. The next example -illustrates this feature:

          +
          +
            +
          • command - the actual command used to run the tool. You can use output +redirection with >, hook invocations ($CALL), environment variables +(via $ENV) and the case construct.
          • +
          • join - this tool is a "join node" in the graph, i.e. it gets a list of +input files and joins them together. Used for linkers.
          • +
          • sink - all command-line options that are not handled by other tools are +passed to this tool.
          • +
          • actions - A single big case expression that specifies how this tool +reacts to command-line options (described in more detail below).
          • +
          +
          +
          +
            +
          • out_file_option, in_file_option - Options appended to the +command string to designate output and input files. Default values are +"-o" and "", respectively.
          • +
          +
          +
          +

          Actions

          +

          A tool often needs to react to command-line options, and this is precisely what +the actions property is for. The next example illustrates this feature:

           def llvm_gcc_linker : Tool<[
               (in_language "object-code"),
               (out_language "executable"),
               (output_suffix "out"),
          -    (cmd_line "llvm-gcc $INFILE -o $OUTFILE"),
          +    (command "llvm-gcc"),
               (join),
               (actions (case (not_empty "L"), (forward "L"),
                              (not_empty "l"), (forward "l"),
          @@ -540,47 +494,46 @@ def llvm_gcc_linker : Tool<[
                                        [(append_cmd "-dummy1"), (append_cmd "-dummy2")])
               ]>;
           
          -

          The actions tool property is implemented on top of the omnipresent -case expression. It associates one or more different actions -with given conditions - in the example, the actions are forward, -which forwards a given option unchanged, and append_cmd, which -appends a given string to the tool execution command. Multiple actions -can be associated with a single condition by using a list of actions -(used in the example to append some dummy options). The same case -construct can also be used in the cmd_line property to modify the -tool command line.

          -

          The "join" property used in the example means that this tool behaves -like a linker.

          +

          The actions tool property is implemented on top of the omnipresent case +expression. It associates one or more different actions with given +conditions - in the example, the actions are forward, which forwards a given +option unchanged, and append_cmd, which appends a given string to the tool +execution command. Multiple actions can be associated with a single condition by +using a list of actions (used in the example to append some dummy options). The +same case construct can also be used in the command property to modify +the tool command line.

          +

          The "join" property used in the example means that this tool behaves like a +linker.

          The list of all possible actions follows.

          • Possible actions:

              -
            • append_cmd - Append a string to the tool invocation command. -Example: (case (switch_on "pthread"), (append_cmd "-lpthread")).
            • -
            • error - Exit with error. -Example: (error "Mixing -c and -S is not allowed!").
            • -
            • warning - Print a warning. -Example: (warning "Specifying both -O1 and -O2 is meaningless!").
            • -
            • forward - Forward the option unchanged. -Example: (forward "Wall").
            • -
            • forward_as - Change the option's name, but forward the argument +
            • append_cmd - Append a string to the tool invocation command. +Example: (case (switch_on "pthread"), (append_cmd "-lpthread")).
            • +
            • error - Exit with error. +Example: (error "Mixing -c and -S is not allowed!").
            • +
            • warning - Print a warning. +Example: (warning "Specifying both -O1 and -O2 is meaningless!").
            • +
            • forward - Forward the option unchanged. +Example: (forward "Wall").
            • +
            • forward_as - Change the option's name, but forward the argument unchanged. -Example: (forward_as "O0", "--disable-optimization").
            • -
            • forward_value - Forward only option's value. Cannot be used with switch +Example: (forward_as "O0", "--disable-optimization").
            • +
            • forward_value - Forward only option's value. Cannot be used with switch options (since they don't have values), but works fine with lists. -Example: (forward_value "Wa,").
            • -
            • forward_transformed_value - As above, but applies a hook to the +Example: (forward_value "Wa,").
            • +
            • forward_transformed_value - As above, but applies a hook to the option's value before forwarding (see below). When -forward_transformed_value is applied to a list +forward_transformed_value is applied to a list option, the hook must have signature -std::string hooks::HookName (const std::vector<std::string>&). -Example: (forward_transformed_value "m", "ConvertToMAttr").
            • -
            • output_suffix - Modify the output suffix of this tool. -Example: (output_suffix "i").
            • -
            • stop_compilation - Stop compilation after this tool processes its +std::string hooks::HookName (const std::vector<std::string>&). +Example: (forward_transformed_value "m", "ConvertToMAttr").
            • +
            • output_suffix - Modify the output suffix of this tool. +Example: (output_suffix "i").
            • +
            • stop_compilation - Stop compilation after this tool processes its input. Used without arguments. -Example: (stop_compilation).
            • +Example: (stop_compilation).
          • @@ -588,11 +541,11 @@ Example: (stop_compilation)
          -

          Language map

          -

          If you are adding support for a new language to LLVMC, you'll need to -modify the language map, which defines mappings from file extensions -to language names. It is used to choose the proper toolchain(s) for a -given input file set. Language map definition looks like this:

          +

          Language map

          +

          If you are adding support for a new language to LLVMC, you'll need to modify the +language map, which defines mappings from file extensions to language names. It +is used to choose the proper toolchain(s) for a given input file set. Language +map definition looks like this:

           def LanguageMap : LanguageMap<
               [LangToSuffixes<"c++", ["cc", "cp", "cxx", "cpp", "CPP", "c++", "C"]>,
          @@ -606,73 +559,69 @@ $ llvmc hello.cpp
           llvmc: Unknown suffix: cpp
           

          The language map entries are needed only for the tools that are linked from the -root node. Since a tool can't have multiple output languages, for inner nodes of -the graph the input and output languages should match. This is enforced at -compile-time.

          +root node. A tool can have multiple output languages.

          -

          Option preprocessor

          +

          Option preprocessor

          It is sometimes useful to run error-checking code before processing the compilation graph. For example, if optimization options "-O1" and "-O2" are implemented as switches, we might want to output a warning if the user invokes the driver with both of these options enabled.

          -

          The OptionPreprocessor feature is reserved specially for these -occasions. Example (adapted from the built-in Base plugin):

          +

          The OptionPreprocessor feature is reserved specially for these +occasions. Example (adapted from llvmc/src/Base.td.in):

           def Preprocess : OptionPreprocessor<
          -(case (not (any_switch_on ["O0", "O1", "O2", "O3"])),
          +(case (not (any_switch_on "O0", "O1", "O2", "O3")),
                      (set_option "O2"),
          -      (and (switch_on "O3"), (any_switch_on ["O0", "O1", "O2"])),
          -           (unset_option ["O0", "O1", "O2"]),
          -      (and (switch_on "O2"), (any_switch_on ["O0", "O1"])),
          -           (unset_option ["O0", "O1"]),
          +      (and (switch_on "O3"), (any_switch_on "O0", "O1", "O2")),
          +           (unset_option "O0", "O1", "O2"),
          +      (and (switch_on "O2"), (any_switch_on "O0", "O1")),
          +           (unset_option "O0", "O1"),
                 (and (switch_on "O1"), (switch_on "O0")),
                      (unset_option "O0"))
           >;
           
          -

          Here, OptionPreprocessor is used to unset all spurious -O options so +

          Here, OptionPreprocessor is used to unset all spurious -O options so that they are not forwarded to the compiler. If no optimization options are specified, -O2 is enabled.

          -

          OptionPreprocessor is basically a single big case expression, which is -evaluated only once right after the plugin is loaded. The only allowed actions -in OptionPreprocessor are error, warning, and two special actions: -unset_option and set_option. As their names suggest, they can be used to -set or unset a given option. To set an option with set_option, use the -two-argument form: (set_option "parameter", VALUE). Here, VALUE can be +

          OptionPreprocessor is basically a single big case expression, which is +evaluated only once right after the driver is started. The only allowed actions +in OptionPreprocessor are error, warning, and two special actions: +unset_option and set_option. As their names suggest, they can be used to +set or unset a given option. To set an option with set_option, use the +two-argument form: (set_option "parameter", VALUE). Here, VALUE can be either a string, a string list, or a boolean constant.

          -

          For convenience, set_option and unset_option also work on lists. That -is, instead of [(unset_option "A"), (unset_option "B")] you can use -(unset_option ["A", "B"]). Obviously, (set_option ["A", "B"]) is valid -only if both A and B are switches.

          +

          For convenience, set_option and unset_option also work with multiple +arguments. That is, instead of [(unset_option "A"), (unset_option "B")] you +can use (unset_option "A", "B"). Obviously, (set_option "A", "B") is +only valid if both A and B are switches.

          -

          More advanced topics

          +

          More advanced topics

          -

          Hooks and environment variables

          -

          Normally, LLVMC executes programs from the system PATH. Sometimes, -this is not sufficient: for example, we may want to specify tool paths -or names in the configuration file. This can be easily achieved via -the hooks mechanism. To write your own hooks, just add their -definitions to the PluginMain.cpp or drop a .cpp file into the -your plugin directory. Hooks should live in the hooks namespace -and have the signature std::string hooks::MyHookName ([const char* -Arg0 [ const char* Arg2 [, ...]]]). They can be used from the -cmd_line tool property:

          +

          Hooks and environment variables

          +

          Normally, LLVMC searches for programs in the system PATH. Sometimes, this is +not sufficient: for example, we may want to specify tool paths or names in the +configuration file. This can be achieved via the hooks mechanism. To write your +own hooks, add their definitions to the Hooks.cpp or drop a .cpp file +into your driver directory. Hooks should live in the hooks namespace and +have the signature std::string hooks::MyHookName ([const char* Arg0 [, const +char* Arg1 [, ...]]]). They can be used from the command tool property:

          -(cmd_line "$CALL(MyHook)/path/to/file -o $CALL(AnotherHook)")
          +(command "$CALL(MyHook)/path/to/file -o $CALL(AnotherHook)")
           

          To pass arguments to hooks, use the following syntax:

          -(cmd_line "$CALL(MyHook, 'Arg1', 'Arg2', 'Arg # 3')/path/to/file -o1 -o2")
          +(command "$CALL(MyHook, 'Arg1', 'Arg2', 'Arg # 3')/path/to/file -o1 -o2")
           

          It is also possible to use environment variables in the same manner:

          -(cmd_line "$ENV(VAR1)/path/to/file -o $ENV(VAR2)")
          +(command "$ENV(VAR1)/path/to/file -o $ENV(VAR2)")
           

          To change the command line string based on user-provided options use -the case expression (documented above):

          +the case expression (documented above):

          -(cmd_line
          +(command
             (case
               (switch_on "E"),
                  "llvm-g++ -E -x c $INFILE -o $OUTFILE",
          @@ -680,41 +629,23 @@ the case expression (
                  "llvm-g++ -c -x c $INFILE -o $OUTFILE -emit-llvm"))
           
          -
          -

          How plugins are loaded

          -

          It is possible for LLVMC plugins to depend on each other. For example, -one can create edges between nodes defined in some other plugin. To -make this work, however, that plugin should be loaded first. To -achieve this, the concept of plugin priority was introduced. By -default, every plugin has priority zero; to specify the priority -explicitly, put the following line in your plugin's TableGen file:

          -
          -def Priority : PluginPriority<$PRIORITY_VALUE>;
          -# Where PRIORITY_VALUE is some integer > 0
          -
          -

          Plugins are loaded in order of their (increasing) priority, starting -with 0. Therefore, the plugin with the highest priority value will be -loaded last.

          -
          -

          Debugging

          -

          When writing LLVMC plugins, it can be useful to get a visual view of -the resulting compilation graph. This can be achieved via the command -line option --view-graph. This command assumes that Graphviz and -Ghostview are installed. There is also a --write-graph option that -creates a Graphviz source file (compilation-graph.dot) in the -current directory.

          -

          Another useful llvmc option is --check-graph. It checks the -compilation graph for common errors like mismatched output/input -language names, multiple default edges and cycles. These checks can't -be performed at compile-time because the plugins can load code -dynamically. When invoked with --check-graph, llvmc doesn't -perform any compilation tasks and returns the number of encountered -errors as its status code.

          +

          Debugging

          +

          When writing LLVMC-based drivers, it can be useful to get a visual view of the +resulting compilation graph. This can be achieved via the command line option +--view-graph (which assumes that Graphviz and Ghostview are +installed). There is also a --write-graph option that creates a Graphviz +source file (compilation-graph.dot) in the current directory.

          +

          Another useful llvmc option is --check-graph. It checks the compilation +graph for common errors like mismatched output/input language names, multiple +default edges and cycles. When invoked with --check-graph, llvmc doesn't +perform any compilation tasks and returns the number of encountered errors as +its status code. In the future, these checks will be performed at compile-time +and this option will disappear.

          -

          Conditioning on the executable name

          -

          For now, the executable name (the value passed to the driver in argv[0]) is +

          Conditioning on the executable name

          +

          For now, the executable name (the value passed to the driver in argv[0]) is accessible only in the C++ code (i.e. hooks). Use the following code:

           namespace llvmc {
          @@ -734,8 +665,8 @@ if (strcmp(ProgramName, "mydriver") == 0) {
           

          In general, you're encouraged not to make the behaviour dependent on the executable file name, and use command-line switches instead. See for example how -the Base plugin behaves when it needs to choose the correct linker options -(think g++ vs. gcc).

          +the llvmc program behaves when it needs to choose the correct linker options +(think g++ vs. gcc).


          @@ -748,7 +679,7 @@ the Base plugin behav Mikhail Glushenkov
          LLVM Compiler Infrastructure
          -Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ +Last modified: $Date: 2011-04-24 16:17:37 +0200 (Sun, 24 Apr 2011) $
          diff --git a/docs/CompilerDriverTutorial.html b/docs/CompilerDriverTutorial.html index 317b1d127127..4ed373aa160a 100644 --- a/docs/CompilerDriverTutorial.html +++ b/docs/CompilerDriverTutorial.html @@ -3,7 +3,7 @@ - + Tutorial - Using LLVMC @@ -18,7 +18,7 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->

          Contents

          @@ -26,51 +26,47 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->

          Written by Mikhail Glushenkov

          Introduction

          -

          LLVMC is a generic compiler driver, which plays the same role for LLVM -as the gcc program does for GCC - the difference being that LLVMC -is designed to be more adaptable and easier to customize. Most of -LLVMC functionality is implemented via plugins, which can be loaded -dynamically or compiled in. This tutorial describes the basic usage -and configuration of LLVMC.

          +

          LLVMC is a generic compiler driver, which plays the same role for LLVM as the +gcc program does for GCC - the difference being that LLVMC is designed to be +more adaptable and easier to customize. Most of LLVMC's functionality is +implemented via high-level TableGen code, from which a corresponding C++ source +file is automatically generated. This tutorial describes the basic usage and +configuration of LLVMC.

          -
          -

          Compiling with LLVMC

          -

          In general, LLVMC tries to be command-line compatible with gcc as -much as possible, so most of the familiar options work:

          +
          +

          Using the llvmc program

          +

          In general, llvmc tries to be command-line compatible with gcc as much +as possible, so most of the familiar options work:

           $ llvmc -O3 -Wall hello.cpp
           $ ./a.out
           hello
           
          -

          This will invoke llvm-g++ under the hood (you can see which -commands are executed by using the -v option). For further help on -command-line LLVMC usage, refer to the llvmc --help output.

          +

          This will invoke llvm-g++ under the hood (you can see which commands are +executed by using the -v option). For further help on command-line LLVMC +usage, refer to the llvmc --help output.

          Using LLVMC to generate toolchain drivers

          -

          LLVMC plugins are written mostly using TableGen, so you need to -be familiar with it to get anything done.

          -

          Start by compiling example/Simple, which is a primitive wrapper for -gcc:

          +

          LLVMC-based drivers are written mostly using TableGen, so you need to be +familiar with it to get anything done.

          +

          Start by compiling example/Simple, which is a primitive wrapper for +gcc:

          -$ cd $LLVM_DIR/tools/llvmc
          -$ cp -r example/Simple plugins/Simple
          -
          -  # NB: A less verbose way to compile standalone LLVMC-based drivers is
          -  # described in the reference manual.
          -
          -$ make LLVMC_BASED_DRIVER_NAME=mygcc LLVMC_BUILTIN_PLUGINS=Simple
          +$ cd $LLVM_OBJ_DIR/tools/examples/Simple
          +$ make
           $ cat > hello.c
          -[...]
          -$ mygcc hello.c
          +#include <stdio.h>
          +int main() { printf("Hello\n"); }
          +$ $LLVM_BIN_DIR/Simple -v hello.c
          +gcc hello.c -o hello.out
           $ ./hello.out
           Hello
           
          -

          Here we link our plugin with the LLVMC core statically to form an executable -file called mygcc. It is also possible to build our plugin as a dynamic -library to be loaded by the llvmc executable (or any other LLVMC-based -standalone driver); this is described in the reference manual.

          -

          Contents of the file Simple.td look like this:

          +

          We have thus produced a simple driver called, appropriately, Simple, from +the input TableGen file Simple.td. The llvmc program itself is generated +using a similar process (see llvmc/src). Contents of the file Simple.td +look like this:

           // Include common definitions
           include "llvm/CompilerDriver/Common.td"
          @@ -80,33 +76,36 @@ def gcc : Tool<
           [(in_language "c"),
            (out_language "executable"),
            (output_suffix "out"),
          - (cmd_line "gcc $INFILE -o $OUTFILE"),
          - (sink)
          + (command "gcc"),
          + (sink),
          +
          + // -o is what is used by default, out_file_option here is included for
          + // instructive purposes.
          + (out_file_option "-o")
           ]>;
           
           // Language map
          -def LanguageMap : LanguageMap<[LangToSuffixes<"c", ["c"]>]>;
          +def LanguageMap : LanguageMap<[(lang_to_suffixes "c", "c")]>;
           
           // Compilation graph
          -def CompilationGraph : CompilationGraph<[Edge<"root", "gcc">]>;
          +def CompilationGraph : CompilationGraph<[(edge "root", "gcc")]>;
           
          -

          As you can see, this file consists of three parts: tool descriptions, -language map, and the compilation graph definition.

          -

          At the heart of LLVMC is the idea of a compilation graph: vertices in -this graph are tools, and edges represent a transformation path -between two tools (for example, assembly source produced by the -compiler can be transformed into executable code by an assembler). The -compilation graph is basically a list of edges; a special node named -root is used to mark graph entry points.

          -

          Tool descriptions are represented as property lists: most properties -in the example above should be self-explanatory; the sink property -means that all options lacking an explicit description should be -forwarded to this tool.

          -

          The LanguageMap associates a language name with a list of suffixes -and is used for deciding which toolchain corresponds to a given input -file.

          -

          To learn more about LLVMC customization, refer to the reference -manual and plugin source code in the plugins directory.

          +

          As you can see, this file consists of three parts: tool descriptions, language +map, and the compilation graph definition.

          +

          At the heart of LLVMC is the idea of a compilation graph: vertices in this graph +are tools, and edges represent a transformation path between two tools (for +example, assembly source produced by the compiler can be transformed into +executable code by an assembler). The compilation graph is basically a list of +edges; a special node named root is used to mark graph entry points.

          +

          Tool descriptions are represented as property lists: most properties in the +example above should be self-explanatory; the sink property means that all +options lacking an explicit description should be forwarded to this tool.

          +

          The LanguageMap associates a language name with a list of suffixes and is +used for deciding which toolchain corresponds to a given input file.

          +

          To learn more about writing your own drivers with LLVMC, refer to the reference +manual and examples in the examples directory. Of particular interest is +the Skeleton example, which can serve as a template for your LLVMC-based +drivers.


          diff --git a/docs/CompilerWriterInfo.html b/docs/CompilerWriterInfo.html index 6cd9d7df509d..ed326b30eb6d 100644 --- a/docs/CompilerWriterInfo.html +++ b/docs/CompilerWriterInfo.html @@ -9,9 +9,9 @@ -
          +

          Architecture/platform information for compiler writers -

          +

          Note: This document is a work-in-progress. Additions and clarifications @@ -43,13 +43,15 @@

          -
          +

          Hardware

          - - +
          -
          + +

          Alpha

          + +
          • Alpha manuals @@ -58,9 +60,9 @@ href="http://ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-libra
          - +

          ARM

          -
          + - +

          Itanium (ia64)

          -
          +
          • Itanium documentation @@ -81,9 +83,9 @@ href="http://developer.intel.com/design/itanium2/documentation.htm">Itanium docu
          - +

          MIPS

          -
          + - +

          PowerPC

          + +
          -
          IBM - Official manuals and docs
          +

          IBM - Official manuals and docs

          -
          + -
          Other documents, collections, notes
          +

          Other documents, collections, notes

          -
          +
          - - +
          -
          + +

          SPARC

          + +
          • SPARC resources
          • @@ -156,12 +162,14 @@ branch stubs for powerpc64-linux (from binutils)
          - +

          X86

          + +
          -
          AMD - Official manuals and docs
          +

          AMD - Official manuals and docs

          -
          +
          • AMD processor manuals
          • @@ -170,9 +178,9 @@ href="http://www.amd.com/us-en/Processors/TechnicalResources/0,,30_182_739,00.ht
          -
          Intel - Official manuals and docs
          +

          Intel - Official manuals and docs

          -
          + -
          Other x86-specific information
          +

          Other x86-specific information

          - -
          + +

          Other relevant lists

          + +
          • GCC reading list
          • @@ -204,14 +214,18 @@ conventions for different C++ compilers and operating systems
          +
          + - +

          ABI

          +
          + - +

          Linux

          -
          +
          1. PowerPC 64-bit ELF ABI Supplement
          2. @@ -219,9 +233,9 @@ Supplement
          - +

          OS X

          - + - +

          Miscellaneous resources

          diff --git a/docs/DebuggingJITedCode.html b/docs/DebuggingJITedCode.html index a9193f2f9590..fa15633fddbf 100644 --- a/docs/DebuggingJITedCode.html +++ b/docs/DebuggingJITedCode.html @@ -7,7 +7,7 @@ -
          Debugging JITed Code With GDB
          +

          Debugging JITed Code With GDB

          1. Example usage
          2. Background
          3. @@ -15,9 +15,9 @@
            Written by Reid Kleckner
            - +

            Example usage

            -
            +

            In order to debug code JITed by LLVM, you need GDB 7.0 or newer, which is available on most modern distributions of Linux. The version of GDB that Apple @@ -96,9 +96,9 @@ function names.

            - +

            Background

            -
            +

            Without special runtime support, debugging dynamically generated code with GDB (as well as most debuggers) can be quite painful. Debuggers generally read @@ -145,8 +145,8 @@ coordinate with GDB to get better debug information. Valid HTML 4.01 Reid Kleckner
            - The LLVM Compiler Infrastructure
            - Last modified: $Date: 2010-07-07 22:16:45 +0200 (Wed, 07 Jul 2010) $ + The LLVM Compiler Infrastructure
            + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/DeveloperPolicy.html b/docs/DeveloperPolicy.html index ef99ebc9d412..c12165750668 100644 --- a/docs/DeveloperPolicy.html +++ b/docs/DeveloperPolicy.html @@ -8,7 +8,7 @@ -

            LLVM Developer Policy
            +

            LLVM Developer Policy

            1. Introduction
            2. Developer Policies @@ -34,9 +34,9 @@
              Written by the LLVM Oversight Team
              - +

              Introduction

              -
              +

              This document contains the LLVM Developer Policy which defines the project's policy towards developers and their contributions. The intent of this policy is to eliminate miscommunication, rework, and confusion that might arise from @@ -63,20 +63,19 @@

              - +

              Developer Policies

              -
              +

              This section contains policies that pertain to frequent LLVM developers. We always welcome one-off patches from people who do not routinely contribute to LLVM, but we expect more from frequent contributors to keep the system as efficient as possible for everyone. Frequent LLVM contributors are expected to meet the following requirements in order for LLVM to maintain a high standard of quality.

              -

              - -
              +

              Stay Informed

              +

              Developers should stay informed by reading at least the "dev" mailing list for the projects you are interested in, such as llvmdev for @@ -102,9 +101,9 @@

              - +

              Making a Patch

              -
              +

              When making a patch for review, the goal is to make it as easy for the reviewer to read it as possible. As such, we recommend that you:

              @@ -142,8 +141,8 @@
              - -
              +

              Code Reviews

              +

              LLVM has a code review policy. Code review is one way to increase the quality of software. We generally follow these policies:

              @@ -174,8 +173,8 @@
              - -
              +

              Code Owners

              +

              The LLVM Project relies on two features of its process to maintain rapid development in addition to the high quality of its source base: the @@ -225,8 +224,8 @@

              - -
              +

              Test Cases

              +

              Developers are required to create test cases for any bugs fixed and any new features added. Some tips for getting your testcase approved:

              @@ -258,8 +257,8 @@
              - -
              +

              Quality

              +

              The minimum quality standards that any change must satisfy before being committed to the main development branch are:

              @@ -318,9 +317,8 @@
              - -
              +

              Obtaining Commit Access

              +

              We grant commit access to contributors with a track record of submitting high quality patches. If you would like commit access, please send an email to @@ -381,8 +379,8 @@

              - -
              +

              Making a Major Change

              +

              When a developer begins a major new project with the aim of contributing it back to LLVM, s/he should inform the community with an email to the llvmdev @@ -410,9 +408,8 @@

              - -
              +

              Incremental Development

              +

              In the LLVM project, we do all significant changes as a series of incremental patches. We have a strong dislike for huge changes or long-term development branches. Long-term development branches have a number of drawbacks:

              @@ -472,9 +469,8 @@
              - -
              +

              Attribution of Changes

              +

              We believe in correct attribution of contributions to their contributors. However, we do not want the source code to be littered with random attributions "this code written by J. Random Hacker" (this is noisy and @@ -486,13 +482,15 @@ Changes

              Overall, please do not add contributor names to the source code.

              - - + + +

              + Copyright, License, and Patents +

              -
              +

              This section addresses the issues of copyright, license and patents for the LLVM project. Currently, the University of Illinois is the LLVM copyright holder and the terms of its license to LLVM users and developers is the @@ -504,11 +502,10 @@ Changes

              legal matters but does not provide legal advice. We are not lawyers, please seek legal counsel from an attorney.

              -
              - -
              +

              Copyright

              +

              The LLVM project does not require copyright assignments, which means that the copyright for the code in the project is held by its respective contributors @@ -530,8 +527,8 @@ Changes

              - -
              +

              License

              +

              We intend to keep LLVM perpetually open source and to use a liberal open source license. All of the code in LLVM is available under the University of @@ -585,8 +582,8 @@ Changes

              - -
              +

              Patents

              +

              To the best of our knowledge, LLVM does not infringe on any patents (we have actually removed code from LLVM in the past that was found to infringe). Having code in LLVM that infringes on patents would violate an important goal @@ -602,6 +599,8 @@ Changes

              details.

              +
              +
              @@ -611,8 +610,8 @@ Changes
              src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Written by the LLVM Oversight Group
              - The LLVM Compiler Infrastructure
              - Last modified: $Date: 2010-11-16 22:32:53 +0100 (Tue, 16 Nov 2010) $ + The LLVM Compiler Infrastructure
              + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/ExceptionHandling.html b/docs/ExceptionHandling.html index 009dbb5abd53..16820f3b045b 100644 --- a/docs/ExceptionHandling.html +++ b/docs/ExceptionHandling.html @@ -11,7 +11,7 @@ -
              Exception Handling in LLVM
              +

              Exception Handling in LLVM

              @@ -58,10 +58,10 @@ - +

              Introduction

              -
              +

              This document is the central repository for all information pertaining to exception handling in LLVM. It describes the format that LLVM exception @@ -70,14 +70,12 @@ provides specific examples of what exception handling information is used for in C/C++.

              -
              - - + -
              +

              Exception handling for most programming languages is designed to recover from conditions that rarely occur during general use of an application. To that @@ -106,11 +104,11 @@

              - + -
              +

              Setjmp/Longjmp (SJLJ) based exception handling uses LLVM intrinsics llvm.eh.sjlj.setjmp and @@ -138,11 +136,11 @@

              - + -
              +

              When an exception is thrown in LLVM code, the runtime does its best to find a handler suited to processing the circumstance.

              @@ -185,12 +183,14 @@
              - - -
              + +

              + LLVM Code Generation +

              + +

              At the time of this writing, only C++ exception handling support is available in LLVM. So the remainder of this document will be somewhat C++-centric.

              @@ -200,14 +200,12 @@ we will describe the implementation of LLVM exception handling in terms of C++ examples.

              -
              - -
              +

              Throw -

              + -
              +

              Languages that support exception handling typically provide a throw operation to initiate the exception process. Internally, a throw operation @@ -225,11 +223,11 @@

              - + -
              +

              A call within the scope of a try statement can potentially raise an exception. In those circumstances, the LLVM C++ front-end replaces the call @@ -313,11 +311,11 @@

              - + -
              +

              To handle destructors and cleanups in try code, control may not run directly from a landing pad to the first catch. Control may actually flow @@ -332,11 +330,11 @@

              - + -
              +

              C++ allows the specification of which exception types can be thrown from a function. To represent this a top level landing pad may exist to filter out @@ -359,11 +357,11 @@

              - + -
              +

              The semantics of the invoke instruction require that any exception that unwinds through an invoke call should result in a branch to the invoke's @@ -384,25 +382,25 @@

              - - -
              + +

              + Exception Handling Intrinsics +

              + +

              LLVM uses several intrinsic functions (name prefixed with "llvm.eh") to provide exception handling information at various points in generated code.

              -
              - - + -
              +
                 i8* %llvm.eh.exception()
              @@ -413,11 +411,11 @@
               
              - + -
              +
                 i32 %llvm.eh.selector(i8*, i8*, ...)
              @@ -445,11 +443,11 @@
               
              - + -
              +
                 i32 %llvm.eh.typeid.for(i8*)
              @@ -463,11 +461,11 @@
               
              - + -
              +
                 i32 %llvm.eh.sjlj.setjmp(i8*)
              @@ -492,11 +490,11 @@
               
              - + -
              +
                 void %llvm.eh.sjlj.longjmp(i8*)
              @@ -507,16 +505,16 @@
                  style exception handling. The single parameter is a pointer to a
                  buffer populated by 
                    llvm.eh.sjlj.setjmp. The frame pointer and stack pointer
              -   are restored from the buffer, then control is transfered to the
              +   are restored from the buffer, then control is transferred to the
                  destination address.

              - + -
              +
                 i8* %llvm.eh.sjlj.lsda()
              @@ -531,11 +529,11 @@
               
              - + -
              +
                 void %llvm.eh.sjlj.callsite(i32)
              @@ -549,11 +547,11 @@
               
              - + -
              +
                 void %llvm.eh.sjlj.dispatchsetup(i32)
              @@ -565,24 +563,24 @@
               
               
              - - -
              + +

              + Asm Table Formats +

              + +

              There are two tables that are used by the exception handling runtime to determine which actions should take place when an exception is thrown.

              -
              - - + -
              +

              An exception handling frame eh_frame is very similar to the unwind frame used by dwarf debug info. The frame contains all the information @@ -596,11 +594,11 @@

              - + -
              +

              An exception table contains information about what actions to take when an exception is thrown in a particular part of a function's code. There is one @@ -611,12 +609,14 @@

              - -
              - ToDo
              -
              + +

              + ToDo +

              + +
                @@ -636,8 +636,8 @@ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
                - LLVM Compiler Infrastructure
                - Last modified: $Date: 2010-12-10 00:05:48 +0100 (Fri, 10 Dec 2010) $ + LLVM Compiler Infrastructure
                + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/ExtendingLLVM.html b/docs/ExtendingLLVM.html index 22375608457e..03cfa7e841ea 100644 --- a/docs/ExtendingLLVM.html +++ b/docs/ExtendingLLVM.html @@ -8,9 +8,9 @@ -
                +

                Extending LLVM: Adding instructions, intrinsics, types, etc. -

                +
                1. Introduction and Warning
                2. @@ -31,12 +31,12 @@
              - + -
              +

              During the course of using LLVM, you may wish to customize it for your research project or for experimentation. At this point, you may realize that @@ -68,12 +68,12 @@ effort by doing so.

              - + -
              +

              Adding a new intrinsic function to LLVM is much easier than adding a new instruction. Almost all extensions to LLVM should start as an intrinsic @@ -130,12 +130,12 @@ support for it. Generally you must do the following steps:

              - + -
              +

              As with intrinsics, adding a new SelectionDAG node to LLVM is much easier than adding a new instruction. New nodes are often added to help represent @@ -220,12 +220,12 @@ complicated behavior in a single node (rotate).

              - + -
              +

              WARNING: adding instructions changes the bitcode format, and it will take some effort to maintain compatibility with @@ -277,25 +277,23 @@ to understand this new instruction.

              - + -
              +

              WARNING: adding new types changes the bitcode format, and will break compatibility with currently-existing LLVM installations. Only add new types if it is absolutely necessary.

              -
              - - + -
              +
                @@ -317,11 +315,11 @@ installations. Only add new types if it is absolutely necessary.

              - + -
              +
              1. llvm/include/llvm/Type.h: @@ -373,6 +371,8 @@ void calcTypeName(const Type *Ty,
              +
              +
              @@ -382,9 +382,9 @@ void calcTypeName(const Type *Ty, Valid HTML 4.01 - The LLVM Compiler Infrastructure + The LLVM Compiler Infrastructure
              - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/FAQ.html b/docs/FAQ.html index 9415a902756b..20ba1d5d2912 100644 --- a/docs/FAQ.html +++ b/docs/FAQ.html @@ -12,9 +12,9 @@ -
              +

              LLVM: Frequently Asked Questions -

              +
              1. License @@ -133,14 +133,14 @@
              -

              Written by The LLVM Team

              +

              Written by The LLVM Team

              -
              +

              License -

              +
              @@ -189,9 +189,9 @@
              - +
              @@ -227,9 +227,9 @@ LLVM have been ported to a plethora of platforms.

              - +
              @@ -449,7 +449,9 @@ Stop.
              - +

              + Source Languages +

              What source languages are supported?

              @@ -555,9 +557,9 @@ Stop.
              - +

              When I compile software that uses a configure script, the configure script @@ -712,9 +714,9 @@ Stop.

              - +

              What is this llvm.global_ctors and @@ -930,8 +932,8 @@ F.i: Valid HTML 4.01 - LLVM Compiler Infrastructure
              - Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $ + LLVM Compiler Infrastructure
              + Last modified: $Date: 2011-04-19 01:59:50 +0200 (Tue, 19 Apr 2011) $ diff --git a/docs/GCCFEBuildInstrs.html b/docs/GCCFEBuildInstrs.html index 0b2827c9d5c1..f502481b89ea 100644 --- a/docs/GCCFEBuildInstrs.html +++ b/docs/GCCFEBuildInstrs.html @@ -8,9 +8,9 @@ -

              +

              Building the LLVM GCC Front-End -

              +
              1. Building llvm-gcc from Source
              2. @@ -24,10 +24,10 @@
              -

              Building llvm-gcc from Source

              +

              Building llvm-gcc from Source

              -
              +

              This section describes how to acquire and build llvm-gcc 4.2, which is based on the GCC 4.2.1 front-end. Supported languages are Ada, C, C++, Fortran, @@ -67,10 +67,10 @@ svn co http://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk dst-directory

              -

              Building the Ada front-end

              +

              Building the Ada front-end

              -
              +

              Building with support for Ada amounts to following the directions in the top-level README.LLVM file, adding ",ada" to EXTRALANGS, for example: EXTRALANGS=,ada

              @@ -100,7 +100,7 @@ top-level README.LLVM file, adding ",ada" to EXTRALANGS, for example:
            3. Because the Ada front-end is experimental, it is wise to build the compiler with checking enabled. This causes it to run much slower, but helps catch mistakes in the compiler (please report any problems using - LLVM bugzilla).

            4. + LLVM bugzilla).

            5. The Ada front-end fails to bootstrap, due to lack of LLVM support for setjmp/longjmp style exception handling (used @@ -233,10 +233,10 @@ make install

            6. -

              Building the Fortran front-end

              +

              Building the Fortran front-end

              -
              +

              To build with support for Fortran, follow the directions in the top-level README.LLVM file, adding ",fortran" to EXTRALANGS, for example:

              @@ -247,10 +247,10 @@ EXTRALANGS=,fortran
              -

              License Information

              +

              License Information

              -
              +

              The LLVM GCC frontend is licensed to you under the GNU General Public License and the GNU Lesser General Public License. Please see the files COPYING and @@ -271,8 +271,8 @@ More information is available in the FAQ. Valid HTML 4.01 - LLVM Compiler Infrastructure
              - Last modified: $Date: 2010-08-31 21:40:21 +0200 (Tue, 31 Aug 2010) $ + LLVM Compiler Infrastructure
              + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/GarbageCollection.html b/docs/GarbageCollection.html index fde070ce6b45..13a3714e8233 100644 --- a/docs/GarbageCollection.html +++ b/docs/GarbageCollection.html @@ -13,9 +13,9 @@ -

              +

              Accurate Garbage Collection with LLVM -

              +
              1. Introduction @@ -79,12 +79,12 @@
              - + -
              +

              Garbage collection is a widely used technique that frees the programmer from having to know the lifetimes of heap objects, making software easier to produce @@ -124,14 +124,12 @@ techniques dominates any low-level losses.

              This document describes the mechanisms and interfaces provided by LLVM to support accurate garbage collection.

              -
              - - + -
              +

              LLVM's intermediate representation provides garbage collection intrinsics that offer support for a broad class of @@ -151,14 +149,14 @@ collector models. For instance, the intrinsics permit:

              support a broad class of garbage collected languages including Scheme, ML, Java, C#, Perl, Python, Lua, Ruby, other scripting languages, and more.

              -

              However, LLVM does not itself provide a garbage collector—this should +

              However, LLVM does not itself provide a garbage collector—this should be part of your language's runtime library. LLVM provides a framework for compile time code generation plugins. The role of these plugins is to generate code and data structures which conform to the binary interface specified by the runtime library. This is similar to the relationship between LLVM and DWARF debugging info, for example. The difference primarily lies in the lack of an established standard in the domain -of garbage collection—thus the plugins.

              +of garbage collection—thus the plugins.

              The aspects of the binary interface with which LLVM's GC support is concerned are:

              @@ -198,13 +196,15 @@ compiler matures.

              - - + + +

              + Getting started +

              -
              +

              Using a GC with LLVM implies many things, for example:

              @@ -246,14 +246,12 @@ compiler matures.

              includes a highly portable, built-in ShadowStack code generator. It is compiled into llc and works even with the interpreter and C backends.

              -
              - - + -
              +

              To turn the shadow stack on for your functions, first call:

              @@ -276,11 +274,11 @@ switching to a more advanced GC.

              - + -
              +

              The shadow stack doesn't imply a memory allocation algorithm. A semispace collector or building atop malloc are great places to start, and can @@ -343,11 +341,11 @@ void visitGCRoots(void (*Visitor)(void **Root, const void *Meta)) { }

              - + -
              +

              Unlike many GC algorithms which rely on a cooperative code generator to compile stack maps, this algorithm carefully maintains a linked list of stack @@ -372,13 +370,15 @@ in order to improve performance.

              - - + + +

              + IR features +

              -
              +

              This section describes the garbage collection facilities provided by the LLVM intermediate representation. The exact behavior @@ -390,18 +390,16 @@ intended to be a complete interface to any garbage collector. A program will need to interface with the GC library using the facilities provided by that program.

              -
              - - +
              define ty @name(...) gc "name" { ...
              -
              +

              The gc function attribute is used to specify the desired GC style to the compiler. Its programmatic equivalent is the setGC method of @@ -418,15 +416,15 @@ programs that use different garbage collection algorithms (or none at all).

              - +
              void @llvm.gcroot(i8** %ptrloc, i8* %metadata)
              -
              +

              The llvm.gcroot intrinsic is used to inform LLVM that a stack variable references an object on the heap and is to be tracked for garbage @@ -494,11 +492,11 @@ CodeBlock:

              - + -
              +

              Some collectors need to be informed when the mutator (the program that needs garbage collection) either reads a pointer from or writes a pointer to a field @@ -534,18 +532,16 @@ require the corresponding barrier. Such a GC plugin will replace the intrinsic calls with the corresponding load or store instruction if they are used.

              -
              - - +
              void @llvm.gcwrite(i8* %value, i8* %object, i8** %derived)
              -
              +

              For write barriers, LLVM provides the llvm.gcwrite intrinsic function. It has exactly the same semantics as a non-volatile store to @@ -559,15 +555,15 @@ implement reference counting.

              - +
              i8* @llvm.gcread(i8* %object, i8** %derived)
              -
              +

              For read barriers, LLVM provides the llvm.gcread intrinsic function. It has exactly the same semantics as a non-volatile load from the @@ -580,13 +576,17 @@ writes.

              - - + +
              + + +

              + Implementing a collector plugin +

              -
              +

              User code specifies which GC code generation to use with the gc function attribute or, equivalently, with the setGC method of @@ -666,14 +666,12 @@ $ llvm-as < sample.ll | llc -load=MyGC.so

              It is also possible to statically link the collector plugin into tools, such as a language-specific compiler front-end.

              -
              - - + -
              +

              GCStrategy provides a range of features through which a plugin may do useful work. Some of these are callbacks, some are algorithms that can @@ -958,11 +956,11 @@ interest.

              - + -
              +

              LLVM automatically computes a stack map. One of the most important features of a GCStrategy is to compile this information into the executable in @@ -1014,11 +1012,11 @@ for collector plugins which implement reference counting or a shadow stack.

              - + -
              +
              MyGC::MyGC() {
              @@ -1039,12 +1037,12 @@ this feature should be used by all GC plugins. It is enabled by default.

              - + -
              +

              For GCs which use barriers or unusual treatment of stack roots, these flags allow the collector to perform arbitrary transformations of the LLVM @@ -1129,11 +1127,11 @@ bool MyGC::performCustomLowering(Function &F) { -

              + -
              +

              LLVM can compute four kinds of safe points:

              @@ -1193,11 +1191,11 @@ safe point (because only the topmost function has been patched).

              - + -
              +

              LLVM allows a plugin to print arbitrary assembly code before and after the rest of a module's assembly code. At the end of the module, the GC can compile @@ -1341,14 +1339,15 @@ void MyGCPrinter::finishAssembly(std::ostream &OS, AsmPrinter &AP,

              - - - + + +

              + References +

              -
              +

              [Appel89] Runtime Tags Aren't Necessary. Andrew W. Appel. Lisp and Symbolic Computation 19(7):703-705, July 1989.

              @@ -1379,8 +1378,8 @@ Fergus Henderson. International Symposium on Memory Management 2002.

              src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
              - LLVM Compiler Infrastructure
              - Last modified: $Date: 2010-05-11 22:16:09 +0200 (Tue, 11 May 2010) $ + LLVM Compiler Infrastructure
              + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/GetElementPtr.html b/docs/GetElementPtr.html index 5410137861ff..4c347a6aaf12 100644 --- a/docs/GetElementPtr.html +++ b/docs/GetElementPtr.html @@ -11,9 +11,9 @@ -
              +

              The Often Misunderstood GEP Instruction -

              +
              1. Introduction
              2. @@ -58,10 +58,10 @@ - +

                Introduction

                -
                +

                This document seeks to dispel the mystery and confusion surrounding LLVM's GetElementPtr (GEP) instruction. Questions about the wily GEP instruction are @@ -72,21 +72,20 @@

                - +

                Address Computation

                -
                +

                When people are first confronted with the GEP instruction, they tend to relate it to known concepts from other programming paradigms, most notably C array indexing and field selection. GEP closely resembles C array indexing and field selection, however it is a little different and this leads to the following questions.

                -
                - -
                +

                + What is the first index of the GEP instruction? +

                +

                Quick answer: The index stepping through the first operand.

                The confusion with the first index usually arises from thinking about the GetElementPtr instruction as if it was a C index operator. They aren't the @@ -205,11 +204,11 @@ idx3 = (char*) &MyVar + 8

                - +

                + Why is the extra 0 index required? +

                -
                +

                Quick answer: there are no superfluous indices.

                This question arises most often when the GEP instruction is applied to a global variable which is always a pointer type. For example, consider @@ -247,10 +246,10 @@ idx3 = (char*) &MyVar + 8

                - -
                +

                + What is dereferenced by GEP? +

                +

                Quick answer: nothing.

                The GetElementPtr instruction dereferences nothing. That is, it doesn't access memory in any way. That's what the Load and Store instructions are for. @@ -302,10 +301,10 @@ idx3 = (char*) &MyVar + 8

                - -
                +

                + Why don't GEP x,0,0,1 and GEP x,1 alias? +

                +

                Quick Answer: They compute different address locations.

                If you look at the first indices in these GEP instructions you find that they are different (0 and 1), therefore the address @@ -331,10 +330,10 @@ idx3 = (char*) &MyVar + 8

                - -
                +

                + Why do GEP x,1,0,0 and GEP x,1 alias? +

                +

                Quick Answer: They compute the same address location.

                These two GEP instructions will compute the same address because indexing through the 0th element does not change the address. However, it does change @@ -355,10 +354,10 @@ idx3 = (char*) &MyVar + 8 -

                -
                +

                + Can GEP index into vector elements? +

                +

                This hasn't always been forcefully disallowed, though it's not recommended. It leads to awkward special cases in the optimizers, and fundamental inconsistency in the IR. In the future, it will probably be outright @@ -368,10 +367,10 @@ idx3 = (char*) &MyVar + 8 -

                -
                +

                + What effect do address spaces have on GEPs? +

                +

                None, except that the address space qualifier on the first operand pointer type always matches the address space qualifier on the result type.

                @@ -379,11 +378,12 @@ idx3 = (char*) &MyVar + 8 - -
                +

                + + How is GEP different from ptrtoint, arithmetic, and inttoptr? + +

                +

                It's very similar; there are only subtle differences.

                With ptrtoint, you have to pick an integer type. One approach is to pick i64; @@ -409,11 +409,13 @@ idx3 = (char*) &MyVar + 8 -

                -
                +

                + + I'm writing a backend for a target which needs custom lowering for GEP. + How do I do this? + +

                +

                You don't. The integer computation implied by a GEP is target-independent. Typically what you'll need to do is make your backend pattern-match expressions trees involving ADD, MUL, etc., which are what GEP is lowered @@ -431,10 +433,10 @@ idx3 = (char*) &MyVar + 8 -

                -
                +

                + How does VLA addressing work with GEPs? +

                +

                GEPs don't natively support VLAs. LLVM's type system is entirely static, and GEP address computations are guided by an LLVM type.

                @@ -450,16 +452,18 @@ idx3 = (char*) &MyVar + 8 VLA and non-VLA indexing in the same manner.

                - - - - - - - -
                + + +

                Rules

                + +
                + + +

                + What happens if an array index is out of bounds? +

                +

                There are two senses in which an array index can be out of bounds.

                First, there's the array type which comes from the (static) type of @@ -498,20 +502,20 @@ idx3 = (char*) &MyVar + 8

                - -
                +

                + Can array indices be negative? +

                +

                Yes. This is basically a special case of array indices being out of bounds.

                - -
                +

                + Can I compare two values computed with GEPs? +

                +

                Yes. If both addresses are within the same allocated object, or one-past-the-end, you'll get the comparison result you expect. If either is outside of it, integer arithmetic wrapping may occur, so the @@ -520,11 +524,13 @@ idx3 = (char*) &MyVar + 8

                - -
                +

                + + Can I do GEP with a different pointer type than the type of + the underlying object? + +

                +

                Yes. There are no restrictions on bitcasting a pointer value to an arbitrary pointer type. The types in a GEP serve only to define the parameters for the underlying integer computation. They need not correspond with the actual @@ -538,11 +544,12 @@ idx3 = (char*) &MyVar + 8

                - -
                +

                + + Can I cast an object's address to integer and add it to null? + +

                +

                You can compute an address that way, but if you use GEP to do the add, you can't use that pointer to actually access the object, unless the object is managed outside of LLVM.

                @@ -562,11 +569,13 @@ idx3 = (char*) &MyVar + 8
                - -
                +

                + + Can I compute the distance between two objects, and add + that value to one address to compute the other address? + +

                +

                As with arithmetic on null, you can use GEP to compute an address that way, but you can't use that pointer to actually access the object if you do, unless the object is managed outside of LLVM.

                @@ -577,10 +586,10 @@ idx3 = (char*) &MyVar + 8
                - -
                +

                + Can I do type-based alias analysis on LLVM IR? +

                +

                You can't do type-based alias analysis using LLVM's built-in type system, because LLVM has no restrictions on mixing types in addressing, loads or stores.

                @@ -594,10 +603,10 @@ idx3 = (char*) &MyVar + 8 - -
                +

                + What happens if a GEP computation overflows? +

                +

                If the GEP lacks the inbounds keyword, the value is the result from evaluating the implied two's complement integer computation. However, since there's no guarantee of where an object will be allocated in the @@ -624,11 +633,12 @@ idx3 = (char*) &MyVar + 8 -

                -
                +

                + + How can I tell if my front-end is following the rules? + +

                +

                There is currently no checker for the getelementptr rules. Currently, the only way to do this is to manually check each place in your front-end where GetElementPtr operators are created.

                @@ -641,16 +651,18 @@ idx3 = (char*) &MyVar + 8
                - - - - - - - -
                + + +

                Rationale

                + +
                + + +

                + Why is GEP designed this way? +

                +

                The design of GEP has the following goals, in rough unofficial order of priority:

                  @@ -669,10 +681,10 @@ idx3 = (char*) &MyVar + 8
                - -
                +

                + Why do struct member indices always use i32? +

                +

                The specific type i32 is probably just a historical artifact, however it's wide enough for all practical purposes, so there's been no need to change it. It doesn't necessarily imply i32 address arithmetic; it's just an identifier @@ -684,10 +696,10 @@ idx3 = (char*) &MyVar + 8 -

                -
                +

                + What's an uglygep? +

                +

                Some LLVM optimizers operate on GEPs by internally lowering them into more primitive integer expressions, which allows them to be combined with other integer expressions and/or split into multiple separate @@ -704,11 +716,13 @@ idx3 = (char*) &MyVar + 8

                +
                + - +

                Summary

                -
                +

                In summary, here are some things to always remember about the GetElementPtr instruction:

                  @@ -732,8 +746,8 @@ idx3 = (char*) &MyVar + 8 src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"> Valid HTML 4.01 - The LLVM Compiler Infrastructure
                  - Last modified: $Date: 2011-02-11 22:50:52 +0100 (Fri, 11 Feb 2011) $ + The LLVM Compiler Infrastructure
                  + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html index dfb976a29f1e..cc5c59e78f66 100644 --- a/docs/GettingStarted.html +++ b/docs/GettingStarted.html @@ -8,9 +8,9 @@ -
                  +

                  Getting Started with the LLVM System -

                  +
                  • Overview @@ -62,7 +62,7 @@

                    Written by: John Criswell, Chris Lattner, - Misha Brukman, + Misha Brukman, Vikram Adve, and Guochun Shi.

                    @@ -70,12 +70,12 @@ - +

                    + Overview +

                    -
                    +

                    Welcome to LLVM! In order to get started, you first need to know some basic information.

                    @@ -102,12 +102,12 @@ and performance.
                    - +

                    + Getting Started Quickly (A Summary) +

                    -
                    +

                    Here's the short story for getting up and running quickly with LLVM:

                    @@ -116,13 +116,13 @@ and performance.
                  • Read the documentation.
                  • Remember that you were warned twice about reading the documentation.
                  • Install the llvm-gcc-4.2 front end if you intend to compile C or C++ - (see Install the GCC Front End for details):
                  • + (see Install the GCC Front End for details):
                    1. cd where-you-want-the-C-front-end-to-live
                    2. gunzip --stdout llvm-gcc-4.2-version-platform.tar.gz | tar -xvf -
                    3. install-binutils-binary-from-MinGW (Windows only)
                    4. Note: If the binary extension is ".bz" use bunzip2 instead of gunzip.
                    5. -
                    6. Note: On Windows, use 7-Zip or a similar archiving tool.
                    7. +
                    8. Note: On Windows, use 7-Zip or a similar archiving tool.
                    9. Add llvm-gcc's "bin" directory to your PATH environment variable.
                    @@ -191,25 +191,23 @@ Layout to learn about the layout of the source code tree.

                - +

                + Requirements +

                -
                +

                Before you begin to use the LLVM system, review the requirements given below. This may save you some trouble by knowing ahead of time what hardware and software you will need.

                -
                - - +

                + Hardware +

                -
                +

                LLVM is known to work on the following platforms:

                @@ -268,7 +266,8 @@ software you will need.

              + 8, 10, + 11
              MinGW/Win32 x861,6, - 8, 10 GCC 3.4.X, binutils 2.20
              @@ -311,6 +310,11 @@ software you will need.

              Itanium (IA-64) HP aCC + + Windows x64 + x86-64 + mingw-w64's GCC-4.5.x12 +

              Notes:

              @@ -337,9 +341,10 @@ up
            7. before any Windows-based versions such as Strawberry Perl and ActivePerl, as these have Windows-specifics that will cause the build to fail. -
            8. In general, LLVM modules requiring dynamic linking can - not be built on Windows. However, you can build LLVM tools using - "make tools-only".
            9. +
            10. To use LLVM modules on a Win32-based system,
            11. +
            12. To compile the SPU backend, you need to add
            @@ -363,8 +368,10 @@ href="GCCFEBuildInstrs.html">try to compile it on your platform.

            - -
            +

            + Software +

            +

            Compiling LLVM requires that you have several software packages installed. The table below lists those required packages. The Package column is the usual name for the software package that LLVM depends on. The Version @@ -380,13 +387,13 @@ href="GCCFEBuildInstrs.html">try to compile it on your platform.

            - GCC + GCC 3.4.2 C/C++ compiler1 - TeXinfo + TeXinfo 4.5 For building the CFE @@ -397,6 +404,11 @@ href="GCCFEBuildInstrs.html">try to compile it on your platform.

            Subversion access to LLVM2 + + DejaGnu 1.4.2 @@ -428,13 +440,13 @@ href="GCCFEBuildInstrs.html">try to compile it on your platform.

            - GNU Autoconf + GNU Autoconf 2.60 Configuration script builder4 - GNU Automake + GNU Automake 1.9.6 aclocal macro generator4 @@ -496,11 +508,11 @@ href="GCCFEBuildInstrs.html">try to compile it on your platform.

            - + -
            +

            LLVM is very demanding of the host C++ compiler, and as such tends to expose bugs in the compiler. In particular, several versions of GCC crash when trying @@ -593,15 +605,15 @@ upgrading to a newer version of Gold.

            - - - - + + +

            + Getting Started with LLVM +

            -
            +

            The remainder of this guide is meant to get you up and running with LLVM and to give you some basic information about the LLVM environment.

            @@ -611,14 +623,13 @@ href="#layout">general layout of the LLVM source tree, a simple example using the LLVM tool chain, and links to find more information about LLVM or to get help via e-mail.

            -
            - + -
            +

            Throughout this manual, the following names are used to denote paths specific to the local system and working environment. These are not @@ -651,11 +662,11 @@ All these paths are absolute:

            - + -
            +

            In order to compile and use LLVM, you may need to set some environment @@ -674,11 +685,11 @@ variables.

            - + -
            +

            If you have the LLVM distribution, you will need to unpack it before you @@ -708,11 +719,11 @@ compressed with the gzip program.

            - + -
            +

            If you have access to our Subversion repository, you can get a fresh copy of the entire source code. All you need to do is check it out from Subversion as @@ -736,6 +747,7 @@ revision), you can checkout it from the 'tags' directory (instead of subdirectories of the 'tags' directory:

              +
            • Release 2.9: RELEASE_29/final
            • Release 2.8: RELEASE_28
            • Release 2.7: RELEASE_27
            • Release 2.6: RELEASE_26
            • @@ -778,30 +790,30 @@ instructions to successfully get and build the LLVM GCC front-end.

            - + -
            +

            GIT mirrors are available for a number of LLVM subprojects. These mirrors sync automatically with each Subversion commit and contain all necessary git-svn marks (so, you can recreate git-svn metadata locally). Note that right now mirrors reflect only trunk for each project. You can do the - read-only GIT clone of LLVM via: + read-only GIT clone of LLVM via:

            +
             % git clone http://llvm.org/git/llvm.git
             
            -

            - + -
            +

            Before configuring and compiling the LLVM suite (or if you want to use just the LLVM GCC front end) you can optionally extract the front end from the binary distribution. @@ -810,7 +822,7 @@ you can optionally build llvm-gcc yourself a main LLVM repository.

            To install the GCC front end, do the following (on Windows, use an archival tool -like 7-zip that understands gzipped tars):

            +like 7-zip that understands gzipped tars):

            1. cd where-you-want-the-front-end-to-live
            2. @@ -867,11 +879,11 @@ please let us know how you would like to see things improved by dropping us a no
            - + -
            +

            Once checked out from the Subversion repository, the LLVM suite source code must be @@ -989,11 +1001,11 @@ script to configure the build system:

            - + -
            +

            Once you have configured LLVM, you can build it. There are three types of builds:

            @@ -1123,11 +1135,11 @@ that directory that is out of date.

            - + -
            +

            It is possible to cross-compile LLVM itself. That is, you can create LLVM executables and libraries to be hosted on a platform different from the platform where they are built (a Canadian Cross build). To configure a @@ -1141,11 +1153,11 @@ that directory that is out of date.

            - + -
            +

            The LLVM build system is capable of sharing a single LLVM source tree among several LLVM builds. Hence, it is possible to build LLVM for several different @@ -1201,11 +1213,11 @@ named after the build type:

            - + -
            +

            If you're running on a Linux system that supports the " This allows you to execute LLVM bitcode files directly. On Debian, you -can also use this command instead of the 'echo' command above:

            +can also use this command instead of the 'echo' command above:

            @@ -1236,31 +1248,37 @@ $ sudo update-binfmts --install llvm /path/to/lli --magic 'BC'
            - -
            + + +

            + Program Layout +

            -
            +

            One useful source of information about the LLVM source base is the LLVM doxygen documentation available at doxygen documentation available at http://llvm.org/doxygen/. The following is a brief introduction to code layout:

            -
            - - -
            +

            + llvm/examples +

            + +

            This directory contains some simple examples of how to use the LLVM IR and JIT.

            - -
            +

            + llvm/include +

            + +

            This directory contains public header files exported from the LLVM library. The three main subdirectories of this directory are:

            @@ -1287,8 +1305,11 @@ library. The three main subdirectories of this directory are:

            - -
            +

            + llvm/lib +

            + +

            This directory contains most of the source files of the LLVM system. In LLVM, almost all code exists in libraries, making it very easy to share code among the @@ -1327,6 +1348,10 @@ different tools.

            This directory contains the major parts of the code generator: Instruction Selector, Instruction Scheduling, and Register Allocation.
            +
            llvm/lib/MC/
            +
            (FIXME: T.B.D.)
            + +
            llvm/lib/Debugger/
            This directory contains the source level debugger library that makes it possible to instrument LLVM programs so that a debugger could identify @@ -1340,6 +1365,7 @@ different tools.

            This directory contains the source code that corresponds to the header files located in llvm/include/Support/.
            +
            llvm/lib/System/
            This directory contains the operating system abstraction layer that shields LLVM from platform-specific coding.
            @@ -1348,8 +1374,11 @@ different tools.

            - -
            +

            + llvm/projects +

            + +

            This directory contains projects that are not strictly part of LLVM but are shipped with LLVM. This is also the directory where you should create your own LLVM-based projects. See llvm/projects/sample for an example of how @@ -1357,8 +1386,11 @@ different tools.

            - -
            +

            + llvm/runtime +

            + +

            This directory contains libraries which are compiled into LLVM bitcode and used when linking programs with the GCC front end. Most of these libraries are @@ -1371,16 +1403,22 @@ end to compile.

            - -
            +

            + llvm/test +

            + +

            This directory contains feature and regression tests and other basic sanity checks on the LLVM infrastructure. These are intended to run quickly and cover a lot of territory without being exhaustive.

            - -
            +

            + test-suite +

            + +

            This is not a directory in the normal llvm module; it is a separate Subversion module that must be checked out (usually to projects/test-suite). @@ -1395,8 +1433,11 @@ end to compile.

            - -
            +

            + llvm/tools +

            + +

            The tools directory contains the executables built out of the libraries above, which form the main part of the user interface. You can @@ -1480,8 +1521,11 @@ information is in the Command Guide.

            - -
            +

            + llvm/utils +

            + +

            This directory contains utilities for working with LLVM source code, and some of the utilities are actually required as part of the build process because they @@ -1542,13 +1586,15 @@ are code generators for parts of LLVM infrastructure.

            - - + + +

            + An Example Using the LLVM Tool Chain +

            -
            +

            This section gives an example of using LLVM. llvm-gcc3 is now obsolete, so we only include instructions for llvm-gcc4.

            @@ -1559,12 +1605,13 @@ create bitcode by default: gcc4 produces native code. As the example belo the '--emit-llvm' flag is needed to produce LLVM bitcode output. For makefiles and configure scripts, the CFLAGS variable needs '--emit-llvm' to produce bitcode output.

            -
            - +

            + Example with llvm-gcc4 +

            -
            +
            1. First, create a simple C file, name it 'hello.c':

              @@ -1645,14 +1692,15 @@ int main() {
            - - - + + +

            + Common Problems +

            -
            +

            If you are having problems building or using LLVM, or if you have any other general questions about LLVM, please consult the Frequently @@ -1661,12 +1709,12 @@ Asked Questions page.

            -
            +

            Links -

            + -
            +

            This document is just an introduction on how to use LLVM to do some simple things... there are many more interesting and complicated things @@ -1694,8 +1742,8 @@ out:

            Chris Lattner
            Reid Spencer
            - The LLVM Compiler Infrastructure
            - Last modified: $Date: 2011-02-01 21:08:28 +0100 (Tue, 01 Feb 2011) $ + The LLVM Compiler Infrastructure
            + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/GettingStartedVS.html b/docs/GettingStartedVS.html index 7c0bf007ec3a..6a604333c8f1 100644 --- a/docs/GettingStartedVS.html +++ b/docs/GettingStartedVS.html @@ -8,9 +8,9 @@ -
            +

            Getting Started with the LLVM System using Microsoft Visual Studio -

            +
            -

            Written by: - Jeff Cohen -

            +

            Written by: The LLVM Team

            - + -
            +

            Welcome to LLVM on Windows! This document only covers LLVM on Windows using Visual Studio, not mingw or cygwin. In order to get started, you first need to @@ -72,25 +70,23 @@

            - + -
            +

            Before you begin to use the LLVM system, review the requirements given below. This may save you some trouble by knowing ahead of time what hardware and software you will need.

            -
            - - + -
            +

            Any system that can adequately run Visual Studio .NET 2005 SP1 is fine. The LLVM source tree and object files, libraries and executables will consume @@ -99,8 +95,8 @@

            - -
            +

            Software

            +

            You will need Visual Studio .NET 2005 SP1 or higher. The VS2005 SP1 beta and the normal VS2005 still have bugs that are not completely @@ -120,13 +116,15 @@

            - - + + +

            + Getting Started +

            -
            +

            Here's the short story for getting up and running quickly with LLVM:

            @@ -196,7 +194,9 @@
            • If %PATH% does not contain GnuWin32, you may specify LLVM_LIT_TOOLS_DIR on CMake for the path to GnuWin32.
            • -
            • You can run LLVM tests to build the project "check".
            • +
            • You can run LLVM tests by merely building the project + "check". The test results will be shown in the VS output + window.
            @@ -215,25 +215,26 @@

            Note that quite a few of these tests will fail.

            -
          4. A specific test or test directory can be run with:
          5. +
          6. A specific test or test directory can be run with:
             % llvm-lit test/path/to/test
             
            - +
          7. +
    - + -
    +
    1. First, create a simple C file, name it 'hello.c':

      @@ -318,12 +319,12 @@ int main() {
    - + -
    +

    If you are having problems building or using LLVM, or if you have any other general questions about LLVM, please consult the Frequently @@ -332,12 +333,12 @@ Asked Questions page.

    -
    +

    Links -

    + -
    +

    This document is just an introduction to how to use LLVM to do some simple things... there are many more interesting and complicated things @@ -361,9 +362,8 @@ out:

    Valid HTML 4.01 - Jeff Cohen
    - The LLVM Compiler Infrastructure
    - Last modified: $Date: 2011-02-20 16:34:12 +0100 (Sun, 20 Feb 2011) $ + The LLVM Compiler Infrastructure
    + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/GoldPlugin.html b/docs/GoldPlugin.html index 68c5cf192802..e25c45751929 100644 --- a/docs/GoldPlugin.html +++ b/docs/GoldPlugin.html @@ -7,7 +7,7 @@ -
    LLVM gold plugin
    +

    LLVM gold plugin

    1. Introduction
    2. How to build it
    3. @@ -21,9 +21,9 @@
      Written by Nick Lewycky
      - +

      Introduction

      -
      +

      Building with link time optimization requires cooperation from the system linker. LTO support on Linux systems requires that you use the gold linker which supports @@ -33,14 +33,14 @@ project.

      The LLVM gold plugin implements the gold plugin interface on top of -libLTO. +libLTO. The same plugin can also be used by other tools such as ar and nm.

      - +

      How to build it

      -
      +

      You need to have gold with plugin support and build the LLVMgold plugin. Check whether you have gold running /usr/bin/ld -v. It will report “GNU gold” or else “GNU ld” if not. If you have @@ -72,9 +72,9 @@ placed.

      - +

      Usage

      -
      +

      The linker takes a -plugin option that points to the path of the plugin .so file. To find out what link command gcc would run in a given situation, run gcc -v [...] and look @@ -95,14 +95,13 @@ placed. own gold, be sure to install the ar and nm-new you built to /usr/bin.

      -

      - + -
      +

      The following example shows a worked example of the gold plugin mixing LLVM bitcode and native code.

      @@ -145,14 +144,20 @@ $ llvm-gcc -use-gold-plugin a.a b.o -o main # <-- link with LLVMgold plugin
       

      Gold informs the plugin that foo3 is never referenced outside the IR, leading LLVM to delete that function. However, unlike in the - libLTO + libLTO example gold does not currently eliminate foo4.

      +
      + - +

      + + Quickstart for using LTO with autotooled projects + +

      -
      +

      Once your system ld, ar and nm all support LLVM bitcode, everything is in place for an easy to use LTO build of autotooled projects:

      @@ -189,9 +194,9 @@ export CFLAGS="-O4"
      - +

      Licensing

      -
      +

      Gold is licensed under the GPLv3. LLVMgold uses the interface file plugin-api.h from gold which means that the resulting LLVMgold.so binary is also GPLv3. This can still be used to link non-GPLv3 programs just @@ -206,7 +211,7 @@ as much as gold could without the plugin.

      Valid HTML 4.01 Nick Lewycky
      - The LLVM Compiler Infrastructure
      + The LLVM Compiler Infrastructure
      Last modified: $Date: 2010-04-16 23:58:21 -0800 (Fri, 16 Apr 2010) $ diff --git a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt index 1c725f5aa715..81ca53919d42 100644 --- a/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt +++ b/docs/HistoricalNotes/2000-11-18-EarlyDesignIdeasResp.txt @@ -60,11 +60,11 @@ Understood. :) Yup, I think that this makes a lot of sense. I am still intrigued, however, by the prospect of a minimally allocated VM representation... I -think that it could have definate advantages for certain applications +think that it could have definite advantages for certain applications (think very small machines, like PDAs). I don't, however, think that our initial implementations should focus on this. :) -Here are some other auxilliary goals that I think we should consider: +Here are some other auxiliary goals that I think we should consider: 1. Primary goal: Support a high performance dynamic compilation system. This means that we have an "ideal" division of labor between diff --git a/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt index b66e18556f5e..01b644b3517f 100644 --- a/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt +++ b/docs/HistoricalNotes/2000-12-06-MeetingSummary.txt @@ -40,7 +40,7 @@ IDEAS TO CONSIDER packaged with the bytecodes themselves. As a conceptual implementation idea, we could include an immediate dominator number for each basic block in the LLVM bytecode program. Basic blocks could be numbered according - to the order of occurance in the bytecode representation. + to the order of occurrence in the bytecode representation. 2. Including loop header and body information. This would facilitate detection of intervals and natural loops. 
diff --git a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt index 7b9032742a27..839732444f93 100644 --- a/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt +++ b/docs/HistoricalNotes/2001-02-06-TypeNotationDebateResp4.txt @@ -39,7 +39,7 @@ declaration and calling syntax. Very true. If you're implementing an object oriented language, however, remember that you have to do all the pointer to member function stuff -yourself.... so everytime you invoke a virtual method one is involved +yourself.... so every time you invoke a virtual method one is involved (instead of having C++ hide it for you behind "syntactic sugar"). > And the old array syntax: diff --git a/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt index 5c87330fb7ea..da5026366539 100644 --- a/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt +++ b/docs/HistoricalNotes/2001-02-09-AdveCommentsResponse.txt @@ -18,7 +18,7 @@ suggested, as specified below: Very true. We should discuss this more, but my reasoning is more of a consistency argument. There are VERY few instructions that can have all -of the types eliminated, and doing so when available unnecesarily makes +of the types eliminated, and doing so when available unnecessarily makes the language more difficult to handle. Especially when you see 'int %this' and 'bool %that' all over the place, I think it would be disorienting to see: @@ -44,7 +44,7 @@ branches). No. This was something I was debating for a while, and didn't really feel strongly about either way. It is common to switch on other types in HLL's -(for example signed int's are particually common), but in this case, all +(for example signed int's are particularly common), but in this case, all that will be added is an additional 'cast' instruction. I removed that from the spec. 
@@ -160,7 +160,7 @@ that can be trivally translated into a conditional move... > I agree that we need a static data space. Otherwise, emulating global > data gets unnecessarily complex. -Definately. Also a later item though. :) +Definitely. Also a later item though. :) > We once talked about adding a symbolic thread-id field to each > .. diff --git a/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt index 6c9e0971a04d..e61042fd657a 100644 --- a/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt +++ b/docs/HistoricalNotes/2001-06-01-GCCOptimizations2.txt @@ -42,7 +42,7 @@ Does using GCC's backend buy us anything? > optimization (step 16 in your list). Do you have a breakdown of that? Not really. The irritating part of GCC is that it mixes it all up and -doesn't have a clean seperation of concerns. A lot of the "back end +doesn't have a clean separation of concerns. A lot of the "back end optimization" happens right along with other data optimizations (ie, CSE of machine specific things). diff --git a/docs/HistoricalNotes/2002-05-12-InstListChange.txt b/docs/HistoricalNotes/2002-05-12-InstListChange.txt index 004edb068d73..638682b49fda 100644 --- a/docs/HistoricalNotes/2002-05-12-InstListChange.txt +++ b/docs/HistoricalNotes/2002-05-12-InstListChange.txt @@ -17,7 +17,7 @@ iterator to an instruction, which, given just an Instruction*, requires a linear search of the basic block the instruction is contained in... just to insert an instruction before another instruction, or to delete an instruction! This complicates algorithms that should be very simple (like -simple constant propogation), because they aren't actually sparse anymore, +simple constant propagation), because they aren't actually sparse anymore, they have to traverse basic blocks to remove constant propogated instructions. 
diff --git a/docs/HowToReleaseLLVM.html b/docs/HowToReleaseLLVM.html index 7663321fdb82..f52f326eef62 100644 --- a/docs/HowToReleaseLLVM.html +++ b/docs/HowToReleaseLLVM.html @@ -7,7 +7,7 @@ -
      How To Release LLVM To The Public
      +

      How To Release LLVM To The Public

      1. Introduction
      2. Qualification Criteria
      3. @@ -17,495 +17,597 @@ - +

        Introduction

        -
        -

        - This document collects information about successfully releasing LLVM - (including subprojects llvm-gcc and Clang) to the public. - It is the release manager's responsibility to ensure that a high quality - build of LLVM is released. -

        +
        + +

        This document contains information about successfully releasing LLVM — + including subprojects: e.g., llvm-gcc and clang — to + the public. It is the Release Manager's responsibility to ensure that a high + quality build of LLVM is released.

        +
        - +

        Release Timeline

        -
        -

        LLVM is released on a time based schedule (currently every 6 months). We - do not have dot releases because of the nature of LLVM incremental - development philosophy. The release schedule is roughly as follows: -

        +
        + +

        LLVM is released on a time based schedule — roughly every 6 months. We + do not normally have dot releases because of the nature of LLVM's incremental + development philosophy. That said, the only thing preventing dot releases for + critical bug fixes from happening is a lack of resources — testers, + machines, time, etc. And, because of the high quality we desire for LLVM + releases, we cannot allow for a truncated form of release qualification.

        + +

        The release process is roughly as follows:

        + +
          +
        • Set code freeze and branch creation date for 6 months after last code + freeze date. Announce release schedule to the LLVM community and update + the website.

        • + +
        • Create release branch and begin release process.

        • + +
        • Send out release candidate sources for first round of testing. Testing + lasts 7-10 days. During the first round of testing, any regressions found + should be fixed. Patches are merged from mainline into the release + branch. Also, all features need to be completed during this time. Any + features not completed at the end of the first round of testing will be + removed or disabled for the release.

        • + +
        • Generate and send out the second release candidate sources. Only + critical bugs found during this testing phase will be fixed. Any + bugs introduced by merged patches will be fixed. If so, a third round of + testing is needed.

        • + +
        • The release notes are updated.

        • + +
        • Finally, release!

        • +
        + +
        + + +

        Release Process

        + + +
        +
          -
        1. Set code freeze and branch creation date for 6 months after last code freeze -date. Announce release schedule to the LLVM community and update the website.
        2. -
        3. Create release branch and begin release process.
        4. -
        5. Send out pre-release for first round of testing. Testing will last 7-10 days. -During the first round of testing, regressions should be found and fixed. Patches -are merged from mainline to the release branch.
        6. -
        7. Generate and send out second pre-release. Bugs found during this time will -not be fixed unless absolutely critical. Bugs introduce by patches merged in -will be fixed and if so, a 3rd round of testing is needed.
        8. -
        9. The release notes should be updated during the first and second round of -pre-release testing.
        10. -
        11. Finally, release!
        12. -
        -
        - - - - - - - + +
      - +

      Release Administrative Tasks

      -
      -This section describes a few administrative tasks that need to be done for the -release process to begin. Specifically, it involves creating the release branch, - resetting version numbers, and creating the release tarballs for the release - team to begin testing. -
      +
      + +

      This section describes a few administrative tasks that need to be done for + the release process to begin. Specifically, it involves:

      + +
        +
      • Creating the release branch,
      • +
      • Setting version numbers, and
      • +
      • Tagging release candidates for the release team to begin testing
      • +
      - -
      -

      Branch the Subversion HEAD using the following procedure:

      -
        -
      1. -

        Verify that the current Subversion HEAD is in decent shape by examining - nightly tester or buildbot results.

      2. -
      3. -

        Request all developers to refrain from committing. Offenders get commit - rights taken away (temporarily).

      4. -
      5. -

        Create the release branch for llvm, llvm-gcc4.2, - clang, and the test-suite. The branch name will be - release_XX,where XX is the major and minor release numbers. - Clang will have a different release number than llvm/ - llvm-gcc4 since its first release was years later - (still deciding if this will be true or not). These branches - can be created without checking out anything from subversion. -

        - -
        -
        -svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
        -         https://llvm.org/svn/llvm-project/llvm/branches/release_XX
        -svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk \
        -         https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX
        -svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
        -         https://llvm.org/svn/llvm-project/test-suite/branches/release_XX
        -svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
        -         https://llvm.org/svn/llvm-project/cfe/branches/release_XX
        -
        -
        +

        Create Release Branch

        -
      6. -

        Advise developers they can work on Subversion HEAD again.

      7. - -
      8. -

        The Release Manager should switch to the release branch (as all changes - to the release will now be done in the branch). The easiest way to do this - is to grab another working copy using the following commands:

        +
        +

        Branch the Subversion trunk using the following procedure:

        + +
          +
        1. Remind developers that the release branching is imminent and to refrain + from committing patches that might break the build. E.g., new features, + large patches for works in progress, an overhaul of the type system, an + exciting new TableGen feature, etc.

        2. + +
        3. Verify that the current Subversion trunk is in decent shape by + examining nightly tester and buildbot results.

        4. + +
        5. Create the release branch for llvm, llvm-gcc-4.2, + clang, and the test-suite from the last known good + revision. The branch's name is release_XY, where X is + the major and Y the minor release numbers. The branches should be + created using the following commands:

          +
          -svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XX
          -svn co https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX
          -svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XX
          -svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XX
          +$ svn copy https://llvm.org/svn/llvm-project/llvm/trunk \
          +           https://llvm.org/svn/llvm-project/llvm/branches/release_XY
          +
          +$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/trunk \
          +           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY
          +
          +$ svn copy https://llvm.org/svn/llvm-project/test-suite/trunk \
          +           https://llvm.org/svn/llvm-project/test-suite/branches/release_XY
          +
          +$ svn copy https://llvm.org/svn/llvm-project/cfe/trunk \
          +           https://llvm.org/svn/llvm-project/cfe/branches/release_XY
           
        6. -
        -
        +
      9. Advise developers that they may now check their patches into the + Subversion tree again.

      10. - - -
        -

        - After creating the LLVM release branch, update the release branches' - autoconf/configure.ac version from X.Xsvn to just X.X. Update it on mainline - as well to be the next version (X.X+1svn). Regenerated the configure script - for both. This must be done for both llvm and the - test-suite. -

        -

        FIXME: Add a note about clang.

        -

        In addition, the version number of all the Bugzilla components must be - updated for the next release. -

        -
        - - - -
        -

        - Create source distributions for LLVM, LLVM-GCC, - clang, and the llvm test-suite by exporting the source from - Subversion and archiving it. This can be done with the following commands: -

        +
      11. The Release Manager should switch to the release branch, because all + changes to the release will now be done in the branch. The easiest way to + do this is to grab a working copy using the following commands:

        -svn export https://llvm.org/svn/llvm-project/llvm/branches/release_XX llvm-X.X
        -svn export https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX llvm-gcc4.2-X.X.source
        -svn export https://llvm.org/svn/llvm-project/test-suite/branches/release_XX llvm-test-X.X
        -svn export https://llvm.org/svn/llvm-project/cfe/branches/release_XX clang-X.X
        -tar -czvf - llvm-X.X          | gzip > llvm-X.X.tar.gz
        -tar -czvf - llvm-test-X.X     | gzip > llvm-test-X.X.tar.gz
        -tar -czvf - llvm-gcc4.2-X.X.source | gzip > llvm-gcc-4.2-X.X.source.tar.gz
        -tar -czvf - clang-X.X | gzip > clang-X.X.tar.gz
        +$ svn co https://llvm.org/svn/llvm-project/llvm/branches/release_XY llvm-X.Y
        +
        +$ svn co https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY llvm-gcc-4.2-X.Y
        +
        +$ svn co https://llvm.org/svn/llvm-project/test-suite/branches/release_XY test-suite-X.Y
        +
        +$ svn co https://llvm.org/svn/llvm-project/cfe/branches/release_XY clang-X.Y
        +
        +
      12. +
      + +
      + + +

      Update LLVM Version

      + +
      + +

      After creating the LLVM release branch, update the release branches' + autoconf and configure.ac versions from 'X.Ysvn' + to 'X.Y'. Update it on mainline as well to be the next version + ('X.Y+1svn'). Regenerate the configure scripts for both + llvm and the test-suite.

      + +

      In addition, the version numbers of all the Bugzilla components must be + updated for the next release.

      + +
      + + +

      Build the LLVM Release Candidates

      + +
      + +

      Create release candidates for llvm, llvm-gcc, + clang, and the LLVM test-suite by tagging the branch with + the respective release candidate number. For instance, to create Release + Candidate 1 you would issue the following commands:

      + +
      +
      +$ svn mkdir https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY
      +$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1
      +
      +$ svn mkdir https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY
      +$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY/rc1
      +
      +$ svn mkdir https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY
      +$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1
      +
      +$ svn mkdir https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY
      +$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1
       
      + +

      Similarly, Release Candidate 2 would be named RC2 and so + on. This keeps a permanent copy of the release candidate around for people to + export and build as they wish. The final released sources will be tagged in + the RELEASE_XY directory as Final + (cf. Tag the LLVM Final Release).

      + +

      The Release Manager may supply pre-packaged source tarballs for users. This + can be done with the following commands:

      + +
      +
      +$ svn export https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/rc1 llvm-X.Yrc1
      +$ svn export https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY/rc1 llvm-gcc4.2-X.Yrc1
      +$ svn export https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/rc1 llvm-test-X.Yrc1
      +$ svn export https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/rc1 clang-X.Yrc1
      +
      +$ tar -cvf - llvm-X.Yrc1        | gzip > llvm-X.Yrc1.src.tar.gz
      +$ tar -cvf - llvm-test-X.Yrc1   | gzip > llvm-test-X.Yrc1.src.tar.gz
      +$ tar -cvf - llvm-gcc4.2-X.Yrc1 | gzip > llvm-gcc-4.2-X.Yrc1.src.tar.gz
      +$ tar -cvf - clang-X.Yrc1       | gzip > clang-X.Yrc1.src.tar.gz
      +
      +
      + +
      +
      - +

      Building the Release

      -
      -The build of llvm, llvm-gcc, and clang must be free -of errors and warnings in both debug, release+asserts, and release builds. -If all builds are clean, then the release passes build qualification. +
      + +

      The builds of llvm, llvm-gcc, and clang + must be free of errors and warnings in Debug, Release+Asserts, and + Release builds. If all builds are clean, then the release passes Build + Qualification.

      + +

      The make options for building the different modes:

      + + + + + + +
      ModeOptions
      DebugENABLE_OPTIMIZED=0
      Release+AssertsENABLE_OPTIMIZED=1
      ReleaseENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
      + + +

      Build LLVM

      + +
      + +

      Build Debug, Release+Asserts, and Release versions + of llvm on all supported platforms. Directions to build + llvm are + here.

      + +
      + + +

      Build the LLVM GCC Binary Distribution

      + +
      + +

      Creating the llvm-gcc binary distribution (Release/Optimized) + requires performing the following steps for each supported platform:

        -
      1. debug: ENABLE_OPTIMIZED=0
      2. -
      3. release+asserts: ENABLE_OPTIMIZED=1
      4. -
      5. release: ENABLE_OPTIMIZED=1 DISABLE_ASSERTIONS=1
      6. +
      7. Build the llvm-gcc front-end by following the directions in + the README.LLVM file. The front-end must be compiled with C, C++, + Objective-C (Mac only), Objective-C++ (Mac only), and Fortran + support.

      8. + +
      9. Bootstrapping must be enabled.

      10. + +
      11. Be sure to build with LLVM_VERSION_INFO=X.Y, where X + is the major and Y is the minor release number.

      12. + +
      13. Copy the installation directory to a directory named for the specific + target. For example on Red Hat Enterprise Linux, the directory would be + named llvm-gcc4.2-2.6-x86-linux-RHEL4. Archive and compress the + new directory.

      +
      - -
      -

      - Build both debug, release+asserts (optimized), and release versions of - LLVM on all supported platforms. Direction to build llvm are - here. -

      +

      Build Clang Binary Distribution

      + +
      + +

      Creating the clang binary distribution + (Debug/Release+Asserts/Release) requires performing the following steps for + each supported platform:

      + +
        +
      1. Build clang according to the directions + here.
      2. + +
      3. Build both a debug and release version of clang. The binary will be the + release build.
      4. + +
      5. Package clang (details to follow).
      6. +
      +
      - -
      -

      - Creating the LLVM GCC binary distribution (release/optimized) requires - performing the following steps for each supported platform: -

      +

      Target Specific Build Details

      -
        -
      1. - Build the LLVM GCC front-end by following the directions in the README.LLVM - file. The frontend must be compiled with c, c++, objc (mac only), - objc++ (mac only) and fortran support.
      2. -
      3. Please boostrap as well.
      4. -
      5. Be sure to build with LLVM_VERSION_INFO=X.X, where X is the major and - minor release numbers. -
      6. +
        -
      7. - Copy the installation directory to a directory named for the specific target. - For example on Red Hat Enterprise Linux, the directory would be named - llvm-gcc4.2-2.6-x86-linux-RHEL4. Archive and compress the new directory. -
      8. -
      -
      +

      The table below specifies which compilers are used for each Arch/OS + combination when qualifying the build of llvm, llvm-gcc, + and clang.

      - - -
      -

      - Creating the Clang binary distribution (debug/release/release) requires - performing the following steps for each supported platform: -

      - -
        -
      1. - Build clang according to the directions - here. -
      2. - -
      3. Build both a debug and release version of clang, but the binary - will be a release build.
      4. - -
      5. - Package clang (details to follow). -
      6. -
      -
      - - - - -
      -

      - The table below specifies which compilers are used for each arch/os combination - when qualifying the build of llvm, llvm-gcc, clang. -

      - -

      - +
      - + - -
      ArchitectureOScompiler
      x86-32Mac OS 10.5gcc 4.0.1
      x86-32Linuxgcc 4.2.X, gcc 4.3.X
      x86-32FreeBSDgcc 4.2.X
      x86-32mingwgcc 3.4.5
      x86-32mingwgcc 3.4.5
      x86-64Mac OS 10.5gcc 4.0.1
      x86-64Linuxgcc 4.2.X, gcc 4.3.X
      x86-64FreeBSDgcc 4.2.X
      -

      +
      - - - - -
      - A release is qualified when it has no regressions from the previous - release (or baseline). Regressions are related to correctness only and not - performance at this time. Regressions are new failures in the set of tests that - are used to qualify each product and only include things on the list. - Ultimately, there is no end to the number of possible bugs in a release. We - need a very concrete and definitive release criteria that ensures we have - monotonically improving quality on some metric. The metric we use is - described below. This doesn't mean that we don't care about other things, - but this are things that must be satisfied before a release can go out -
      - - - - -
      -

      - LLVM is qualified when it has a clean dejagnu test run without a frontend and - it has no regressions when using either llvm-gcc or clang - with the test-suite from the previous release. -

      - -
      -

      - LLVM-GCC is qualified when front-end specific tests in the - llvm dejagnu test suite all pass and there are no regressions in - the test-suite.

      -

      We do not use the gcc dejagnu test suite as release criteria.

      +

      Building the Release

      + +
      + +

      A release is qualified when it has no regressions from the previous release + (or baseline). Regressions are related to correctness first and performance + second. (We may tolerate some minor performance regressions if they are + deemed necessary for the general quality of the compiler.)

      + +

      Regressions are new failures in the set of tests that are used to qualify + each product and only include things on the list. Every release will have + some bugs in it. It is the reality of developing a complex piece of + software. We need a very concrete and definitive release criteria that + ensures we have monotonically improving quality on some metric. The metric we + use is described below. This doesn't mean that we don't care about other + criteria, but these are the criteria which we found to be most important and + which must be satisfied before a release can go out.

      + + +

      Qualify LLVM

      + +
      + +

      LLVM is qualified when it has a clean test run without a front-end. And it + has no regressions when using either llvm-gcc or clang with + the test-suite from the previous release.

      +
      - -
      - Clang is qualified when front-end specific tests in the - llvm dejagnu test suite all pass, clang's own test suite passes - cleanly, and there are no regressions in the test-suite.

      +

      Qualify LLVM-GCC

      + +
      + +

      LLVM-GCC is qualified when front-end specific tests in the + llvm regression test suite all pass and there are no regressions in + the test-suite.

      + +

      We do not use the GCC DejaGNU test suite as release criteria.

      +
      - -
      -

      - +

      Qualify Clang

      + +
      + +

      Clang is qualified when front-end specific tests in the + llvm dejagnu test suite all pass, clang's own test suite passes + cleanly, and there are no regressions in the test-suite.

      + +
      + + +

      Specific Target Qualification Details

      + +
      + +
      ArchitectureOSllvm-gcc baselineclang baseline - tests
      + -
      ArchitectureOSllvm-gcc baselineclang baselinetests
      x86-32Linuxlast releaselast releasellvm dejagnu, clang tests, test-suite (including spec)
      x86-32FreeBSDnonelast releasellvm dejagnu, clang tests, test-suite
      x86-32mingwlast releasenoneQT
      x86-64Mac OS 10.Xlast releaselast releasellvm dejagnu, clang tests, test-suite (including spec)
      x86-64Linuxlast releaselast releasellvm dejagnu, clang tests, test-suite (including spec)
      x86-64FreeBSDnonelast releasellvm dejagnu, clang tests, test-suite

      + + +
      +
      - -
      -

      - Once all testing has been completed and appropriate bugs filed, the pre-release - tar balls may be put on the website and the LLVM community is notified. Ask that - all LLVM developers test the release in 2 ways:

      -
        -
      1. Download llvm-X.X, llvm-test-X.X, and the appropriate llvm-gcc4 - and/or clang binary. Build LLVM. - Run "make check" and the full llvm-test suite (make TEST=nightly report).
      2. -
      3. Download llvm-X.X, llvm-test-X.X, and the llvm-gcc4 and/or clang source. - Compile everything. Run "make check" and the full llvm-test suite (make TEST=nightly - report).
      4. -
      -

      Ask LLVM developers to submit the report and make check results to the list. - Attempt to verify that there are no regressions from the previous release. - The results are not used to qualify a release, but to spot other potential - problems. For unsupported targets, verify that make check at least is - clean.

      +

      Community Testing

      +
      + +

      Once all testing has been completed and appropriate bugs filed, the release + candidate tarballs are put on the website and the LLVM community is + notified. Ask that all LLVM developers test the release in 2 ways:

      + +
        +
      1. Download llvm-X.Y, llvm-test-X.Y, and the + appropriate llvm-gcc and/or clang binary. Build + LLVM. Run make check and the full LLVM test suite (make + TEST=nightly report).
      2. + +
      3. Download llvm-X.Y, llvm-test-X.Y, and the + llvm-gcc and/or clang source. Compile everything. Run + make check and the full LLVM test suite (make TEST=nightly + report).
      4. +
      + +

      Ask LLVM developers to submit the test suite report and make check + results to the list. Verify that there are no regressions from the previous + release. The results are not used to qualify a release, but to spot other + potential problems. For unsupported targets, verify that make check + is at least clean.

      -

      During the first round of testing time, - all regressions must be fixed before the second pre-release is created.

      +

      During the first round of testing, all regressions must be fixed before the + second release candidate is tagged.

      -

      If this is the second round of testing, this is only to ensure the bug - fixes previously merged in have not created new major problems. This is not - the time to solve additional and unrelated bugs. If no patches are merged in, - the release is determined to be ready and the release manager may move onto - the next step. -

      +

      If this is the second round of testing, the testing is only to ensure that + bug fixes previously merged in have not created new major problems. This + is not the time to solve additional and unrelated bugs! If no patches are + merged in, the release is determined to be ready and the release manager may + move onto the next stage.

      +
      - -
      -

      - Below are the rules regarding patching the release branch.

      -

      -

    4. Patches applied to the release branch are only applied by the release - manager.
    5. -
    6. During the first round of testing, patches that fix regressions or that - are small and relatively risk free (verified by the appropriate code owner) - are applied to the branch. Code owners are asked to be very conservative in - approving patches for the branch and we reserve the right to reject any patch - that does not fix a regression as previously defined.
    7. -
    8. During the remaining rounds of testing, only patches that fix regressions - may be applied.
    9. - -

      -
      +

      Release Patch Rules

      +
      - - -
      -

      - The final stages of the release process involving tagging the release branch, - updating documentation that refers to the release, and updating the demo - page.

      -

      FIXME: Add a note if anything needs to be done to the clang website. - Eventually the websites will be merged hopefully.

      -
      +

      Below are the rules regarding patching the release branch:

      +
        +
      1. Patches applied to the release branch may only be applied by the + release manager.

      2. + +
      3. During the first round of testing, patches that fix regressions or that + are small and relatively risk free (verified by the appropriate code + owner) are applied to the branch. Code owners are asked to be very + conservative in approving patches for the branch. We reserve the right to + reject any patch that does not fix a regression as previously + defined.

      4. + +
      5. During the remaining rounds of testing, only patches that fix critical + regressions may be applied.

      6. +
      - - -
      -

      - Review the documentation and ensure that it is up to date. The Release Notes - must be updated to reflect bug fixes, new known issues, and changes in the - list of supported platforms. The Getting Started Guide should be updated to - reflect the new release version number tag avaiable from Subversion and - changes in basic system requirements. Merge both changes from mainline into - the release branch. -

      - -
      -

      Tag the release branch using the following procedure:

      +

      Release Final Tasks

      + +
      + +

      The final stages of the release process involve tagging the "final" release + branch, updating documentation that refers to the release, and updating the + demo page.

      + + +

      Update Documentation

      + +
      + +

      Review the documentation and ensure that it is up to date. The "Release + Notes" must be updated to reflect new features, bug fixes, new known issues, + and changes in the list of supported platforms. The "Getting Started Guide" + should be updated to reflect the new release version number tag available from + Subversion and changes in basic system requirements. Merge both changes from + mainline into the release branch.

      + +
      + + +

      Tag the LLVM Final Release

      + +
      + +

      Tag the final release sources using the following procedure:

      +
      -svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XX \
      -         https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XX
      -svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XX \
      -         https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XX
      -svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \
      -         https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XX
      +$ svn copy https://llvm.org/svn/llvm-project/llvm/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/llvm/tags/RELEASE_XY/Final
      +
      +$ svn copy https://llvm.org/svn/llvm-project/llvm-gcc-4.2/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/llvm-gcc-4.2/tags/RELEASE_XY/Final
      +
      +$ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/test-suite/tags/RELEASE_XY/Final
      +
      +$ svn copy https://llvm.org/svn/llvm-project/cfe/branches/release_XY \
      +           https://llvm.org/svn/llvm-project/cfe/tags/RELEASE_XY/Final
       
      +
      - - - - -
      -

      - The LLVM demo page must be updated to use the new release. This consists of - using the llvm-gcc binary and building LLVM. Update the website demo page - configuration to use the new release.

      - -
      -

      - The website must be updated before the release announcement is sent out. Here is - what to do:

      -
        -
      1. Check out the website module from CVS.
      2. -
      3. Create a new subdirectory X.X in the releases directory.
      4. -
      5. Commit the llvm, test-suite, llvm-gcc source, - clang source, clang binaries, - and llvm-gcc binaries in this new directory.
      6. -
      7. Copy and commit the llvm/docs and LICENSE.txt - files into this new directory. The docs should be built with BUILD_FOR_WEBSITE=1.
      8. -
      9. Commit the index.html to the release/X.X directory to redirect (use from previous - release.
      10. -
      11. Update the releases/download.html file with the new release.
      12. -
      13. Update the releases/index.html with the new release and link to - release documentation.
      14. -
      15. Finally, update the main page (index.html and sidebar) to - point to the new release and release announcement. Make sure this all gets - committed back into Subversion.
      16. -
      +

      Update the LLVM Demo Page

      + +
      + +

      The LLVM demo page must be updated to use the new release. This consists of + using the new llvm-gcc binary and building LLVM.

      + + +

      Update the LLVM Website

      + +
      + +

      The website must be updated before the release announcement is sent out. Here + is what to do:

      + +
        +
      1. Check out the www module from Subversion.
      2. + +
      3. Create a new subdirectory X.Y in the releases directory.
      4. + +
      5. Commit the llvm, test-suite, llvm-gcc source, + clang source, clang binaries, and llvm-gcc + binaries in this new directory.
      6. + +
      7. Copy and commit the llvm/docs and LICENSE.txt files + into this new directory. The docs should be built with + BUILD_FOR_WEBSITE=1.
      8. + +
      9. Commit the index.html to the release/X.Y directory to + redirect (use from previous release).
      10. + +
      11. Update the releases/download.html file with the new release.
      12. + +
      13. Update the releases/index.html with the new release and link to + release documentation.
      14. + +
      15. Finally, update the main page (index.html and sidebar) to point + to the new release and release announcement. Make sure this all gets + committed back into Subversion.
      16. +
      +
      - -
      -

      Have Chris send out the release announcement when everything is finished.

      +

      Announce the Release

      + +
      + +

      Have Chris send out the release announcement when everything is finished.

      + +
      + +
      +
      @@ -515,9 +617,9 @@ svn copy https://llvm.org/svn/llvm-project/test-suite/branches/release_XX \ src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"> Valid HTML 4.01 - The LLVM Compiler Infrastructure + The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-07-07 09:48:00 +0200 (Wed, 07 Jul 2010) $ + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/HowToSubmitABug.html b/docs/HowToSubmitABug.html index 64c6141ec7b1..54f548cefd90 100644 --- a/docs/HowToSubmitABug.html +++ b/docs/HowToSubmitABug.html @@ -7,9 +7,9 @@ -
      +

      How to submit an LLVM bug report -

      + @@ -37,12 +37,12 @@
      - + -
      +

      If you're working with LLVM and run into a bug, we definitely want to know about it. This document describes what you can do to increase the odds of @@ -76,12 +76,12 @@ information:

      - + -
      +

      More often than not, bugs in the compiler cause it to crash—often due to an assertion failure of some sort. The most important @@ -109,14 +109,12 @@ with the following extra command line options:

      -
      - - + -
      +

      If the problem is in the front-end, you should re-run the same llvm-gcc command that resulted in the crash, but add the @@ -137,11 +135,11 @@ has instructions on the best way to use delta.

      - + -
      +

      If you find that a bug crashes in the optimizer, compile your test-case to a .bc file by passing "-emit-llvm -O0 -c -o foo.bc". @@ -171,11 +169,11 @@ that bugpoint emits. If something goes wrong with bugpoint, please submit the

      - + -
      +

      If you find a bug that crashes llvm-gcc in the code generator, compile your source file to a .bc file by passing "-emit-llvm -c -o foo.bc" @@ -207,13 +205,15 @@ that bugpoint emits. If something goes wrong with bugpoint, please submit the

      - - + + +

      + Miscompilations +

      -
      +

      If llvm-gcc successfully produces an executable, but that executable doesn't run right, this is either a bug in the code or a bug in the @@ -241,12 +241,12 @@ error.

      - + -
      +

      Similarly to debugging incorrect compilation by mis-behaving passes, you can debug incorrect code generation by either LLC or the JIT, using @@ -338,9 +338,9 @@ the following:

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
      - The LLVM Compiler Infrastructure + The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/LangRef.html b/docs/LangRef.html index 580ae7964b5a..0e37e8231c82 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -12,7 +12,7 @@ -
      LLVM Language Reference Manual
      +

      LLVM Language Reference Manual

      1. Abstract
      2. Introduction
      3. @@ -321,10 +321,10 @@
      - +

      Abstract

      -
      +

      This document is a reference manual for the LLVM assembly language. LLVM is a Static Single Assignment (SSA) based representation that provides type @@ -335,10 +335,10 @@

      - +

      Introduction

      -
      +

      The LLVM code representation is designed to be used in three different forms: as an in-memory compiler IR, as an on-disk bitcode representation (suitable @@ -359,12 +359,12 @@ variable is never accessed outside of the current function, allowing it to be promoted to a simple SSA value instead of a memory location.

      -
      - - +

      + Well-Formedness +

      -
      +

      It is important to note that this document describes 'well formed' LLVM assembly language. There is a difference between what the parser accepts and @@ -384,13 +384,15 @@

      +
      + - +

      Identifiers

      -
      +

      LLVM identifiers come in two basic types: global and local. Global identifiers (functions, global variables) begin with the '@' @@ -475,14 +477,15 @@

      - +

      High Level Structure

      - +
      - +

      + Module Structure +

      -
      +

      LLVM programs are composed of "Module"s, each of which is a translation unit of the input programs. Each module consists of functions, global variables, @@ -528,11 +531,11 @@ define i32 @main() { ; i32()*  

      - + -
      +

      All Global Variables and Functions have one of the following types of linkage:

      @@ -677,11 +680,11 @@ define i32 @main() { ; i32()*  
      - + -
      +

      LLVM functions, calls and invokes can all have an optional calling @@ -750,11 +753,11 @@ define i32 @main() { ; i32()*  

      - + -
      +

      All Global Variables and Functions have one of the following visibility styles:

      @@ -784,11 +787,11 @@ define i32 @main() { ; i32()*  
      - + -
      +

      LLVM IR allows you to specify name aliases for certain types. This can make it easier to read the IR and make the IR more condensed (particularly when @@ -815,11 +818,11 @@ define i32 @main() { ; i32()*  

      - + -
      +

      Global variables define regions of memory allocated at compilation time instead of run-time. Global variables may optionally be initialized, may @@ -883,11 +886,11 @@ define i32 @main() { ; i32()*   -

      + -
      +

      LLVM function definitions consist of the "define" keyword, an optional linkage type, an optional @@ -946,11 +949,11 @@ define [linkage] [visibility]

      -
      +

      Aliases -

      + -
      +

      Aliases act as "second name" for the aliasee value (which can be either function, global variable, another alias or bitcast of global value). Aliases @@ -965,11 +968,11 @@ define [linkage] [visibility]

      - + -
      +

      Named metadata is a collection of metadata. Metadata nodes (but not metadata strings) are the only valid operands for @@ -988,9 +991,11 @@ define [linkage] [visibility]

      - +

      + Parameter Attributes +

      -
      +

      The return type and each parameter of a function type may have a set of parameter attributes associated with them. Parameter attributes are @@ -1017,13 +1022,15 @@ declare signext i8 @returns_signed_char()

      zeroext
      This indicates to the code generator that the parameter or return value - should be zero-extended to a 32-bit value by the caller (for a parameter) - or the callee (for a return value).
      + should be zero-extended to the extent required by the target's ABI (which + is usually 32-bits, but is 8-bits for a i1 on x86-64) by the caller (for a + parameter) or the callee (for a return value).
      signext
      This indicates to the code generator that the parameter or return value - should be sign-extended to a 32-bit value by the caller (for a parameter) - or the callee (for a return value).
      + should be sign-extended to the extent required by the target's ABI (which + is usually 32-bits) by the caller (for a parameter) or the callee (for a + return value).
      inreg
      This indicates that this parameter or return value should be treated in a @@ -1095,11 +1102,11 @@ declare signext i8 @returns_signed_char()
      - + -
      +

      Each function may specify a garbage collector name, which is simply a string:

      @@ -1115,11 +1122,11 @@ define void @f() gc "name" { ... }
      - + -
      +

      Function attributes are set to communicate additional information about a function. Function attributes are considered to be part of the function, not @@ -1238,11 +1245,11 @@ define void @f() optsize { ... }

      - + -
      +

      Modules may contain "module-level inline asm" blocks, which corresponds to the GCC "file scope inline asm" blocks. These blocks are internally @@ -1264,11 +1271,11 @@ module asm "more can go here"

      - + -
      +

      A module may specify a target specific data layout string that specifies how data is to be laid out in memory. The syntax for the data layout is @@ -1376,11 +1383,11 @@ target datalayout = "layout specification"

      - + -
      +

      Any memory access must be done through a pointer value associated with an address range of the memory access, otherwise the behavior @@ -1440,11 +1447,11 @@ to implement type-based alias analysis.

      - + -
      +

      Certain memory accesses, such as loads, stores, and

      +
      + - +

      Type System

      -
      +

      The LLVM type system is one of the most important features of the intermediate representation. Being typed enables a number of optimizations @@ -1471,13 +1480,12 @@ synchronization behavior.

      and transformations that are not feasible to perform on normal three address code representations.

      -
      - - +

      + Type Classifications +

      -
      +

      The types fall into a few useful classifications:

      @@ -1534,19 +1542,21 @@ Classifications
      - +

      + Primitive Types +

      -
      +

      The primitive types are the fundamental building blocks of the LLVM system.

      -
      - - +

      + Integer Type +

      -
      +
      Overview:

      The integer type is a very simple type that simply specifies an arbitrary @@ -1580,9 +1590,11 @@ Classifications

      - +

      + Floating Point Types +

      -
      +
      @@ -1598,9 +1610,11 @@ Classifications - +

      + X86mmx Type +

      -
      +
      Overview:

      The x86mmx type represents a value held in an MMX register on an x86 machine. The operations allowed on it are quite limited: parameters and return values, load and store, and bitcast. User-specified MMX instructions are represented as intrinsic or asm calls with arguments and/or results of this type. There are no arrays, vectors or constants of this type.

      @@ -1613,9 +1627,11 @@ Classifications
      - +

      + Void Type +

      -
      +
      Overview:

      The void type does not represent any value and has no size.

      @@ -1628,9 +1644,11 @@ Classifications
      - +

      + Label Type +

      -
      +
      Overview:

      The label type represents code labels.

      @@ -1643,9 +1661,11 @@ Classifications
      - +

      + Metadata Type +

      -
      +
      Overview:

      The metadata type represents embedded metadata. No derived types may be @@ -1659,11 +1679,14 @@ Classifications

      + - +

      + Derived Types +

      -
      +

      The real power in LLVM comes from the derived types in the system. This is what allows a programmer to represent arrays, functions, pointers, and other @@ -1673,12 +1696,12 @@ Classifications

      of another array.

      -
      - - +

      + Aggregate Types +

      -
      +

      Aggregate Types are a subset of derived types that can contain multiple member types. Arrays, @@ -1688,9 +1711,11 @@ Classifications

      - +

      + Array Type +

      -
      +
      Overview:

      The array type is a very simple derived type that arranges elements @@ -1746,9 +1771,11 @@ Classifications

      - +

      + Function Type +

      -
      +
      Overview:

      The function type can be thought of as a function signature. It consists of @@ -1799,9 +1826,11 @@ Classifications

      - +

      + Structure Type +

      -
      +
      Overview:

      The structure type is used to represent a collection of data members together @@ -1837,10 +1866,11 @@ Classifications

      - +

      + Packed Structure Type +

      -
      +
      Overview:

      The packed structure type is used to represent a collection of data members @@ -1875,9 +1905,11 @@ Classifications

      - +

      + Pointer Type +

      -
      +
      Overview:

      The pointer type is used to specify memory locations. @@ -1919,9 +1951,11 @@ Classifications

      - +

      + Vector Type +

      -
      +
      Overview:

      A vector type is a simple derived type that represents a vector of elements. @@ -1958,8 +1992,11 @@ Classifications

      - -
      +

      + Opaque Type +

      + +
      Overview:

      Opaque types are used to represent unknown types in the system. This @@ -1982,12 +2019,14 @@ Classifications

      - - -
      + +

      + Type Up-references +

      + +
      Overview:

      An "up reference" allows you to refer to a lexically enclosing type without @@ -2030,21 +2069,23 @@ Classifications

      + + - +

      Constants

      -
      +

      LLVM has several different basic types of constants. This section describes them all and their syntax.

      -
      - - +

      + Simple Constants +

      -
      +
      Boolean constants
      @@ -2097,12 +2138,12 @@ Classifications
      - + -
      +

      Complex constants are a (potentially recursive) combination of simple constants and smaller complex constants.

      @@ -2152,11 +2193,11 @@ Classifications
      - + -
      +

      The addresses of global variables and functions are always implicitly valid @@ -2174,8 +2215,11 @@ Classifications

      - -
      +

      + Undefined Values +

      + +

      The string 'undef' can be used anywhere a constant is expected, and indicates that the user of the value may receive an unspecified bit-pattern. @@ -2314,8 +2358,11 @@ b: unreachable

      - -
      +

      + Trap Values +

      + +

      Trap values are similar to undef values, however instead of representing an unspecified bit pattern, they represent the @@ -2367,7 +2414,12 @@ b: unreachable terminator instruction if the terminator instruction has multiple successors and the instruction is always executed when control transfers to one of the successors, and - may not be executed when control is transfered to another. + may not be executed when control is transferred to another. + +

    10. Additionally, an instruction also control-depends on a terminator + instruction if the set of instructions it otherwise depends on would be + different if the terminator had transferred control to a different + successor.
    11. Dependence is transitive.
    12. @@ -2411,17 +2463,34 @@ end: ; control-dependent on %cmp, so this ; always results in a trap value. - volatile store i32 0, i32* @g ; %end is control-equivalent to %entry - ; so this is defined (ignoring earlier + volatile store i32 0, i32* @g ; This would depend on the store in %true + ; if %cmp is true, or the store in %entry + ; otherwise, so this is undefined behavior. + + %br i1 %cmp, %second_true, %second_end + ; The same branch again, but this time the + ; true block doesn't have side effects. + +second_true: + ; No side effects! + br label %end + +second_end: + volatile store i32 0, i32* @g ; This time, the instruction always depends + ; on the store in %end. Also, it is + ; control-equivalent to %end, so this is + ; well- defined (again, ignoring earlier ; undefined behavior in this example).
      - -
      +

      + Addresses of Basic Blocks +

      + +

      blockaddress(@function, %block)

      @@ -2446,10 +2515,11 @@ end: - +

      + Constant Expressions +

      -
      +

      Constant expressions are used to allow expressions involving other constants to be used as constants. Constant expressions may be of @@ -2575,16 +2645,18 @@ end:

      - - - - - - -
      + +

      Other Values

      + +
      + +

      +Inline Assembler Expressions +

      + +

      LLVM supports inline assembler expressions (as opposed to Module-Level Inline Assembly) through the use of @@ -2633,13 +2705,12 @@ call void asm alignstack "eieio", ""() documented here. Constraints on what can be done (e.g. duplication, moving, etc need to be documented). This is probably best done by reference to another document that covers inline asm from a holistic perspective.

      -
      - + -
      +

      The call instructions that wrap inline asm nodes may have a "!srcloc" MDNode attached to it that contains a list of constant integers. If present, the @@ -2660,12 +2731,14 @@ call void asm sideeffect "something bad", ""(), !srcloc !42

      - - -
      + +

      + Metadata Nodes and Metadata Strings +

      + +

      LLVM IR allows metadata to be attached to instructions in the program that can convey extra information about the code to the optimizers and code @@ -2690,25 +2763,31 @@ call void asm sideeffect "something bad", ""(), !srcloc !42

      Metadata can be used as function arguments. Here llvm.dbg.value function is using two metadata arguments.

      -
      -       call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
      -     
      +
      +
      +call void @llvm.dbg.value(metadata !24, i64 0, metadata !25)
      +
      +

      Metadata can be attached with an instruction. Here metadata !21 is attached with add instruction using !dbg identifier.

      -
      -      %indvar.next = add i64 %indvar, 1, !dbg !21
      -    
      +
      +
      +%indvar.next = add i64 %indvar, 1, !dbg !21
      +
      +
      + +
      - + - +

      LLVM has a number of "magic" global variables that contain data that affect code generation or other IR semantics. These are documented here. All globals of this sort should have a section specified as "llvm.metadata". This @@ -2716,11 +2795,11 @@ section and all globals that start with "llvm." are reserved for use by LLVM.

      - + -
      +

      The @llvm.used global is an array with i8* element type which has appending linkage. This array contains a list of @@ -2751,11 +2830,13 @@ object file to prevent the assembler and linker from molesting the symbol.

      - +

      + + The 'llvm.compiler.used' Global Variable + +

      -
      +

      The @llvm.compiler.used directive is the same as the @llvm.used directive, except that it only prevents the compiler from @@ -2769,11 +2850,11 @@ should not be exposed to source languages.

      - + -
      +
       %0 = type { i32, void ()* }
       @llvm.global_ctors = appending global [1 x %0] [%0 { i32 65535, void ()* @ctor }]
      @@ -2784,11 +2865,11 @@ should not be exposed to source languages.

      - + -
      +
       %0 = type { i32, void ()* }
       @llvm.global_dtors = appending global [1 x %0] [%0 { i32 65535, void ()* @dtor }]
      @@ -2799,12 +2880,13 @@ should not be exposed to source languages.

      +
      - +

      Instruction Reference

      -
      +

      The LLVM instruction set consists of several different classifications of instructions: terminator @@ -2813,13 +2895,12 @@ should not be exposed to source languages.

      memory instructions, and other instructions.

      -
      - - +

      + Terminator Instructions +

      -
      +

      As mentioned previously, every basic block in a program ends with a "Terminator" instruction, which indicates which @@ -2837,13 +2918,12 @@ Instructions

      'unwind' instruction, and the 'unreachable' instruction.

      -
      - - +

      + 'ret' Instruction +

      -
      +
      Syntax:
      @@ -2889,9 +2969,11 @@ Instruction 
      - +

      + 'br' Instruction +

      -
      +
      Syntax:
      @@ -2930,11 +3012,11 @@ IfUnequal:
       
      - + -
      +
      Syntax:
      @@ -2985,11 +3067,11 @@ IfUnequal:
       
       
       
      -
      +
       
      -
      +
      Syntax:
      @@ -3033,11 +3115,11 @@ IfUnequal:
       
       
       
      -
      +
       
      -
      +
      Syntax:
      @@ -3123,10 +3205,11 @@ that the invoke/unwind semantics are likely to change in future versions.

      - +

      + 'unwind' Instruction +

      -
      +
      Syntax:
      @@ -3154,10 +3237,11 @@ that the invoke/unwind semantics are likely to change in future versions.

      - +

      + 'unreachable' Instruction +

      -
      +
      Syntax:
      @@ -3175,10 +3259,14 @@ Instruction 
      - - +
      -
      + +

      + Binary Operations +

      + +

      Binary operators are used to do most of the computation in a program. They require two operands of the same type, execute an operation on them, and @@ -3188,14 +3276,12 @@ Instruction

      There are several different binary operators:

      -
      - - + -
      +
      Syntax:
      @@ -3236,11 +3322,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -3266,11 +3352,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -3318,11 +3404,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -3354,11 +3440,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -3404,11 +3490,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -3434,10 +3520,11 @@ Instruction 
      - +

      + 'udiv' Instruction +

      -
      +
      Syntax:
      @@ -3474,10 +3561,11 @@ Instruction 
      - +

      + 'sdiv' Instruction +

      -
      +
      Syntax:
      @@ -3516,10 +3604,11 @@ Instruction 
      - +

      + 'fdiv' Instruction +

      -
      +
      Syntax:
      @@ -3545,10 +3634,11 @@ Instruction 
      - +

      + 'urem' Instruction +

      -
      +
      Syntax:
      @@ -3582,11 +3672,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -3606,9 +3696,10 @@ Instruction 
      Semantics:

      This instruction returns the remainder of a division (where the result - has the same sign as the dividend, op1), not the modulo - operator (where the result has the same sign as the divisor, op2) of - a value. For more information about the difference, + is either zero or has the same sign as the dividend, op1), not the + modulo operator (where the result is either zero or has the same sign + as the divisor, op2) of a value. + For more information about the difference, see The Math Forum. For a table of how this is implemented in various languages, please see @@ -3632,10 +3723,11 @@ Instruction

      - +

      + 'frem' Instruction +

      -
      +
      Syntax:
      @@ -3662,11 +3754,14 @@ Instruction 
      - - +
      -
      + +

      + Bitwise Binary Operations +

      + +

      Bitwise binary operators are used to do various forms of bit-twiddling in a program. They are generally very efficient instructions and can commonly be @@ -3674,13 +3769,12 @@ Operations

      same type, execute an operation on them, and produce a single value. The resulting value is the same type as its operands.

      -
      - - +

      + 'shl' Instruction +

      -
      +
      Syntax:
      @@ -3727,10 +3821,11 @@ Instruction 
      - +

      + 'lshr' Instruction +

      -
      +
      Syntax:
      @@ -3773,9 +3868,11 @@ Instruction 
      - -
      +

      + 'ashr' Instruction +

      + +
      Syntax:
      @@ -3818,10 +3915,11 @@ Instruction 
      - +

      + 'and' Instruction +

      -
      +
      Syntax:
      @@ -3878,9 +3976,11 @@ Instruction 
      - +

      + 'or' Instruction +

      -
      +
      Syntax:
      @@ -3939,10 +4039,11 @@ Instruction 
      - +

      + 'xor' Instruction +

      -
      +
      Syntax:
      @@ -4002,12 +4103,14 @@ Instruction 
      - - -
      + +

      + Vector Operations +

      + +

      LLVM supports several instructions to represent vector operations in a target-independent manner. These instructions cover the element-access and @@ -4016,14 +4119,12 @@ Instruction

      will want to use target-specific intrinsics to take full advantage of a specific target.

      -
      - - + -
      +
      Syntax:
      @@ -4055,11 +4156,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -4091,11 +4192,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -4138,24 +4239,24 @@ Instruction 
      - - -
      + +

      + Aggregate Operations +

      + +

      LLVM supports several instructions for working with aggregate values.

      -
      - - + -
      +
      Syntax:
      @@ -4193,11 +4294,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -4231,27 +4332,26 @@ Instruction 
      - - - -
      + +

      + Memory Access and Addressing Operations +

      + +

      A key design point of an SSA-based representation is how it represents memory. In LLVM, no memory locations are in SSA form, which makes things very simple. This section describes how to read, write, and allocate memory in LLVM.

      -
      - - + -
      +
      Syntax:
      @@ -4298,10 +4398,11 @@ Instruction 
      - +

      + 'load' Instruction +

      -
      +
      Syntax:
      @@ -4356,10 +4457,11 @@ Instruction 
      - +

      + 'store' Instruction +

      -
      +
      Syntax:
      @@ -4417,11 +4519,11 @@ Instruction 
      - + -
      +
      Syntax:
      @@ -4547,23 +4649,25 @@ entry:
       
       
      - - -
      + +

      + Conversion Operations +

      + +

      The instructions in this category are the conversion instructions (casting) which all take a single operand and a type. They perform various bit conversions on the operand.

      -
      - - -
      + + +
      Syntax:
      @@ -4599,10 +4703,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4638,10 +4743,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4676,11 +4782,11 @@ entry:
       
      - + -
      +
      Syntax:
      @@ -4714,10 +4820,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4743,17 +4850,18 @@ entry:
       
       
      Example:
      -  %X = fpext float 3.1415 to double        ; yields double:3.1415
      -  %Y = fpext float 1.0 to float            ; yields float:1.0 (no-op)
      +  %X = fpext float 3.125 to double         ; yields double:3.125000e+00
      +  %Y = fpext double %X to fp128            ; yields fp128:0xL00000000000000004000900000000000
       
      - -
      + + +
      Syntax:
      @@ -4787,10 +4895,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4825,10 +4934,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4861,10 +4971,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4896,10 +5007,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4933,10 +5045,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -4970,10 +5083,11 @@ entry:
       
      - -
      + + +
      Syntax:
      @@ -5012,21 +5126,24 @@ entry:
       
       
      - - +
      -
      + +

      + Other Operations +

      + +

      The instructions in this category are the "miscellaneous" instructions, which defy better classification.

      -
      - - +

      + 'icmp' Instruction +

      -
      +
      Syntax:
      @@ -5125,10 +5242,11 @@ entry:
       
      - +

      + 'fcmp' Instruction +

      -
      +
      Syntax:
      @@ -5245,11 +5363,11 @@ entry:
       
      - + -
      +
      Syntax:
      @@ -5293,11 +5411,11 @@ Loop:       ; Infinite loop that counts from 0 on up...
       
      - + -
      +
      Syntax:
      @@ -5336,11 +5454,11 @@ Loop:       ; Infinite loop that counts from 0 on up...
       
      - + -
      +
      Syntax:
      @@ -5445,11 +5563,11 @@ freestanding environments and non-C-based languages.

      - + -
      +
      Syntax:
      @@ -5490,11 +5608,15 @@ freestanding environments and non-C-based languages.

      +
      + +
      + - +

      Intrinsic Functions

      -
      +

      LLVM supports the notion of an "intrinsic function". These functions have well known names and semantics and are required to follow certain @@ -5537,14 +5659,12 @@ freestanding environments and non-C-based languages.

      To learn how to add an intrinsic function, please see the Extending LLVM Guide.

      -
      - - + -
      +

      Variable argument support is defined in LLVM with the va_arg instruction and these three @@ -5586,15 +5706,13 @@ declare void @llvm.va_copy(i8*, i8*) declare void @llvm.va_end(i8*)

      -
      - - + -
      +
      Syntax:
      @@ -5620,11 +5738,11 @@ declare void @llvm.va_end(i8*)
       
      - + -
      +
      Syntax:
      @@ -5651,11 +5769,11 @@ declare void @llvm.va_end(i8*)
       
      - + -
      +
      Syntax:
      @@ -5681,12 +5799,14 @@ declare void @llvm.va_end(i8*)
       
       
      - - -
      + +

      + Accurate Garbage Collection Intrinsics +

      + +

      LLVM support for Accurate Garbage Collection (GC) requires the implementation and generation of these @@ -5701,14 +5821,12 @@ LLVM.

      The garbage collection intrinsics only operate on objects in the generic address space (address space zero).

      -
      - - + -
      +
      Syntax:
      @@ -5735,11 +5853,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -5767,11 +5885,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -5798,24 +5916,24 @@ LLVM.

      - - -
      + +

      + Code Generator Intrinsics +

      + +

      These intrinsics are provided by LLVM to expose special features that may only be implemented with code generator support.

      -
      - - + -
      +
      Syntax:
      @@ -5846,11 +5964,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -5880,11 +5998,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -5910,11 +6028,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -5935,11 +6053,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -5968,11 +6086,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -5999,11 +6117,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -6025,26 +6143,26 @@ LLVM.

      - - -
      + +

      + Standard C Library Intrinsics +

      + +

      LLVM provides intrinsics for a few important standard C library functions. These intrinsics allow source-language front-ends to pass information about the alignment of the pointer arguments to the code generator, providing opportunity for more efficient code generation.

      -
      - - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.memcpy on any @@ -6094,11 +6212,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.memmove on any integer bit @@ -6150,11 +6268,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.memset on any integer bit @@ -6200,11 +6318,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.sqrt on any @@ -6238,11 +6356,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.powi on any @@ -6274,11 +6392,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.sin on any @@ -6308,11 +6426,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.cos on any @@ -6342,11 +6460,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.pow on any @@ -6376,24 +6494,24 @@ LLVM.

      - - -
      + +

      + Bit Manipulation Intrinsics +

      + +

      LLVM provides intrinsics for a few important bit manipulation operations. These allow efficient code generation for some algorithms.

      -
      - - + -
      +
      Syntax:

      This is an overloaded intrinsic function. You can use bswap on any integer @@ -6424,11 +6542,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit @@ -6456,11 +6574,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.ctlz on any @@ -6490,11 +6608,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.cttz on any @@ -6523,23 +6641,25 @@ LLVM.

      - - -
      + +

      + Arithmetic with Overflow Intrinsics +

      + +

      LLVM provides intrinsics for some arithmetic with overflow operations.

      -
      - - +

      + + 'llvm.sadd.with.overflow.*' Intrinsics + +

      -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.sadd.with.overflow @@ -6581,11 +6701,13 @@ LLVM.

      - +

      + + 'llvm.uadd.with.overflow.*' Intrinsics + +

      -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.uadd.with.overflow @@ -6626,11 +6748,13 @@ LLVM.

      - +

      + + 'llvm.ssub.with.overflow.*' Intrinsics + +

      -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.ssub.with.overflow @@ -6672,11 +6796,13 @@ LLVM.

      - +

      + + 'llvm.usub.with.overflow.*' Intrinsics + +

      -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.usub.with.overflow @@ -6718,11 +6844,13 @@ LLVM.

      - +

      + + 'llvm.smul.with.overflow.*' Intrinsics + +

      -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.smul.with.overflow @@ -6765,11 +6893,13 @@ LLVM.

      - +

      + + 'llvm.umul.with.overflow.*' Intrinsics + +

      -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.umul.with.overflow @@ -6810,12 +6940,14 @@ LLVM.

      - - -
      + +

      + Half Precision Floating Point Intrinsics +

      + +

      Half precision floating point is a storage-only format. This means that it is a dense encoding (in memory) but does not support computation in the @@ -6829,14 +6961,15 @@ LLVM.

      float if needed, then converted to i16 with llvm.convert.to.fp16, then storing as an i16 value.

      -
      - +

      + + 'llvm.convert.to.fp16' Intrinsic + +

      -
      +
      Syntax:
      @@ -6867,11 +7000,13 @@ LLVM.

      - +

      + + 'llvm.convert.from.fp16' Intrinsic + +

      -
      +
      Syntax:
      @@ -6901,12 +7036,14 @@ LLVM.

      - - -
      + +

      + Debugger Intrinsics +

      + +

      The LLVM debugger intrinsics (which all start with llvm.dbg. prefix), are described in @@ -6916,11 +7053,11 @@ LLVM.

      - + -
      +

      The LLVM exception handling intrinsics (which all start with llvm.eh. prefix), are described in @@ -6930,11 +7067,11 @@ LLVM.

      - + -
      +

      This intrinsic makes it possible to excise one parameter, marked with the nest attribute, from a function. @@ -6960,14 +7097,14 @@ LLVM.

      The call %val = call i32 %fp(i32 %x, i32 %y) is then equivalent to %val = call i32 %f(i8* %nval, i32 %x, i32 %y).

      -
      - - +

      + + 'llvm.init.trampoline' Intrinsic + +

      -
      +
      Syntax:
      @@ -7004,12 +7141,14 @@ LLVM.

      - - -
      + +

      + Atomic Operations and Synchronization Intrinsics +

      + +

      These intrinsic functions expand the "universal IR" of LLVM to represent hardware constructs for atomic operations and memory synchronization. This @@ -7029,13 +7168,12 @@ LLVM.

      No one model or paradigm should be selected above others unless the hardware itself ubiquitously does so.

      -
      - - -
      + + +
      Syntax:
         declare void @llvm.memory.barrier(i1 <ll>, i1 <ls>, i1 <sl>, i1 <ss>, i1 <device>)
      @@ -7103,11 +7241,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.atomic.cmp.swap on @@ -7163,10 +7301,11 @@ LLVM.

      - -
      + + +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.atomic.swap on any @@ -7219,12 +7358,11 @@ LLVM.

      - - -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.atomic.load.add on @@ -7269,12 +7407,11 @@ LLVM.

      - - -
      +
      Syntax:

      This is an overloaded intrinsic. You can use llvm.atomic.load.sub on @@ -7321,14 +7458,25 @@ LLVM.

      - +

      + + 'llvm.atomic.load.and.*' Intrinsic + +
      + + 'llvm.atomic.load.nand.*' Intrinsic + +
      + + 'llvm.atomic.load.or.*' Intrinsic + +
      + + 'llvm.atomic.load.xor.*' Intrinsic + +

      -
      +
      Syntax:

      These are overloaded intrinsics. You can @@ -7401,14 +7549,25 @@ LLVM.

      - +

      + + 'llvm.atomic.load.max.*' Intrinsic + +
      + + 'llvm.atomic.load.min.*' Intrinsic + +
      + + 'llvm.atomic.load.umax.*' Intrinsic + +
      + + 'llvm.atomic.load.umin.*' Intrinsic + +

      -
      +
      Syntax:

      These are overloaded intrinsics. You can use llvm.atomic.load_max, @@ -7479,25 +7638,24 @@ LLVM.

      - - - -
      + +

      + Memory Use Markers +

      + +

      This class of intrinsics exists to provide information about the lifetime of memory objects and ranges where variables are immutable.

      -
      - - + -
      +
      Syntax:
      @@ -7523,11 +7681,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -7552,11 +7710,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -7580,11 +7738,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -7606,24 +7764,24 @@ LLVM.

      - - -
      + +

      + General Intrinsics +

      + +

      This class of intrinsics is designed to be generic and has no specific purpose.

      -
      - - + -
      +
      Syntax:
      @@ -7647,11 +7805,11 @@ LLVM.

      - + -
      +
      Syntax:

      This is an overloaded intrinsic. You can use 'llvm.annotation' on @@ -7683,11 +7841,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -7708,11 +7866,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -7742,11 +7900,11 @@ LLVM.

      - + -
      +
      Syntax:
      @@ -7776,6 +7934,10 @@ LLVM.

      +
      + +
      +
      @@ -7785,8 +7947,8 @@ LLVM.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-24 22:01:34 +0100 (Thu, 24 Feb 2011) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
      diff --git a/docs/Lexicon.html b/docs/Lexicon.html index 09fae3996605..449e26eb922e 100644 --- a/docs/Lexicon.html +++ b/docs/Lexicon.html @@ -9,12 +9,12 @@ content="A glossary of terms used with the LLVM project."> -
      The LLVM Lexicon
      +

      The LLVM Lexicon

      NOTE: This document is a work in progress!

      -
      Table Of Contents
      +

      Table Of Contents

      -
      +
      @@ -83,19 +83,20 @@ -
      Definitions
      +

      Definitions

      +
      - -
      +

      - A -

      +
      ADCE
      Aggressive Dead Code Elimination
      - -
      +

      - B -

      +
      BURS
      Bottom Up Rewriting System—A method of instruction selection for @@ -104,8 +105,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - C -

      +
      CSE
      Common Subexpression Elimination. An optimization that removes common @@ -116,8 +117,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - D -

      +
      DAG
      Directed Acyclic Graph
      @@ -136,8 +137,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - G -

      +
      GC
      Garbage Collection. The practice of using reachability analysis instead @@ -145,8 +146,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - H -

      +
      Heap
      In garbage collection, the region of memory which is managed using @@ -154,8 +155,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - I -

      +
      IPA
      Inter-Procedural Analysis. Refers to any variety of code analysis that @@ -169,8 +170,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - L -

      +
      LCSSA
      Loop-Closed Static Single Assignment Form
      @@ -183,16 +184,16 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - M -

      +
      MC
      Machine Code
      - -
      +

      - O -

      +
      Object Pointer
      A pointer to an object such that the garbage collector is able to trace @@ -202,8 +203,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - P -

      +
      PRE
      Partial Redundancy Elimination
      @@ -211,8 +212,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - R -

      +
      RAUW
      An abbreviation for Replace All Uses With. The functions User::replaceUsesOfWith(), @@ -234,8 +235,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      - -
      +

      - S -

      +
      Safe Point
      In garbage collection, it is necessary to identify stack @@ -261,6 +262,8 @@ href="http://www.program-transformation.org/Transform/BURG">BURG tool.
      function.
      + +

      BURG tool. href="http://validator.w3.org/check/referer">Valid HTML 4.01The LLVM Team
      -The LLVM Compiler Infrastructure
      -Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ +The LLVM Compiler Infrastructure
      +Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
      diff --git a/docs/LinkTimeOptimization.html b/docs/LinkTimeOptimization.html index 30334744d7f9..289da236270a 100644 --- a/docs/LinkTimeOptimization.html +++ b/docs/LinkTimeOptimization.html @@ -6,9 +6,9 @@ -
      +

      LLVM Link Time Optimization: Design and Implementation -

      +
      - + -
      +

      LLVM features powerful intermodular optimizations which can be used at link time. Link Time Optimization (LTO) is another name for intermodular optimization @@ -50,12 +50,12 @@ and design between the LTO optimizer and the linker.

      - + -
      +

      The LLVM Link Time Optimizer provides complete transparency, while doing intermodular optimization, in the compiler tool chain. Its main goal is to let @@ -69,14 +69,13 @@ the linker and LLVM optimizer helps to do optimizations that are not possible in other models. The linker input allows the optimizer to avoid relying on conservative escape analysis.

      -
      - + -
      +

      The following example illustrates the advantages of LTO's integrated approach and clean interface. This example requires a system linker which supports LTO through the interface described in this document. Here, @@ -145,11 +144,11 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific

      - + -
      +
      Compiler driver invokes link time optimizer separately.
      In this model the link time optimizer is not able to take advantage of @@ -175,12 +174,14 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific
      - - -
      + +

      + Multi-phase communication between libLTO and linker +

      + +

      The linker collects information about symbol definitions and uses in various link objects which is more accurate than any information collected by other tools during typical build cycles. The linker collects this @@ -192,14 +193,13 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific Our goal is to take advantage of tight integration between the linker and the optimizer by sharing this information during various linking phases.

      -
      - + -
      +

      The linker first reads all object files in natural order and collects symbol information. This includes native object files as well as LLVM bitcode files. To minimize the cost to the linker in the case that all .o files @@ -219,11 +219,11 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific

      - + -
      +

      In this stage, the linker resolves symbols using global symbol table. It may report undefined symbol errors, read archive members, replace weak symbols, etc. The linker is able to do this seamlessly even though it @@ -233,10 +233,10 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific

      - -
      + +

      After symbol resolution, the linker tells the LTO shared object which symbols are needed by native object files. In the example above, the linker reports that only foo1() is used by native object files using @@ -248,11 +248,11 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific

      - + -
      +

      In this phase, the linker reads the optimized native object file and updates the internal global symbol table to reflect any changes. The linker also collects information about any changes in use of external symbols by @@ -264,12 +264,14 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific bitcode files.

      - - -
      + +

      +libLTO +

      + +

      libLTO is a shared object that is part of the LLVM tools, and is intended for use by a linker. libLTO provides an abstract C interface to use the LLVM interprocedural optimizer without exposing details @@ -278,14 +280,13 @@ $ llvm-gcc a.o main.o -o main # <-- standard link command without any modific be possible for a completely different compilation technology to provide a different libLTO that works with their object files and the standard linker tool.

      -
      - + -
      +

      A non-native object file is handled via an lto_module_t. The following functions allow the linker to check if a file (on disk @@ -325,11 +326,11 @@ lto_module_get_symbol_attribute(lto_module_t, unsigned int)

      - + -
      +

      Once the linker has loaded each non-native object file into an lto_module_t, it can request libLTO to process them all and @@ -371,6 +372,8 @@ of the native object files.

      +
      +
      @@ -381,8 +384,8 @@ of the native object files.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Devang Patel and Nick Kledzik
      - LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-09-29 22:09:55 +0200 (Wed, 29 Sep 2010) $ + LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/MakefileGuide.html b/docs/MakefileGuide.html index 6ceb09db3274..1e7c346952f0 100644 --- a/docs/MakefileGuide.html +++ b/docs/MakefileGuide.html @@ -7,7 +7,7 @@ -
      LLVM Makefile Guide
      +

      LLVM Makefile Guide

      1. Introduction
      2. @@ -77,10 +77,10 @@
      - +

      Introduction

      -
      +

      This document provides usage information about the LLVM makefile system. While loosely patterned after the BSD makefile system, LLVM has taken a departure from BSD in order to implement additional features needed by LLVM. @@ -99,20 +99,19 @@

      - +

      General Concepts

      -
      +

      The LLVM Makefile System is the component of LLVM that is responsible for building the software, testing it, generating distributions, checking those distributions, installing and uninstalling, etc. It consists of several files throughout the source tree. These files and other general concepts are described in this section.

      -
      - -
      +

      Projects

      +

      The LLVM Makefile System is quite generous. It not only builds its own software, but it can build yours too. Built into the system is knowledge of the llvm/projects directory. Any directory under projects @@ -129,8 +128,8 @@

      - -
      +

      Variable Values

      +

      To use the makefile system, you simply create a file named Makefile in your directory and declare values for certain variables. The variables and values that you select determine what the makefile system @@ -139,16 +138,15 @@

      - -
      +

      Including Makefiles

      +

      Setting variables alone is not enough. You must include into your Makefile additional files that provide the rules of the LLVM Makefile system. The various files involved are described in the sections that follow.

      -
      - -
      +

      Makefile

      +

      Each directory to participate in the build needs to have a file named Makefile. This is the file first read by make. It has three sections:

      @@ -163,9 +161,8 @@
      - -
      +

      Makefile.common

      +

      Every project must have a Makefile.common file at its top source directory. This file serves three purposes:

        @@ -182,9 +179,8 @@
      - -
      +

      Makefile.config

      +

      Every project must have a Makefile.config at the top of its build directory. This file is generated by the configure script from the pattern provided by the @@ -196,8 +192,8 @@

      - -
      +

      Makefile.rules

      +

      This file, located at $(LLVM_SRC_ROOT)/Makefile.rules is the heart of the LLVM Makefile System. It provides all the logic, dependencies, and rules for building the targets supported by the system. What it does largely @@ -205,9 +201,11 @@ have been set before Makefile.rules is included.

      +
      + - -
      +

      Comments

      +

      User Makefiles need not have comments in them unless the construction is unusual or it does not strictly follow the rules and patterns of the LLVM makefile system. Makefile comments are invoked with the pound (#) character. @@ -215,19 +213,20 @@ by make.

      +
      + - +

      Tutorial

      -
      +

      This section provides some examples of the different kinds of modules you can build with the LLVM makefile system. In general, each directory you provide will build a single object although that object may be composed of additionally compiled components.

      -
      - -
      +

      Libraries

      +

      Only a few variable definitions are needed to build a regular library. Normally, the makefile system will build all the software into a single libname.o (pre-linked) object. This means the library is not @@ -256,11 +255,10 @@ -load option. See the WritingAnLLVMPass.html document for an example of why you might want to do this. -

      - -
      +

      Bitcode Modules

      +

      In some situations, it is desirable to build a single bitcode module from a variety of sources, instead of an archive, shared library, or bitcode library. Bitcode modules can be specified in addition to any of the other @@ -280,10 +278,10 @@

      - -
      + +

      In some situations, you need to create a loadable module. Loadable modules can be loaded into programs like opt or llc to specify additional passes to run or targets to support. Loadable modules are also @@ -311,9 +309,11 @@ library which is part of lib/System implementation.

      +
      + - -
      +

      Tools

      +

      For building executable programs (tools), you must provide the name of the tool and the names of the libraries you wish to link with the tool. For example:

      @@ -344,11 +344,10 @@ syntax is used. Note that in order to use the .a suffix, the library in question must have been built with the ARCHIVE_LIBRARY option set.

      -
      - -
      +

      JIT Tools

      +

      Many tools will want to use the JIT features of LLVM. To do this, you simply specify that you want an execution 'engine', and the makefiles will automatically link in the appropriate JIT for the host or an interpreter @@ -367,11 +366,15 @@

      +
      + +
      + - +

      Targets Supported

      -
      +

      This section describes each of the targets that can be built using the LLVM Makefile system. Any target can be invoked from any directory but not all are applicable to a given directory (e.g. "check", "dist" and "install" will @@ -426,11 +429,10 @@

      - A -
      Remove built objects from installation directory.
      -
      - -
      +

      all (default)

      +

      When you invoke make with no arguments, you are implicitly instructing it to seek the "all" target (goal). This target is used for building the software recursively and will do different things in different @@ -440,15 +442,15 @@

      - -
      +

      all-local

      +

      This target is the same as all but it operates only on the current directory instead of recursively.

      - -
      +

      check

      +

      This target can be invoked from anywhere within a project's directories but always invokes the check-local target in the project's test directory, if it exists and has a @@ -464,8 +466,8 @@

      - -
      +

      check-local

      +

      This target should be implemented by the Makefile in the project's test directory. It is invoked by the check target elsewhere. Each project is free to define the actions of check-local as @@ -475,8 +477,8 @@

      - -
      +

      clean

      +

      This target cleans the build directory, recursively removing all things that the Makefile builds. The cleaning rules have been made guarded so they shouldn't go awry (via rm -f $(UNSET_VARIABLE)/* which will attempt @@ -484,15 +486,15 @@

      - -
      +

      clean-local

      +

      This target does the same thing as clean but only for the current (local) directory.

      - -
      +

      dist

      +

      This target builds a distribution tarball. It first builds the entire project using the all target and then tars up the necessary files and compresses it. The generated tarball is sufficient for a casual source @@ -500,8 +502,8 @@

      - -
      +

      dist-check

      +

      This target does the same thing as the dist target but also checks the distribution tarball. The check is made by unpacking the tarball to a new directory, configuring it, building it, installing it, and then verifying that @@ -512,16 +514,16 @@

      - -
      +

      dist-clean

      +

      This is a special form of the clean target. It performs a normal clean but also removes things pertaining to building the distribution.

      - -
      +

      install

      +

      This target finalizes shared objects and executables and copies all libraries, headers, executables and documentation to the directory given with the --prefix option to configure. When completed, @@ -538,8 +540,8 @@

      - -
      +

      preconditions

      +

      This utility target checks to see if the Makefile in the object directory is older than the Makefile in the source directory and copies it if so. It also reruns the configure script if that needs to @@ -549,15 +551,15 @@

      - -
      +

      printvars

      +

      This utility target just causes the LLVM makefiles to print out some of the makefile variables so that you can double check how things are set.

      - -
      +

      reconfigure

      +

      This utility target will force a reconfigure of LLVM or your project. It simply runs $(PROJ_OBJ_ROOT)/config.status --recheck to rerun the configuration tests and rebuild the configured files. This isn't generally @@ -566,8 +568,8 @@

      - -
      +

      spotless

      +

      This utility target, only available when $(PROJ_OBJ_ROOT) is not the same as $(PROJ_SRC_ROOT), will completely clean the $(PROJ_OBJ_ROOT) directory by removing its content entirely and @@ -578,8 +580,8 @@

      - -
      +

      tags

      +

      This target will generate a TAGS file in the top-level source directory. It is meant for use with emacs, XEmacs, or ViM. The TAGS file provides an index of symbol definitions so that the editor can jump you to the @@ -587,18 +589,20 @@

      - -
      +

      uninstall

      +

      This target is the opposite of the install target. It removes the header, library and executable files from the installation directories. Note that the directories themselves are not removed because it is not guaranteed that LLVM is the only thing installing there (e.g. --prefix=/usr).

      +
      + - +

      Variables

      -
      +

      Variables are used to tell the LLVM Makefile System what to do and to obtain information from it. Variables are also used internally by the LLVM Makefile System. Variable names that contain only the upper case alphabetic @@ -606,11 +610,10 @@ variables are internal to the LLVM Makefile System and should not be relied upon nor modified. The sections below describe how to use the LLVM Makefile variables.

      -
      - -
      +

      Control Variables

      +

      Variables listed in the table below should be set before the inclusion of $(LEVEL)/Makefile.common. These variables provide input to the LLVM make system that tell it what to do @@ -762,8 +765,8 @@

      - -
      +

      Override Variables

      +

      Override variables can be used to override the default values provided by the LLVM makefile system. These variables can be set in several ways:

      @@ -868,8 +871,8 @@
      - -
      +

      Readable Variables

      +

      Variables listed in the table below can be used by the user's Makefile but should not be changed. Changing the value will generally cause the build to go wrong, so don't do it.

      @@ -939,8 +942,8 @@
      - -
      +

      Internal Variables

      +

      Variables listed below are used by the LLVM Makefile System and considered internal. You should not use these variables under any circumstances.

      @@ -1018,6 +1021,8 @@

      +
      +
      @@ -1027,8 +1032,8 @@ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Reid Spencer
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-10-22 14:54:34 +0200 (Fri, 22 Oct 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
      diff --git a/docs/Packaging.html b/docs/Packaging.html index 7aa4a1c2e16b..b9329726991f 100644 --- a/docs/Packaging.html +++ b/docs/Packaging.html @@ -7,7 +7,7 @@ -
      Advice on Packaging LLVM
      +

      Advice on Packaging LLVM

      1. Overview
      2. Compile Flags
      3. @@ -17,9 +17,9 @@
      - +

      Overview

      -
      +

      LLVM sets certain default configure options to make sure our developers don't break things for constrained platforms. These settings are not optimal for most @@ -34,9 +34,9 @@ developed against each.

      - +

      Compile Flags

      -
      +

      LLVM runs much more quickly when it's optimized and assertions are removed. However, such a build is currently incompatible with users who build without @@ -65,9 +65,9 @@ versions of LLVM in parallel. The following configure flags are relevant:

      - +

      C++ Features

      -
      +
      RTTI
      LLVM disables RTTI by default. Add REQUIRES_RTTI=1 @@ -78,9 +78,9 @@ versions of LLVM in parallel. The following configure flags are relevant:
      - +

      Shared Library

      -
      +

      Configure with --enable-shared to build libLLVM-major.minor.(so|dylib) and link the tools @@ -89,9 +89,9 @@ against it. This saves lots of binary size at the cost of some startup time.

      - +

      Dependencies

      -
      +
      --enable-libffi
      Depend on src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"> Valid HTML 4.01 - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/Passes.html b/docs/Passes.html index fb2aff585bdb..ca9602c6e076 100644 --- a/docs/Passes.html +++ b/docs/Passes.html @@ -40,7 +40,7 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if ! --> -

      LLVM's Analysis and Transform Passes
      +

      LLVM's Analysis and Transform Passes

      1. Introduction
      2. @@ -55,8 +55,8 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

        \n" if !

      - -
      +

      Introduction

      +

      This document serves as a high level summary of the optimization features that LLVM provides. Optimizations are implemented as Passes that traverse some portion of a program to either collect information or transform the program. @@ -69,15 +69,13 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if ! bitcode are neither analysis nor transform passes.

      The table below provides a quick summary of each pass and links to the more complete pass description later in the document.

      -
      -
      + - + - @@ -85,27 +83,23 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - - - - + + + + - - - - + - + - @@ -120,8 +114,8 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - + @@ -129,13 +123,12 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - - + @@ -155,46 +148,40 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - + - + - + - - + - + - - + - - - - - + + - + - @@ -208,24 +195,24 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - - - - + + + +
      ANALYSIS PASSES
      OptionName
      -aa-evalExhaustive Alias Analysis Precision Evaluator
      -basicaaBasic Alias Analysis (default AA impl)
      -basicaaBasic Alias Analysis (stateless AA impl)
      -basiccgBasic CallGraph Construction
      -codegenprepareOptimize for code generation
      -count-aaCount Alias Analysis Query Responses
      -debug-aaAA use debugger
      -domfrontierDominance Frontier Construction
      -dot-callgraphPrint Call Graph to 'dot' file
      -dot-cfgPrint CFG of function to 'dot' file
      -dot-cfg-onlyPrint CFG of function to 'dot' file (with no function bodies)
      -dot-domPrint dominator tree of function to 'dot' file
      -dot-dom-onlyPrint dominator tree of function to 'dot' file (with no function bodies)
      -dot-postdomPrint post dominator tree of function to 'dot' file
      -dot-postdom-onlyPrint post dominator tree of function to 'dot' file (with no function bodies)
      -dot-domPrint dominance tree of function to 'dot' file
      -dot-dom-onlyPrint dominance tree of function to 'dot' file (with no function bodies)
      -dot-postdomPrint postdominance tree of function to 'dot' file
      -dot-postdom-onlyPrint postdominance tree of function to 'dot' file (with no function bodies)
      -globalsmodref-aaSimple mod/ref analysis for globals
      -instcountCounts the various types of Instructions
      -interprocedural-aa-evalExhaustive Interprocedural Alias Analysis Precision Evaluator
      -interprocedural-basic-aaInterprocedural Basic Alias Analysis
      -intervalsInterval Partition Construction
      -iv-usersInduction Variable Users
      -lazy-value-infoLazy Value Information Analysis
      -ldaLoop Dependence Analysis
      -libcall-aaLibCall Alias Analysis
      -lintCheck for common errors in LLVM IR
      -live-valuesValue Liveness Analysis
      -lintStatically lint-checks LLVM IR
      -loopsNatural Loop Information
      -memdepMemory Dependence Analysis
      -module-debuginfoPrints module debug info metadata
      -module-debuginfoDecodes module-level debug info
      -no-aaNo Alias Analysis (always returns 'may' alias)
      -no-profileNo Profile Information
      -pointertrackingTrack pointer bounds
      -postdomfrontierPost-Dominance Frontier Construction
      -postdomtreePost-Dominator Tree Construction
      -print-alias-setsAlias Set Printer
      -print-used-typesFind Used Types
      -profile-estimatorEstimate profiling information
      -profile-loaderLoad profile information from llvmprof.out
      -regionsDetect single entry single exit regions in a function
      -profile-verifierVerify profiling information
      -regionsDetect single entry single exit regions
      -scalar-evolutionScalar Evolution Analysis
      -scev-aaScalarEvolution-based Alias Analysis
      -targetdataTarget Data Layout
      TRANSFORM PASSES
      OptionName
      -abcdRemove redundant conditional branches
      -adceAggressive Dead Code Elimination
      -always-inlineInliner for always_inline functions
      -argpromotionPromote 'by reference' arguments to scalars
      -block-placementProfile Guided Basic Block Placement
      -break-crit-edgesBreak critical edges in CFG
      -codegenpreparePrepare a function for code generation
      -codegenprepareOptimize for code generation
      -constmergeMerge Duplicate Global Constants
      -constpropSimple constant propagation
      -dceDead Code Elimination
      -internalizeInternalize Global Symbols
      -ipconstpropInterprocedural constant propagation
      -ipsccpInterprocedural Sparse Conditional Constant Propagation
      -jump-threadingThread control through conditional blocks
      -jump-threadingJump Threading
      -lcssaLoop-Closed SSA Form Pass
      -licmLoop Invariant Code Motion
      -loop-deletionDead Loop Deletion Pass
      -loop-deletionDelete dead loops
      -loop-extractExtract loops into new functions
      -loop-extract-singleExtract at most one loop into a new function
      -loop-index-splitIndex Split Loops
      -loop-reduceLoop Strength Reduction
      -loop-rotateRotate Loops
      -loop-simplifyCanonicalize natural loops
      -loop-unrollUnroll loops
      -loop-unswitchUnswitch loops
      -loop-simplifyCanonicalize natural loops
      -loweratomicLower atomic intrinsics
      -loweratomicLower atomic intrinsics to non-atomic form
      -lowerinvokeLower invoke and unwind, for unwindless code generators
      -lowersetjmpLower Set Jump
      -lowerswitchLower SwitchInst's to branches
      -mem2regPromote Memory to Register
      -memcpyoptOptimize use of memcpy and friends
      -memcpyoptMemCpy Optimization
      -mergefuncMerge Functions
      -mergereturnUnify function exit nodes
      -partial-inlinerPartial Inliner
      -partialspecializationPartial Specialization
      -prune-ehRemove unused exception handling info
      -reassociateReassociate expressions
      -reg2memDemote all values to stack slots
      -scalarreplScalar Replacement of Aggregates
      -scalarreplScalar Replacement of Aggregates (DT)
      -sccpSparse Conditional Constant Propagation
      -sinkCode Sinking
      -simplify-libcallsSimplify well-known library calls
      -simplify-libcalls-halfpowrSimplify half_powr library calls
      -simplifycfgSimplify the CFG
      -split-gepsSplit complex GEPs into simple GEPs
      -ssiStatic Single Information Construction
      -ssi-everythingStatic Single Information Construction (everything, intended for debugging)
      -sinkCode sinking
      -sretpromotionPromote sret arguments to multiple ret values
      -stripStrip all symbols from a module
      -strip-dead-debug-infoStrip debug info for unused symbols
      -strip-dead-prototypesRemove unused function declarations
      -strip-dead-prototypesStrip Unused Function Prototypes
      -strip-debug-declareStrip all llvm.dbg.declare intrinsics
      -strip-nondebugStrip all symbols, except dbg symbols, from a module
      -sretpromotionPromote sret arguments
      -tailcallelimTail Call Elimination
      -tailduplicateTail Duplication
      -verifyModule Verifier
      -view-cfgView CFG of function
      -view-cfg-onlyView CFG of function (with no function bodies)
      -view-domView dominator tree of function
      -view-dom-onlyView dominator tree of function (with no function bodies)
      -view-postdomView post dominator tree of function
      -view-postdom-onlyView post dominator tree of function (with no function bodies)
      -view-domView dominance tree of function
      -view-dom-onlyView dominance tree of function (with no function bodies)
      -view-postdomView postdominance tree of function
      -view-postdom-onlyView postdominance tree of function (with no function bodies)
      +
      - -
      +

      Analysis Passes

      +

      This section describes the LLVM Analysis Passes.

      -
      - -
      + +

      This is a simple N^2 alias analysis accuracy evaluator. Basically, for each function in the program, it simply queries to see how the alias analysis implementation answers alias queries between each pair of @@ -236,10 +223,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -basicaa: Basic Alias Analysis (stateless AA impl) +

      +

      This is the default implementation of the Alias Analysis interface that simply implements a few identities (two different globals cannot alias, @@ -248,30 +235,18 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Yet to be written.

      - -
      -

      - This pass munges the code in the input function to better prepare it for - SelectionDAG-based code generation. This works around limitations in it's - basic-block-at-a-time approach. It should eventually be removed. -

      -
      - - - -
      + +

      A pass which can be used to count how many alias queries are being made and how the alias analysis implementation being used responds. @@ -279,10 +254,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This simple pass checks alias analysis users to ensure that if they create a new value, they do not query AA without informing it of the value. @@ -296,10 +271,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass is a simple dominator construction algorithm for finding forward dominator frontiers. @@ -307,10 +282,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass is a simple dominator construction algorithm for finding forward dominators. @@ -318,10 +293,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass, only available in opt, prints the call graph into a .dot graph. This graph can then be processed with the "dot" tool @@ -330,10 +305,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass, only available in opt, prints the control flow graph into a .dot graph. This graph can then be processed with the @@ -342,10 +317,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass, only available in opt, prints the control flow graph into a .dot graph, omitting the function bodies. This graph can @@ -355,10 +330,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -dot-dom: Print dominance tree of function to 'dot' file +

      +

      This pass, only available in opt, prints the dominator tree into a .dot graph. This graph can then be processed with the @@ -367,11 +342,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -dot-dom-only: Print dominance tree of function to 'dot' file (with no function bodies) +

      +

      This pass, only available in opt, prints the dominator tree into a .dot graph, omitting the function bodies. This graph can @@ -381,10 +355,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -dot-postdom: Print postdominance tree of function to 'dot' file +

      +

      This pass, only available in opt, prints the post dominator tree into a .dot graph. This graph can then be processed with the @@ -393,11 +367,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -dot-postdom-only: Print postdominance tree of function to 'dot' file (with no function bodies) +

      +

      This pass, only available in opt, prints the post dominator tree into a .dot graph, omitting the function bodies. This graph can @@ -407,10 +380,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This simple pass provides alias and mod/ref information for global values that do not have their address taken, and keeps track of whether functions @@ -420,43 +393,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass collects the count of all instructions and reports them

      - -
      -

      This pass implements a simple N^2 alias analysis accuracy evaluator. - Basically, for each function in the program, it simply queries to see how the - alias analysis implementation answers alias queries between each pair of - pointers in the function. -

      -
      - - - -
      -

      This pass defines the default implementation of the Alias Analysis interface - that simply implements a few identities (two different globals cannot alias, - etc), but otherwise does no analysis. -

      -
      - - - -
      + +

      This analysis calculates and represents the interval partition of a function, or a preexisting interval partition. @@ -469,44 +419,44 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Bookkeeping for "interesting" users of expressions computed from induction variables.

      - -
      + +

      Interface for lazy computation of value constraint information.

      - -
      + +

      Loop dependence analysis framework, which is used to detect dependences in memory accesses in loops.

      - -
      + +

      LibCall Alias Analysis.

      - -
      +

      + -lint: Statically lint-checks LLVM IR +

      +

      This pass statically checks for common and easily-identified constructs which produce undefined or likely unintended behavior in LLVM IR.

      @@ -534,18 +484,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      -

      LLVM IR Value liveness analysis pass.

      -
      - - - -
      +

      + -loops: Natural Loop Information +

      +

      This analysis is used to identify natural loops and determine the loop depth of various nodes of the CFG. Note that the loops identified may actually be @@ -555,10 +497,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      An analysis that determines, for a given memory operation, what preceding memory operations it depends on. It builds on alias analysis information, and @@ -568,10 +510,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -module-debuginfo: Decodes module-level debug info +

      +

      This pass decodes the debug info metadata in a module and prints in a (sufficiently-prepared-) human-readable form. @@ -581,10 +523,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Always returns "I don't know" for alias queries. NoAA is unlike other alias analysis implementations, in that it does not chain to a previous analysis. As @@ -593,10 +535,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      The default "no profile" implementation of the abstract ProfileInfo interface. @@ -604,19 +546,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      -

      Tracking of pointer bounds. -

      -
      - - - -
      + +

      This pass is a simple post-dominator construction algorithm for finding post-dominator frontiers. @@ -624,10 +557,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass is a simple post-dominator construction algorithm for finding post-dominators. @@ -635,18 +568,18 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Yet to be written.

      - -
      + +

      This pass, only available in opt, prints the call graph to standard error in a human-readable form. @@ -654,10 +587,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass, only available in opt, prints the SCCs of the call graph to standard error in a human-readable form. @@ -665,10 +598,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass, only available in opt, prints the SCCs of each function CFG to standard error in a human-readable form. @@ -676,10 +609,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Pass that prints instructions, and associated debug info:

        @@ -690,18 +623,18 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

        \n" if !

      - -
      + +

      Dominator Info Printer.

      - -
      + +

      This pass, only available in opt, prints out call sites to external functions that are called with constant arguments. This can be @@ -711,10 +644,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      The PrintFunctionPass class is designed to be pipelined with other FunctionPasses, and prints out the functions of the module @@ -723,20 +656,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass simply prints out the entire module when it is executed.

      - -
      + +

      This pass is used to seek out all of the types in use by the program. Note that this analysis explicitly does not include types only used by the symbol @@ -744,20 +677,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Profiling information that estimates the profiling information in a very crude and unimaginative way.

      - -
      + +

      A concrete implementation of profiling information that loads the information from a profile dump file. @@ -765,16 +698,16 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Pass that checks profiling information for plausibility.

      - -
      +

      + -regions: Detect single entry single exit regions +

      +

      The RegionInfo pass detects single entry single exit regions in a function, where a region is defined as any subgraph that is connected to the @@ -784,10 +717,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      The ScalarEvolution analysis can be used to analyze and categorize scalar expressions in loops. It specializes in recognizing general @@ -803,10 +736,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -scev-aa: ScalarEvolution-based Alias Analysis +

      +

      Simple alias analysis implemented in terms of ScalarEvolution queries. This differs from traditional loop dependence analysis in that it tests @@ -819,62 +752,26 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      -

      - performs code stripping. this transformation can delete: -

      - -
        -
      1. names for virtual registers
      2. -
      3. symbols for internal globals and functions
      4. -
      5. debug information
      6. -
      - -

      - note that this transformation makes code much less readable, so it should - only be used in situations where the strip utility would be used, - such as reducing code size or making it harder to reverse engineer code. -

      -
      - - - -
      + +

      Provides other passes access to information on the size and alignment required by the target ABI for various data types.

      +
      + - -
      +

      Transform Passes

      +

      This section describes the LLVM Transform Passes.

      -
      - -
      -

      ABCD removes conditional branch instructions that can be proved redundant. - With the SSI representation, each variable has a constraint. By analyzing these - constraints we can prove that a branch is redundant. When a branch is proved - redundant it means that one direction will always be taken; thus, we can change - this branch into an unconditional jump.

      -

      It is advisable to run SimplifyCFG and - Aggressive Dead Code Elimination after ABCD - to clean up the code.

      -
      - - - -
      + +

      ADCE aggressively tries to eliminate code. This pass is similar to DCE but it assumes that values are dead until proven otherwise. This is similar to SCCP, except applied to @@ -882,19 +779,19 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      A custom inliner that handles only functions that are marked as "always inline".

      - -
      + +

      This pass promotes "by reference" arguments to be "by value" arguments. In practice, this means looking for internal functions that have pointer @@ -922,10 +819,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass is a very simple profile guided basic block placement algorithm. The idea is to put frequently executed blocks together at the start of the function and hopefully increase the number of fall-through conditional @@ -934,10 +831,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Break all of the critical edges in the CFG by inserting a dummy basic block. It may be "required" by passes that cannot deal with critical edges. This @@ -947,20 +844,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -codegenprepare: Optimize for code generation +

      +
      This pass munges the code in the input function to better prepare it for SelectionDAG-based code generation. This works around limitations in its basic-block-at-a-time approach. It should eventually be removed.
      - -
      + +

      Merges duplicate global constants together into a single constant that is shared. This is useful because some passes (ie TraceValues) insert a lot of @@ -970,10 +867,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This file implements constant propagation and merging. It looks for instructions involving only constant operands and replaces them with a constant value instead of an instruction. For example:

      @@ -986,10 +883,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Dead code elimination is similar to dead instruction elimination, but it rechecks instructions that were used by removed @@ -998,10 +895,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass deletes dead arguments from internal functions. Dead argument elimination removes arguments which are directly dead, as well as arguments @@ -1016,10 +913,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass is used to clean up the output of GCC. It eliminates names for types that are unused in the entire translation unit, using the )) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Dead instruction elimination performs a single pass over the function, removing instructions that are obviously dead. @@ -1039,10 +936,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      A trivial dead store elimination that only considers basic-block local redundant stores. @@ -1050,10 +947,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      A simple interprocedural pass which walks the call-graph, looking for functions which do not access or only read non-local memory, and marking them readnone/readonly. In addition, it marks function arguments (of pointer type) @@ -1065,10 +962,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This transform is designed to eliminate unreachable internal globals from the program. It uses an aggressive algorithm, searching out globals that are @@ -1079,10 +976,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass transforms simple global variables that never have their address taken. If obviously true, it marks read/write globals as constant, deletes @@ -1091,10 +988,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass performs global value numbering to eliminate fully and partially redundant instructions. It also performs redundant load elimination. @@ -1102,10 +999,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This transformation analyzes and transforms the induction variables (and computations derived from them) into simpler forms suitable for subsequent @@ -1153,20 +1050,20 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Bottom-up inlining of functions into callees.

      - -
      + +

      This pass instruments the specified program with counters for edge profiling. Edge profiling can give a reasonable approximation of the hot paths through a @@ -1181,10 +1078,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass instruments the specified program with counters for edge profiling. Edge profiling can give a reasonable approximation of the hot paths through a program, and is used for a wide variety of program transformations. @@ -1192,10 +1089,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      Combine instructions to form fewer, simple instructions. This pass does not modify the CFG. This pass is where algebraic @@ -1246,10 +1143,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass loops over all of the functions in the input module, looking for a main function. If a main function is found, all other functions and all @@ -1258,10 +1155,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      This pass implements an extremely simple interprocedural constant propagation pass. It could certainly be improved in many different ways, @@ -1272,10 +1169,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      + +

      An interprocedural variant of Sparse Conditional Constant Propagation. @@ -1283,10 +1180,10 @@ perl -e '$/ = undef; for (split(/\n/, <>)) { s:^ *///? ?::; print "

      \n" if !

      - -
      +

      + -jump-threading: Jump Threading +

      +

      Jump threading tries to find distinct threads of control flow running through a basic block. This pass looks at blocks that have multiple predecessors and @@ -1312,10 +1209,10 @@ if (X < 3) {

      - -
      + +

      This pass transforms loops by placing phi nodes at the end of the loops for all values that are live across the loop boundary. For example, it turns @@ -1341,10 +1238,10 @@ if (X < 3) {

      - -
      + +

      This pass performs loop invariant code motion, attempting to remove as much code from the body of a loop as possible. It does this by either hoisting @@ -1376,11 +1273,12 @@ if (X < 3) { variable.

      + - -
      +

      + -loop-deletion: Delete dead loops +

      +

      This file implements the Dead Loop Deletion Pass. This pass is responsible for eliminating loops with non-infinite computable trip counts that have no @@ -1390,10 +1288,10 @@ if (X < 3) {

      - -
      + +

      A pass wrapper around the ExtractLoop() scalar transformation to extract each top-level loop into its own new function. If the loop is the @@ -1403,10 +1301,10 @@ if (X < 3) {

      - -
      + +

      Similar to Extract loops into new functions, this pass extracts one natural loop from the program into a function if it @@ -1415,21 +1313,10 @@ if (X < 3) {

      - -
      -

      - This pass divides loop's iteration range by spliting loop such that each - individual loop is executed efficiently. -

      -
      - - - -
      + +

      This pass performs a strength reduction on array references inside loops that have as one or more of their components the loop induction variable. This is @@ -1440,60 +1327,18 @@ if (X < 3) {

      - -
      + +

      A simple loop rotation transformation.

      - -
      -

      - This pass implements a simple loop unroller. It works best when loops have - been canonicalized by the -indvars pass, - allowing it to determine the trip counts of loops easily. -

      -
      - - - -
      -

      - This pass transforms loops that contain branches on loop-invariant conditions - to have multiple loops. For example, it turns the left into the right code: -

      - -
      for (...)                  if (lic)
      -  A                          for (...)
      -  if (lic)                     A; B; C
      -    B                      else
      -  C                          for (...)
      -                               A; C
      - -

      - This can increase the size of the code exponentially (doubling it every time - a loop is unswitched) so we only unswitch if the resultant code will be - smaller than a threshold. -

      - -

      - This pass expects LICM to be run before it to hoist invariant conditions out - of the loop, to make the unswitching opportunity obvious. -

      -
      - - - -
      + +

      This pass performs several transformations to transform natural loops into a simpler form, which makes subsequent analyses and transformations simpler and @@ -1531,26 +1376,52 @@ if (X < 3) {

      - -
      +

      + -loop-unroll: Unroll loops +

      +

      - Turn malloc and free instructions into @malloc and - @free calls. -

      - -

      - This is a target-dependent tranformation because it depends on the size of - data types and alignment constraints. + This pass implements a simple loop unroller. It works best when loops have + been canonicalized by the -indvars pass, + allowing it to determine the trip counts of loops easily.

      -
      - -loweratomic: Lower atomic intrinsics +

      + -loop-unswitch: Unswitch loops +

      +
      +

      + This pass transforms loops that contain branches on loop-invariant conditions + to have multiple loops. For example, it turns the left into the right code: +

      + +
      for (...)                  if (lic)
      +  A                          for (...)
      +  if (lic)                     A; B; C
      +    B                      else
      +  C                          for (...)
      +                               A; C
      + +

      + This can increase the size of the code exponentially (doubling it every time + a loop is unswitched) so we only unswitch if the resultant code will be + smaller than a threshold. +

      + +

      + This pass expects LICM to be run before it to hoist invariant conditions out + of the loop, to make the unswitching opportunity obvious. +

      -
      + + +

      + -loweratomic: Lower atomic intrinsics to non-atomic form +

      +

      This pass lowers atomic intrinsics to non-atomic form for use in a known non-preemptible environment. @@ -1565,10 +1436,10 @@ if (X < 3) {

      - -
      + +

      This transformation is designed for use by code generators which do not yet support stack unwinding. This pass supports two models of exception handling @@ -1606,10 +1477,10 @@ if (X < 3) {

      - -
      + +

      Lowers setjmp and longjmp to use the LLVM invoke and unwind instructions as necessary. @@ -1635,10 +1506,10 @@ if (X < 3) {

      - -
      + +

      Rewrites switch instructions with a sequence of branches, which allows targets to get away with not implementing the switch instruction until @@ -1647,10 +1518,10 @@ if (X < 3) {

      - -
      + +

      This file promotes memory references to be register references. It promotes alloca instructions which only have loads and @@ -1663,10 +1534,10 @@ if (X < 3) {

      - -
      +

      + -memcpyopt: MemCpy Optimization +

      +

      This pass performs various transformations related to eliminating memcpy calls, or transforming sets of stores into memset's. @@ -1674,10 +1545,10 @@ if (X < 3) {

      - -
      + +

      This pass looks for equivalent functions that are mergeable and folds them. A hash is computed from the function, based on its type and number of @@ -1695,10 +1566,10 @@ if (X < 3) {

      - -
      + +

      Ensure that functions have at most one ret instruction in them. Additionally, it keeps track of which node is the new exit node of the CFG. @@ -1706,36 +1577,20 @@ if (X < 3) {

      - -
      + +

      This pass performs partial inlining, typically by inlining an if statement that surrounds the body of the function.

      - -
      -

      This pass finds function arguments that are often a common constant and - specializes a version of the called function for that constant. - - This pass simply does the cloning for functions it specializes. It depends - on IPSCCP and DAE to clean up the results. - - The initial heuristic favors constant arguments that are used in control - flow. -

      -
      - - - -
      + +

      This file implements a simple interprocedural pass which walks the call-graph, turning invoke instructions into call instructions if and @@ -1745,10 +1600,10 @@ if (X < 3) {

      - -
      + +

      This pass reassociates commutative expressions in an order that is designed to promote better constant propagation, GCSE, LICM, PRE, etc. @@ -1768,10 +1623,10 @@ if (X < 3) {

      - -
      + +

      This file demotes all registers to memory references. It is intended to be the inverse of -mem2reg. By converting to @@ -1785,10 +1640,10 @@ if (X < 3) {

      - -
      +

      + -scalarrepl: Scalar Replacement of Aggregates (DT) +

      +

      The well-known scalar replacement of aggregates transformation. This transform breaks up alloca instructions of aggregate type (structure @@ -1807,10 +1662,10 @@ if (X < 3) {

      - -
      + +

      Sparse conditional constant propagation and merging, which can be summarized as: @@ -1830,20 +1685,10 @@ if (X < 3) {

      - -
      -

      This pass moves instructions into successor blocks, when possible, so that - they aren't executed on paths where their results aren't needed. -

      -
      - - - -
      + +

      Applies a variety of small optimizations for calls to specific well-known function calls (e.g. runtime library functions). For example, a call @@ -1853,20 +1698,10 @@ if (X < 3) {

      - -
      -

      Simple pass that applies an experimental transformation on calls - to specific functions. -

      -
      - - - -
      + +

      Performs dead code elimination and basic block merging. Specifically:

      @@ -1882,118 +1717,20 @@ if (X < 3) {
      - -
      -

      This function breaks GEPs with more than 2 non-zero operands into smaller - GEPs each with no more than 2 non-zero operands. This exposes redundancy - between GEPs with common initial operand sequences. +

      + -sink: Code sinking +

      +
      +

      This pass moves instructions into successor blocks, when possible, so that + they aren't executed on paths where their results aren't needed.

      - -
      -

      This pass converts a list of variables to the Static Single Information - form. - - We are building an on-demand representation, that is, we do not convert - every single variable in the target function to SSI form. Rather, we receive - a list of target variables that must be converted. We also do not - completely convert a target variable to the SSI format. Instead, we only - change the variable in the points where new information can be attached - to its live range, that is, at branch points. -

      -
      - - - -
      -

      A pass that runs SSI on every non-void variable, intended for debugging. -

      -
      - - - -
      -

      - performs code stripping. this transformation can delete: -

      - -
        -
      1. names for virtual registers
      2. -
      3. symbols for internal globals and functions
      4. -
      5. debug information
      6. -
      - -

      - note that this transformation makes code much less readable, so it should - only be used in situations where the strip utility would be used, - such as reducing code size or making it harder to reverse engineer code. -

      -
      - - - -
      -

      - This pass loops over all of the functions in the input module, looking for - dead declarations and removes them. Dead declarations are declarations of - functions for which no implementation is available (i.e., declarations for - unused library functions). -

      -
      - - - -
      -

      This pass implements code stripping. Specifically, it can delete:

      -
        -
      • names for virtual registers
      • -
      • symbols for internal globals and functions
      • -
      • debug information
      • -
      -

      - Note that this transformation makes code much less readable, so it should - only be used in situations where the 'strip' utility would be used, such as - reducing code size or making it harder to reverse engineer code. -

      -
      - - - -
      -

      This pass implements code stripping. Specifically, it can delete:

      -
        -
      • names for virtual registers
      • -
      • symbols for internal globals and functions
      • -
      • debug information
      • -
      -

      - Note that this transformation makes code much less readable, so it should - only be used in situations where the 'strip' utility would be used, such as - reducing code size or making it harder to reverse engineer code. -

      -
      - - - -
      +

      + -sretpromotion: Promote sret arguments to multiple ret values +

      +

      This pass finds functions that return a struct (using a pointer to the struct as the first argument of the function, marked with the 'sret' attribute) and @@ -2013,10 +1750,103 @@ if (X < 3) {

      -
      - -tailcallelim: Tail Call Elimination +

      + -strip: Strip all symbols from a module +

      +
      +

      + Performs code stripping. This transformation can delete: +

      + +
        +
      1. names for virtual registers
      2. +
      3. symbols for internal globals and functions
      4. +
      5. debug information
      6. +
      + +

      + Note that this transformation makes code much less readable, so it should + only be used in situations where the strip utility would be used, + such as reducing code size or making it harder to reverse engineer code. +

      -
      + + +

      + -strip-dead-debug-info: Strip debug info for unused symbols +

      +
      +

      + Performs code stripping. This transformation can delete: +

      + +
        +
      1. names for virtual registers
      2. +
      3. symbols for internal globals and functions
      4. +
      5. debug information
      6. +
      + +

      + Note that this transformation makes code much less readable, so it should + only be used in situations where the strip utility would be used, + such as reducing code size or making it harder to reverse engineer code. +

      +
      + + +

      + -strip-dead-prototypes: Strip Unused Function Prototypes +

      +
      +

      + This pass loops over all of the functions in the input module, looking for + dead declarations and removes them. Dead declarations are declarations of + functions for which no implementation is available (i.e., declarations for + unused library functions). +

      +
      + + +

      + -strip-debug-declare: Strip all llvm.dbg.declare intrinsics +

      +
      +

      This pass implements code stripping. Specifically, it can delete:

      +
        +
      • names for virtual registers
      • +
      • symbols for internal globals and functions
      • +
      • debug information
      • +
      +

      + Note that this transformation makes code much less readable, so it should + only be used in situations where the 'strip' utility would be used, such as + reducing code size or making it harder to reverse engineer code. +

      +
      + + +

      + -strip-nondebug: Strip all symbols, except dbg symbols, from a module +

      +
      +

      This pass implements code stripping. Specifically, it can delete:

      +
        +
      • names for virtual registers
      • +
      • symbols for internal globals and functions
      • +
      • debug information
      • +
      +

      + Note that this transformation makes code much less readable, so it should + only be used in situations where the 'strip' utility would be used, such as + reducing code size or making it harder to reverse engineer code. +

      +
      + + +

      + -tailcallelim: Tail Call Elimination +

      +

      This file transforms calls of the current function (self recursion) followed by a return instruction with a branch to the entry of the function, creating @@ -2045,10 +1875,10 @@ if (X < 3) {

      - -
      + +

      This pass performs a limited form of tail duplication, intended to simplify CFGs by removing some unconditional branches. This pass is necessary to @@ -2058,17 +1888,18 @@ if (X < 3) {

      - - -
      -

      This section describes the LLVM Utility Passes.

      + +

      Utility Passes

      +
      +

      This section describes the LLVM Utility Passes.

      + - -
      + +

      Same as dead argument elimination, but deletes arguments to functions which are external. This is only for use by

      - -
      + +

      This pass is used by bugpoint to extract all blocks from the module into their own functions.

      - -
      + +

      This is a little utility pass that gives instructions names, this is mostly useful when diffing the effect of an optimization because deleting an unnamed instruction can change all other instruction numbering, making the @@ -2098,10 +1929,10 @@ if (X < 3) {

      - -
      + +

      Ensures that the module is in the form required by the Module Verifier pass. @@ -2114,10 +1945,10 @@ if (X < 3) {

      - -
      + +

      Verifies LLVM IR code. This is useful to run after an optimization which is undergoing testing. Note that llvm-as verifies its input before @@ -2165,20 +1996,20 @@ if (X < 3) {

      - -
      + +

      Displays the control flow graph using the GraphViz tool.

      - -
      + +

      Displays the control flow graph using the GraphViz tool, but omitting function bodies. @@ -2186,22 +2017,20 @@ if (X < 3) {

      - -
      +

      + -view-dom: View dominance tree of function +

      +

      Displays the dominator tree using the GraphViz tool.

      - -
      +

      + -view-dom-only: View dominance tree of function (with no function bodies) +

      +

      Displays the dominator tree using the GraphViz tool, but omitting function bodies. @@ -2209,28 +2038,28 @@ if (X < 3) {

      - -
      +

      + -view-postdom: View postdominance tree of function +

      +

      Displays the post dominator tree using the GraphViz tool.

      - -
      +

      + -view-postdom-only: View postdominance tree of function (with no function bodies) +

      +

      Displays the post dominator tree using the GraphViz tool, but omitting function bodies.

      +
      +
      @@ -2241,8 +2070,8 @@ if (X < 3) { src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Reid Spencer
      - LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-13 21:57:25 +0100 (Sun, 13 Feb 2011) $ + LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html index 0351dd03b7d0..6af922b4071c 100644 --- a/docs/ProgrammersManual.html +++ b/docs/ProgrammersManual.html @@ -8,9 +8,9 @@ -
      +

      LLVM Programmer's Manual -

      +
      1. Introduction
      2. @@ -56,6 +56,7 @@ option
      - + -
      +

      This document is meant to highlight some of the important classes and interfaces available in the LLVM source-base. This manual is not @@ -240,24 +242,22 @@ href="/doxygen/InstVisitor_8h-source.html">InstVisitor template.

      - + -
      +

      This section contains general information that is useful if you are working in the LLVM source-base, but that isn't specific to any particular API.

      -
      - - + -
      +

      LLVM makes heavy use of the C++ Standard Template Library (STL), perhaps much more than you are used to, or have seen before. Because of @@ -303,11 +303,11 @@ to write maintainable code more than where to put your curly braces.

      - + -
      + - - + + +

      + Important and useful LLVM APIs +

      -
      +

      Here we highlight some LLVM APIs that are generally useful and good to know about when writing transformations.

      -
      - - + -
      +

      The LLVM source-base makes extensive use of a custom form of RTTI. These templates have many similarities to the C++ dynamic_cast<> @@ -440,12 +440,12 @@ are lots of examples in the LLVM source base.

      - + -
      +

      Although LLVM generally does not do much string manipulation, we do have several important APIs which take strings. Two important examples are the @@ -459,14 +459,12 @@ clients to perform a heap allocation which is usually unnecessary. Instead, many LLVM APIs use a StringRef or a const Twine& for passing strings efficiently.

      -
      - - + -
      +

      The StringRef data type represents a reference to a constant string (a character array and a length) and supports the common operations available @@ -502,11 +500,11 @@ small and pervasive enough in LLVM that it should always be passed by value.

      - + -
      +

      The Twine class is an efficient way for APIs to accept concatenated strings. For example, a common LLVM paradigm is to name one instruction based on @@ -537,13 +535,14 @@ accept concatenated strings.

      - - - -
      + +

      + The DEBUG() macro and -debug option +

      + +

      Often when working on your pass you will put a bunch of debugging printouts and other code into your pass. After you get it working, you want to remove @@ -589,15 +588,13 @@ enable or disable it directly in gdb. Just use "set DebugFlag=0" or program hasn't been started yet, you can always just run it with -debug.

      -
      - - + -
      +

      Sometimes you may find yourself in a situation where enabling -debug just turns on too much information (such as when working on the code @@ -665,13 +662,15 @@ DEBUG_WITH_TYPE("", errs() << "No debug type (2)\n");

      - - -
      + +

      + The Statistic class & -stats + option +

      + +

      The "llvm/ADT/Statistic.h" file @@ -766,11 +765,11 @@ maintainable and useful.

      - + -
      +

      Several of the important data structures in LLVM are graphs: for example CFGs made out of LLVM BasicBlocks, CFGs made out of @@ -814,13 +813,15 @@ attributes, then you can call DAG.clearGraphAttrs().

      - - + + +

      + Picking the Right Data Structure for a Task +

      -
      +

      LLVM has a plethora of data structures in the llvm/ADT/ directory, and we commonly use STL data structures. This section describes the trade-offs @@ -876,35 +877,47 @@ elements (but could contain many), for example, it's much better to use . Doing so avoids (relatively) expensive malloc/free calls, which dwarf the cost of adding the elements to the container.

      -
      - - + -
      +
      There are a variety of sequential containers available for you, based on your needs. Pick the first in this section that will do what you want. -
      -
      - Fixed Size Arrays +

      + llvm/ADT/ArrayRef.h +

      + +
      +

      The llvm::ArrayRef class is the preferred class to use in an interface that + accepts a sequential list of elements in memory and just reads from them. By + taking an ArrayRef, the API can be passed a fixed size array, an std::vector, + an llvm::SmallVector and anything else that is contiguous in memory. +

      -
      + + + +

      + Fixed Size Arrays +

      + +

      Fixed size arrays are very simple and very fast. They are good if you know exactly how many elements you have, or you have a (low) upper bound on how many you have.

      - + -
      +

      Heap allocated arrays (new[] + delete[]) are also simple. They are good if the number of elements is variable, if you know how many elements you will need before the array is allocated, and if the array is usually large (if not, @@ -916,11 +929,11 @@ construct those elements actually used).

      - + -
      +

      SmallVector<Type, N> is a simple class that looks and smells just like vector<Type>: it supports efficient iteration, lays out elements in memory order (so you can @@ -945,11 +958,11 @@ SmallVectors are most useful when on the stack.

      - + -
      +

      std::vector is well loved and respected. It is useful when SmallVector isn't: when the size of the vector is often large (thus the small optimization will @@ -987,11 +1000,11 @@ the loop.

      -
      +

      <deque> -

      + -
      +

      std::deque is, in some senses, a generalized version of std::vector. Like std::vector, it provides constant time random access and other similar properties, but it also provides efficient access to the front of the list. It @@ -1003,11 +1016,11 @@ something cheaper.

      -
      +

      <list> -

      + -
      +

      std::list is an extremely inefficient class that is rarely useful. It performs a heap allocation for every element inserted into it, thus having an extremely high constant factor, particularly for small data types. std::list @@ -1021,11 +1034,11 @@ not invalidate iterator or pointers to other elements in the list.

      - + -
      +

      ilist<T> implements an 'intrusive' doubly-linked list. It is intrusive, because it requires the element to store and provide access to the prev/next pointers for the list.

      @@ -1051,22 +1064,22 @@ Related classes of interest are explained in the following subsections:
      - + -
      +

      ilist_traits<T> is ilist<T>'s customization mechanism. iplist<T> (and consequently ilist<T>) publicly derive from this traits class.

      -
      +

      iplist -

      + -
      +

      iplist<T> is ilist<T>'s base and as such supports a slightly narrower interface. Notably, inserters from T& are absent.

      @@ -1076,11 +1089,11 @@ used for a wide variety of customizations.

      - + -
      +

      ilist_node<T> implements the forward and backward links that are expected by the ilist<T> (and analogous containers) in the default manner.

      @@ -1091,11 +1104,11 @@ in the default manner.

      - + -
      +

      ilists have another specialty that must be considered. To be a good citizen in the C++ ecosystem, it needs to support the standard container operations, such as begin and end iterators, etc. Also, the @@ -1129,11 +1142,11 @@ field in the ghostly sentinel which can be legally accessed.

      - + -
      +

      Other STL containers are available, such as std::string.

      There are also various STL adapter classes such as std::queue, @@ -1142,27 +1155,25 @@ underlying container but don't affect the cost of the container itself.

      - - - -
      + +

      + Set-Like Containers (std::set, SmallSet, SetVector, etc) +

      + +

      Set-like containers are useful when you need to canonicalize multiple values into a single representation. There are several different choices for how to do this, providing various trade-offs.

      -
      - - - + -
      +

      If you intend to insert a lot of elements, then do a lot of queries, a great approach is to use a vector (or other sequential container) with @@ -1180,11 +1191,11 @@ efficiently queried with a standard binary or radix search.

      - + -
      +

      If you have a set-like data structure that is usually small and whose elements are reasonably small, a SmallSet<Type, N> is a good choice. This set @@ -1203,11 +1214,11 @@ and erasing, but does not support iteration.

      - + -
      +

      SmallPtrSet has all the advantages of SmallSet (and a SmallSet of pointers is transparently implemented with a SmallPtrSet), but also supports iterators. If @@ -1223,11 +1234,11 @@ visited in sorted order.

      - + -
      +

      DenseSet is a simple quadratically probed hash table. It excels at supporting @@ -1242,11 +1253,11 @@ href="#dss_densemap">DenseMap has.

      - + -
      +

      FoldingSet is an aggregate class that is really good at uniquing @@ -1279,11 +1290,11 @@ elements.

      -
      +

      <set> -

      + -
      +

      std::set is a reasonable all-around set class, which is decent at many things but great at nothing. std::set allocates memory for each element @@ -1304,11 +1315,11 @@ std::set is almost never a good choice.

      - + -
      +

      LLVM's SetVector<Type> is an adapter class that combines your choice of a set-like container along with a Sequential Container. The important property @@ -1344,11 +1355,11 @@ heap traffic.

      - + -
      +

      UniqueVector is similar to SetVector, but it @@ -1364,11 +1375,11 @@ factors, and produces a lot of malloc traffic. It should be avoided.

      - + -
      +

      The STL provides several other options, such as std::multiset and the various @@ -1384,22 +1395,23 @@ better.

      - - -
      + +

      + Map-Like Containers (std::map, DenseMap, etc) +

      + +
      Map-like containers are useful when you want to associate data to a key. As usual, there are a lot of different ways to do this. :) -
      - + -
      +

      If your usage pattern follows a strict insert-then-query approach, you can @@ -1412,11 +1424,11 @@ vectors for sets.

      - + -
      +

      Strings are commonly used as keys in maps, and they are difficult to support @@ -1446,11 +1458,11 @@ copies a string if a value is inserted into the table.

      - + -
      +

      IndexedMap is a specialized container for mapping small dense integers (or values that can be mapped to small dense integers) to some other type. It is @@ -1466,11 +1478,11 @@ virtual register ID).

      - + -
      +

      DenseMap is a simple quadratically probed hash table. It excels at supporting @@ -1492,11 +1504,11 @@ inserted into the map) that it needs internally.

      - + -
      +

      ValueMap is a wrapper around a DenseMap mapping @@ -1509,11 +1521,11 @@ a Config parameter to the ValueMap template.

      - + -
      +

      IntervalMap is a compact map for small keys and values. It maps key intervals instead of single keys, and it will automatically coalesce adjacent @@ -1526,11 +1538,11 @@ as STL iterators. The heavyweight iterators allow a smaller data structure.

      -
      +

      <map> -

      + -
      +

      std::map has similar characteristics to std::set: it uses @@ -1546,11 +1558,31 @@ another element takes place).

      -
      - Other Map-Like Container Options +

      + "llvm/ADT/IntEqClasses.h" +

      + +
      + +

      IntEqClasses provides a compact representation of equivalence classes of +small integers. Initially, each integer in the range 0..n-1 has its own +equivalence class. Classes can be joined by passing two class representatives to +the join(a, b) method. Two integers are in the same class when findLeader() +returns the same representative.

      + +

      Once all equivalence classes are formed, the map can be compressed so each +integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m +is the total number of equivalence classes. The map must be uncompressed before +it can be edited again.

      +
      -
      + +

      + Other Map-Like Container Options +

      + +

      The STL provides several other options, such as std::multimap and the various @@ -1564,12 +1596,14 @@ always better.

      - - -
      + +

      + String-like containers +

      + +

      TODO: const char* vs stringref vs smallstring vs std::string. Describe twine, @@ -1579,11 +1613,11 @@ xref to #string_apis.

      - + -
      +

      Unlike the other containers, there are only two bit storage containers, and choosing when to use each is relatively straightforward.

      @@ -1593,14 +1627,13 @@ implementation in many common compilers (e.g. commonly available versions of GCC) is extremely inefficient and 2) the C++ standards committee is likely to deprecate this container and/or change it significantly somehow. In any case, please don't use it.

      -
      - + -
      +

      The BitVector container provides a dynamic size set of bits for manipulation. It supports individual bit setting/testing, as well as set operations. The set operations take time O(size of bitvector), but operations are performed one word @@ -1611,11 +1644,11 @@ the number of set bits to be high (IE a dense set).

      - + -
      +

      The SmallBitVector container provides the same interface as BitVector, but it is optimized for the case where only a small number of bits, less than 25 or so, are needed. It also transparently supports larger bit counts, but @@ -1630,11 +1663,11 @@ and its operator[] does not provide an assignable lvalue.

      - + -
      +

      The SparseBitVector container is much like BitVector, with one major difference: Only the bits that are set, are stored. This makes the SparseBitVector much more space efficient than BitVector when the set is sparse, @@ -1644,13 +1677,17 @@ universe). The downside to the SparseBitVector is that setting and testing of r

      - - + +
      + + +

      + Helpful Hints for Common Operations +

      -
      +

      This section describes how to perform some very simple transformations of LLVM code. This is meant to give examples of common idioms used, showing the @@ -1659,15 +1696,13 @@ you should also read about the main classes that you will be working with. The Core LLVM Class Hierarchy Reference contains details and descriptions of the main classes that you should know about.

      -
      - - + -
      +

      The LLVM compiler infrastructure has many different data structures that may be traversed. Following the example of the C++ standard template library, the @@ -1684,16 +1719,14 @@ on them, and it is easier to remember how to iterate. First we show a few common examples of the data structures that need to be traversed. Other data structures are traversed in very similar ways.

      -
      - - + -
      +

      It's quite common to have a Function instance that you'd like to transform in some way; in particular, you'd like to manipulate its @@ -1722,13 +1755,13 @@ exactly equivalent to (*i).size() just like you'd expect.

      - + -
      +

      Just like when dealing with BasicBlocks in Functions, it's easy to iterate over the individual instructions that make up @@ -1753,13 +1786,13 @@ basic block itself: errs() << *blk << "\n";.

      - + -
      +

      If you're finding that you commonly iterate over a Function's BasicBlocks and then that BasicBlock's Instructions, @@ -1799,12 +1832,12 @@ for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)

      - + -
      +

      Sometimes, it'll be useful to grab a reference (or pointer) to a class instance when all you've got at hand is an iterator. Well, extracting @@ -1876,12 +1909,12 @@ and operator* changed to return a pointer instead of a reference.

      - + -
      +

      Say that you're writing a FunctionPass and would like to count all the locations in the entire module (that is, across every Function) where a @@ -1938,11 +1971,11 @@ class OurFunctionPass : public FunctionPass {

      - + -
      +

      You may have noticed that the previous example was a bit oversimplified in that it did not deal with call sites generated by 'invoke' instructions. In @@ -1965,11 +1998,11 @@ If you look at its definition, it has only a single pointer member.

      - + -
      +

      Frequently, we might have an instance of the Value Class and we want to @@ -2026,12 +2059,12 @@ calling use/op_begin() on const Value*s or

      - + -
      +

      Iterating over the predecessors and successors of a block is quite easy with the routines defined in "llvm/Support/CFG.h". Just use code like @@ -2054,13 +2087,14 @@ succ_iterator/succ_begin/succ_end.

      - - - -
      + +

      + Making simple changes +

      + +

      There are some primitive transformation operations present in the LLVM infrastructure that are worth knowing about. When performing @@ -2068,15 +2102,13 @@ transformations, it's fairly common to manipulate the contents of basic blocks. This section describes some of the common methods for doing so and gives example code.

      -
      - - + -
      +

      Instantiating Instructions

      @@ -2212,18 +2244,15 @@ Instruction* newInst = new Instruction(..., pi);
      - + -
      +

      Deleting an instruction from an existing sequence of instructions that form a -BasicBlock is very straight-forward. First, -you must have a pointer to the instruction that you wish to delete. Second, you -need to obtain the pointer to that instruction's basic block. You use the -pointer to the basic block to get its list of instructions and then use the -erase function to remove your instruction. For example:

      +BasicBlock is very straight-forward: just +call the instruction's eraseFromParent() method. For example:

      @@ -2232,15 +2261,19 @@ I->eraseFromParent();
       
      +

      This unlinks the instruction from its containing basic block and deletes +it. If you'd just like to unlink the instruction from its containing basic +block but not delete it, you can use the removeFromParent() method.

      +
      - + -
      +

      Replacing individual instructions

      @@ -2248,7 +2281,7 @@ I->eraseFromParent(); permits use of two very useful replace functions: ReplaceInstWithValue and ReplaceInstWithInst.

      -

      Deleting Instructions

      +
      Deleting Instructions
      • ReplaceInstWithValue @@ -2301,11 +2334,11 @@ ReplaceInstWithValue, ReplaceInstWithInst -->
      - + -
      +

      Deleting a global variable from a module is just as easy as deleting an Instruction. First, you must have a pointer to the global variable that you wish @@ -2322,12 +2355,14 @@ GV->eraseFromParent();

      - - -
      + +

      + How to Create Types +

      + +

      In generating IR, you may need some complex types. If you know these types statically, you can use TypeBuilder<...>::get(), defined @@ -2362,13 +2397,15 @@ comment for more details.

      - - + + +

      + Threads and LLVM +

      -
      +

      This section describes the interaction of the LLVM APIs with multithreading, both on the part of client applications, and in the JIT, in the hosted @@ -2391,14 +2428,13 @@ compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and using the resultant compiler to build a copy of LLVM with multithreading support.

      -
      - + -
      +

      In order to properly protect its internal data structures while avoiding @@ -2431,11 +2467,11 @@ result in concurrent LLVM API calls.

      - + -
      +

      When you are done using the LLVM APIs, you should call llvm_shutdown() to deallocate memory used for internal structures. This will also invoke @@ -2451,11 +2487,11 @@ destructor.

      - + -
      +

      ManagedStatic is a utility class in LLVM used to implement static initialization of static resources, such as the global type tables. Before the @@ -2480,11 +2516,11 @@ and only if you know what you're doing!

      - + -
      +

      LLVMContext is an opaque class in the LLVM API which clients can use to operate multiple, isolated instances of LLVM concurrently within the same @@ -2524,11 +2560,11 @@ isolation is not a concern.

      - + -
      +

      LLVM's "eager" JIT compiler is safe to use in threaded programs. Multiple threads can call ExecutionEngine::getPointerToFunction() or @@ -2551,26 +2587,27 @@ access, but we suggest using only the eager JIT in threaded programs.

      - - + + +

      + Advanced Topics +

      -
      +

      This section describes some of the advanced or obscure API's that most clients do not need to be aware of. These API's tend to manage the inner workings of the LLVM system, and only need to be accessed in unusual circumstances.

      -
      - + -
      +

      The LLVM type system has a very simple goal: allow clients to compare types for @@ -2599,14 +2636,12 @@ Third, a concrete type is a type that is not an abstract type (e.g. "{ i32, float }").

      -
      - - + -
      +

      Because the most common question is "how do I build a recursive type with LLVM", @@ -2658,11 +2693,11 @@ href="#PATypeHolder">PATypeHolder class.

      - + -
      +

      The refineAbstractTypeTo method starts the type unification process. While this method is actually a member of the DerivedType class, it is most @@ -2688,11 +2723,11 @@ complex datastructures.

      - + -
      +

      PATypeHolder is a form of a "smart pointer" for Type objects. When VMCore happily goes about nuking types that become isomorphic to existing types, it @@ -2710,11 +2745,11 @@ Type is maintained by PATypeHolder objects.

      - + -
      +

      Some data structures need to perform more complex updates when types get @@ -2728,14 +2763,15 @@ objects) can never be refined.

      - - - -
      + +

      + The ValueSymbolTable and + TypeSymbolTable classes +

      + +

      The ValueSymbolTable class provides a symbol table that the Function and @@ -2766,11 +2802,11 @@ insert entries into the symbol table.

      - + -
      +

      The User class provides a basis for expressing the ownership of User towards other @@ -2779,18 +2815,19 @@ Use helper class is employed to do the bookkeeping and to facilitate -

      +

      + + Interaction and relationship between User and Use objects + +

      -
      +

      A subclass of User can choose between incorporating its Use objects or refer to them out-of-line by means of a pointer. A mixed variant (some Uses inline others hung off) is impractical and breaks the invariant that the Use objects belonging to the same User form a contiguous array.

      -

      We have 2 different layouts in the User (sub)classes: @@ -2839,17 +2876,18 @@ enforce the following memory layouts:

      (In the above figures 'P' stands for the Use** that is stored in each Use object in the member Use::Prev) - - -
      + +

      + The waymarking algorithm +

      + +

      Since the Use objects are deprived of the direct (back)pointer to their User objects, there must be a fast and exact method to recover it. This is accomplished by the following scheme:

      -
      A bit-encoding in the 2 LSBits (least significant bits) of the Use::Prev allows finding the start of the User object: @@ -2880,15 +2918,16 @@ Only the significant number of bits need to be stored between the stops, so that the worst case is 20 memory accesses when there are 1000 Use objects associated with a User.

      - - -
      + +

      + Reference implementation +

      + +

      The following literate Haskell fragment demonstrates the concept:

      -
      @@ -2970,11 +3009,15 @@ And here is the result of <deepCheck identityProp>:

      OK, passed 500 tests.
      - - + +

      + Tagging considerations +

      + +
      +

      To maintain the invariant that the 2 LSBits of each Use** in Use never change after being set up, setters of Use::Prev must re-tag the @@ -2989,13 +3032,17 @@ the LSBit set. (Portability is relying on the fact that all known compilers plac

      - - + +
      + + +

      + The Core LLVM Class Hierarchy Reference +

      -
      +

      #include "llvm/Type.h"
      doxygen info: Type Class

      @@ -3004,14 +3051,12 @@ being inspected or transformed. The core LLVM classes are defined in header files in the include/llvm/ directory, and implemented in the lib/VMCore directory.

      -
      - - + -
      +

      Type is a superclass of all type classes. Every Value has a Type. Type cannot be instantiated directly but only @@ -3026,14 +3071,13 @@ the lib/VMCore directory.

      be performed with address equality of the Type Instance. That is, given two Type* values, the types are identical if the pointers are identical.

      -
      - + -
      +
      • bool isIntegerTy() const: Returns true for any integer type.
      • @@ -3051,10 +3095,10 @@ the lib/VMCore directory.

      - -
      + +
      IntegerType
      Subclass of DerivedType that represents integer types of any bit width. @@ -3116,14 +3160,14 @@ the lib/VMCore directory.

      - - - - -
      + +

      + The Module class +

      + +

      #include "llvm/Module.h"
      doxygen info: @@ -3138,14 +3182,12 @@ href="#GlobalVariable">GlobalVariables, and a SymbolTable. Additionally, it contains a few helpful member functions that try to make common operations easy.

      -
      - - + -
      +
      • Module::Module(std::string name = "")
      • @@ -3244,13 +3286,14 @@ provide a name for it (probably based on the name of the translation unit).

      - - - -
      + +

      + The Value class +

      + +

      #include "llvm/Value.h"
      @@ -3301,14 +3344,12 @@ the class that represents this value. Although this may take some getting used to, it simplifies the representation and makes it easier to manipulate.

      -
      - - + -
      +
      • Value::use_iterator - Typedef for iterator over the @@ -3355,12 +3396,14 @@ Inst->replaceAllUsesWith(ConstVal);
      - - -
      + +

      + The User class +

      + +

      #include "llvm/User.h"
      @@ -3379,14 +3422,12 @@ Single Assignment (SSA) form, there can only be one definition referred to, allowing this direct connection. This connection provides the use-def information in LLVM.

      -
      - - + -
      +

      The User class exposes the operand list in two ways: through an index access interface and through an iterator based interface.

      @@ -3409,12 +3450,14 @@ the operands of a User.

      - - -
      + +

      + The Instruction class +

      + +

      #include "llvm/Instruction.h"
      @@ -3445,14 +3488,13 @@ href="#CmpInst">CmpInst). Unfortunately, the use of macros in this file confuses doxygen, so these enum values don't show up correctly in the doxygen output.

      -
      - - -
      +

      + + Important Subclasses of the Instruction class + +

      +
      • BinaryOperator

        This subclass represents all two operand instructions whose operands @@ -3471,12 +3513,13 @@ this file confuses doxygen, so these enum values don't show up correctly in the

      - +

      + + Important Public Members of the Instruction class + +

      -
      +
      • BasicBlock *getParent() @@ -3496,12 +3539,14 @@ and it has no name

      - - -
      + +

      + The Constant class and subclasses +

      + +

      Constant represents a base class for different types of constants. It is subclassed by ConstantInt, ConstantArray, etc. for representing @@ -3509,11 +3554,9 @@ the various types of Constants. GlobalValue is also a subclass, which represents the address of a global variable or function.

      -
      - -
      Important Subclasses of Constant
      -
      +

      Important Subclasses of Constant

      +
      • ConstantInt : This subclass of Constant represents an integer constant of any width. @@ -3561,13 +3604,14 @@ a subclass, which represents the address of a global variable or function.
      - - - -
      + +

      + The GlobalValue class +

      + +

      #include "llvm/GlobalValue.h"
      @@ -3607,15 +3651,14 @@ dereference the pointer with GetElementPtrInst first, then its elements can be accessed. This is explained in the LLVM Language Reference Manual.

      -
      - - +

      + + Important Public Members of the GlobalValue class + +

      -
      +
      • bool hasInternalLinkage() const
        @@ -3631,12 +3674,14 @@ GlobalValue is currently embedded into.

      - - -
      + +

      + The Function class +

      + +

      #include "llvm/Function.h"
      doxygen @@ -3683,15 +3728,15 @@ href="#Argument">Arguments in the function body.

      Note that Function is a GlobalValue and therefore also a Constant. The value of the function is its address (after linking) which is guaranteed to be constant.

      -
      - +

      + + Important Public Members of the Function class + +

      -
      +
      • Function(const FunctionType @@ -3769,12 +3814,14 @@ iterator
      - - -
      + +

      + The GlobalVariable class +

      + +

      #include "llvm/GlobalVariable.h" @@ -3796,15 +3843,15 @@ variables may have an initial value (which must be a Constant), and if they have an initializer, they may be marked as "constant" themselves (indicating that their contents never change at runtime).

      -
      - +

      + + Important Public Members of the GlobalVariable class + +

      -
      +
      • GlobalVariable(const Type *Ty, bool @@ -3842,13 +3889,14 @@ never change at runtime).

      - - - -
      + +

      + The BasicBlock class +

      + +

      #include "llvm/BasicBlock.h"
      @@ -3873,15 +3921,14 @@ href="#Value">Values, because they are referenced by instructions like branches and can go in the switch tables. BasicBlocks have type label.

      -
      - - +

      + + Important Public Members of the BasicBlock class + +

      -
      +
      • BasicBlock(const std::string &Name = "",
      - - - -
      + +

      + The Argument class +

      + +

      This subclass of Value defines the interface for incoming formal arguments to a function. A Function maintains a list of its formal @@ -3947,6 +3995,8 @@ arguments. An argument has a pointer to the parent Function.

      +
      +
      @@ -3957,8 +4007,8 @@ arguments. An argument has a pointer to the parent Function.

      Dinakar Dhurjati and Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-17 03:19:22 +0100 (Thu, 17 Feb 2011) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
      diff --git a/docs/Projects.html b/docs/Projects.html index 76da086657c1..3c6d4ad78ba4 100644 --- a/docs/Projects.html +++ b/docs/Projects.html @@ -7,7 +7,7 @@ -
      Creating an LLVM Project
      +

      Creating an LLVM Project

      1. Overview
      2. @@ -30,10 +30,10 @@
      - +

      Overview

      -
      +

      The LLVM build system is designed to facilitate the building of third party projects that use LLVM header files, libraries, and tools. In order to use @@ -73,12 +73,12 @@ provide enough information on how to write your own Makefiles.

      - + -
      +

      Follow these simple steps to start your project:

      @@ -145,12 +145,12 @@ project should build.

      - + -
      +

      In order to use the LLVM build system, you will want to organize your source code so that it can benefit from the build system's features. @@ -230,26 +230,24 @@ your tools directory.

      - + -
      +

      The LLVM build system provides a convenient way to build libraries and executables. Most of your project Makefiles will only need to define a few variables. Below is a list of the variables one can set and what they can do:

      -
      - - + -
      +
      LEVEL @@ -263,11 +261,11 @@ do:

      - + -
      +
      DIRS @@ -294,11 +292,11 @@ do:

      - + -
      +
      LIBRARYNAME @@ -325,11 +323,11 @@ do:

      - + -
      +
      TOOLNAME @@ -368,11 +366,11 @@ do:

      - + -
      +
      ExtraSource @@ -398,13 +396,15 @@ do:

      - - + + +

      + Placement of Object Code +

      -
      +

      The final location of built libraries and executables will depend upon whether you do a Debug, Release, or Profile build.

      @@ -427,12 +427,12 @@ whether you do a Debug, Release, or Profile build.

      - + -
      +

      If you have any questions or need any help creating an LLVM project, the LLVM team would be more than happy to help. You can always post your @@ -451,9 +451,9 @@ Mailing List.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> John Criswell
      - The LLVM Compiler Infrastructure + The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html index 84298376a732..71bf16ea9167 100644 --- a/docs/ReleaseNotes.html +++ b/docs/ReleaseNotes.html @@ -3,13 +3,12 @@ - - LLVM 2.8 Release Notes + LLVM 2.9 Release Notes -
      LLVM 2.8 Release Notes
      +

      LLVM 2.9 Release Notes

      LLVM Dragon Logo @@ -17,35 +16,35 @@
      1. Introduction
      2. Sub-project Status Update
      3. -
      4. External Projects Using LLVM 2.8
      5. -
      6. What's New in LLVM 2.8?
      7. +
      8. External Projects Using LLVM 2.9
      9. +
      10. What's New in LLVM 2.9?
      11. Installation Instructions
      12. Known Problems
      13. Additional Information
      -

      Written by the LLVM Team

      +

      Written by the LLVM Team

      + --> - + -
      +

      This document contains the release notes for the LLVM Compiler -Infrastructure, release 2.8. Here we describe the status of LLVM, including +Infrastructure, release 2.9. Here we describe the status of LLVM, including major improvements from the previous release and significant known problems. All LLVM releases may be downloaded from the LLVM releases web site.

      @@ -62,51 +61,37 @@ current one. To see the release notes for a specific release, please see the releases page.

      - - - - - - - - - + -
      +

      -The LLVM 2.8 distribution currently consists of code from the core LLVM +The LLVM 2.9 distribution currently consists of code from the core LLVM repository (which roughly includes the LLVM optimizers, code generators and supporting tools), the Clang repository and the llvm-gcc repository. In addition to this code, the LLVM Project includes other sub-projects that are in development. Here we include updates on these subprojects.

      -
      - - - + -
      +

      Clang is an LLVM front end for the C, C++, and Objective-C languages. Clang aims to provide a better user experience @@ -115,112 +100,61 @@ standards, fast compilation, and low memory use. Like LLVM, Clang provides a modular, library-based architecture that makes it suitable for creating or integrating with other development tools. Clang is considered a production-quality compiler for C, Objective-C, C++ and Objective-C++ on x86 -(32- and 64-bit), and for darwin-arm targets.

      +(32- and 64-bit), and for darwin/arm targets.

      -

      In the LLVM 2.8 time-frame, the Clang team has made many improvements:

      - -
        -
      • Clang C++ is now feature-complete with respect to the ISO C++ 1998 and 2003 standards.
      • -
      • Added support for Objective-C++.
      • -
      • Clang now uses LLVM-MC to directly generate object code and to parse inline assembly (on Darwin).
      • -
      • Introduced many new warnings, including -Wmissing-field-initializers, -Wshadow, -Wno-protocol, -Wtautological-compare, -Wstrict-selector-match, -Wcast-align, -Wunused improvements, and greatly improved format-string checking.
      • -
      • Introduced the "libclang" library, a C interface to Clang intended to support IDE clients.
      • -
      • Added support for #pragma GCC visibility, #pragma align, and others.
      • -
      • Added support for SSE, AVX, ARM NEON, and AltiVec.
      • -
      • Improved support for many Microsoft extensions.
      • -
      • Implemented support for blocks in C++.
      • -
      • Implemented precompiled headers for C++.
      • -
      • Improved abstract syntax trees to retain more accurate source information.
      • -
      • Added driver support for handling LLVM IR and bitcode files directly.
      • -
      • Major improvements to compiler correctness for exception handling.
      • -
      • Improved generated code quality in some areas: -
          -
        • Good code generation for X86-32 and X86-64 ABI handling.
        • -
        • Improved code generation for bit-fields, although important work remains.
        • -
        -
      • -
      -
      - - - - -
      - -

      The Clang Static Analyzer - project is an effort to use static source code analysis techniques to - automatically find bugs in C and Objective-C programs (and hopefully C++ in the - future!). The tool is very good at finding bugs that occur on specific - paths through code, such as on error conditions.

      - -

      The LLVM 2.8 release fixes a number of bugs and slightly improves precision - over 2.7, but there are no major new features in the release. +

      In the LLVM 2.9 time-frame, the Clang team has made many improvements in C, +C++ and Objective-C support. C++ support is now generally rock solid, has +been exercised on a broad variety of code, and has several new C++'0x features +implemented (such as rvalue references and variadic templates). LLVM 2.9 has +also brought in a large range of bug fixes and minor features (e.g. __label__ +support), and is much more compatible with the Linux Kernel.

      + +

      If Clang rejects your code but another compiler accepts it, please take a +look at the language +compatibility guide to make sure this is not intentional or a known issue.

      - +

      +DragonEgg: GCC front-ends, LLVM back-end +

      -
      +

      -DragonEgg is a port of llvm-gcc to -gcc-4.5. Unlike llvm-gcc, dragonegg in theory does not require any gcc-4.5 -modifications whatsoever (currently one small patch is needed) thanks to the -new gcc plugin architecture. -DragonEgg is a gcc plugin that makes gcc-4.5 use the LLVM optimizers and code -generators instead of gcc's, just like with llvm-gcc. +DragonEgg is a +gcc plugin that replaces GCC's +optimizers and code generators with LLVM's. +Currently it requires a patched version of gcc-4.5. +The plugin can target the x86-32 and x86-64 processor families and has been +used successfully on the Darwin, FreeBSD and Linux platforms. +The Ada, C, C++ and Fortran languages work well. +The plugin is capable of compiling plenty of Obj-C, Obj-C++ and Java but it is +not known whether the compiled code actually works or not!

      -DragonEgg is still a work in progress, but it is able to compile a lot of code, -for example all of gcc, LLVM and clang. Currently Ada, C, C++ and Fortran work -well, while all other languages either don't work at all or only work poorly. -For the moment only the x86-32 and x86-64 targets are supported, and only on -linux and darwin (darwin may need additional gcc patches). -

      - -

      -The 2.8 release has the following notable changes: +The 2.9 release has the following notable changes:

        -
      • The plugin loads faster due to exporting fewer symbols.
      • -
      • Additional vector operations such as addps256 are now supported.
      • -
      • Ada global variables with no initial value are no longer zero initialized, -resulting in better optimization.
      • -
      • The '-fplugin-arg-dragonegg-enable-gcc-optzns' flag now runs all gcc -optimizers, rather than just a handful.
      • -
      • Fortran programs using common variables now link correctly.
      • -
      • GNU OMP constructs no longer crash the compiler.
      • +
      • The plugin is much more stable when compiling Fortran.
      • +
      • Inline assembly where an asm output is tied to an input of a different size +is now supported in many more cases.
      • +
      • Basic support for the __float128 type was added. It is now possible to +generate LLVM IR from programs using __float128 but code generation does not +work yet.
      • +
      • Compiling Java programs no longer systematically crashes the plugin.
      - - -
      -

      -The VMKit project is an implementation of -a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and -just-in-time compilation. As of LLVM 2.8, VMKit now supports copying garbage -collectors, and can be configured to use MMTk's copy mark-sweep garbage -collector. In LLVM 2.8, the VMKit .NET VM is no longer being maintained. -

      -
      - - - + -
      +

      The new LLVM compiler-rt project is a simple library that provides an implementation of the low-level @@ -231,21 +165,22 @@ function. The compiler-rt library provides highly optimized implementations of this and other low-level routines (some are 3x faster than the equivalent libgcc routines).

      -

      -All of the code in the compiler-rt project is available under the standard LLVM -License, a "BSD-style" license. New in LLVM 2.8, compiler_rt now supports -soft floating point (for targets that don't have a real floating point unit), -and includes an extensive testsuite for the "blocks" language feature and the -blocks runtime included in compiler_rt.

      +

      In the LLVM 2.9 timeframe, compiler_rt has had several minor changes for + better ARM support, and a fairly major license change. All of the code in the + compiler-rt project is now dual + licensed under MIT and UIUC license, which allows you to use compiler-rt + in applications without the binary copyright reproduction clause. If you + prefer the LLVM/UIUC license, you are free to continue using it under that + license as well.

      - + -
      +

      LLDB is a brand new member of the LLVM umbrella of projects. LLDB is a next generation, high-performance debugger. It @@ -254,20 +189,20 @@ libraries in the larger LLVM Project, such as the Clang expression parser, the LLVM disassembler and the LLVM JIT.

      -LLDB is in early development and not included as part of the LLVM 2.8 release, -but is mature enough to support basic debugging scenarios on Mac OS X in C, -Objective-C and C++. We'd really like help extending and expanding LLDB to -support new platforms, new languages, new architectures, and new features. -

      +LLDB has advanced by leaps and bounds in the 2.9 timeframe. It is +dramatically more stable and useful, and includes both a new tutorial and a side-by-side comparison with +GDB.

      - + -
      +

      libc++ is another new member of the LLVM family. It is an implementation of the C++ standard library, written from the @@ -275,21 +210,56 @@ ground up to specifically target the forthcoming C++'0X standard and focus on delivering great performance.

      -As of the LLVM 2.8 release, libc++ is virtually feature complete, but would -benefit from more testing and better integration with Clang++. It is also -looking forward to the C++ committee finalizing the C++'0x standard. +In the LLVM 2.9 timeframe, libc++ has had numerous bugs fixed, and is now being +co-developed with Clang's C++'0x mode.

      + +

      +Like compiler_rt, libc++ is now dual + licensed under the MIT and UIUC license, allowing it to be used more + permissively.

      - -
      -KLEE: A Symbolic Execution Virtual Machine +

      +LLBrowse: IR Browser +

      + +
      +

      + + LLBrowse is an interactive viewer for LLVM modules. It can load any LLVM + module and displays its contents as an expandable tree view, facilitating an + easy way to inspect types, functions, global variables, or metadata nodes. It + is fully cross-platform, being based on the popular wxWidgets GUI toolkit. +

      -
      + +

      +VMKit +

      + +
      +

      The VMKit project is an implementation + of a Java Virtual Machine (Java VM or JVM) that uses LLVM for static and + just-in-time compilation. As of LLVM 2.9, VMKit now supports generational + garbage collectors. The garbage collectors are provided by the MMTk framework, + and VMKit can be configured to use one of the numerous implemented collectors + of MMTk. +

      +
      + + + +
      - - +

      + External Open Source Projects Using LLVM 2.9 +

      -
      +

      An exciting aspect of LLVM is that it is used as an enabling technology for a lot of other language and tools projects. This section lists some of the - projects that have already been updated to work with LLVM 2.8.

      + projects that have already been updated to work with LLVM 2.9.

      + + +

      Crack Programming Language

      + +
      +

      +Crack aims to provide the +ease of development of a scripting language with the performance of a compiled +language. The language derives concepts from C++, Java and Python, incorporating +object-oriented programming, operator overloading and strong typing.

      +
      + + + +

      TTA-based Codesign Environment (TCE)

      + +
      +

      TCE is a toolset for designing application-specific processors (ASP) based on +the Transport triggered architecture (TTA). The toolset provides a complete +co-design flow from C/C++ programs down to synthesizable VHDL and parallel +program binaries. Processor customization points include the register files, +function units, supported operations, and the interconnection network.

      + +

      TCE uses Clang and LLVM for C/C++ language support, target independent +optimizations and also for parts of code generation. It generates new LLVM-based +code generators "on the fly" for the designed TTA processors and loads them in +to the compiler backend as runtime libraries to avoid per-target recompilation +of larger parts of the compiler chain.

      +
      + + + + +

      PinaVM

      + +
      +

      PinaVM is an open +source, SystemC front-end. Unlike many +other front-ends, PinaVM actually executes the elaboration of the +program analyzed using LLVM's JIT infrastructure. It later enriches the +bitcode with SystemC-specific information.

      - - -
      -

      -TCE is a toolset for designing -application-specific processors (ASP) based on the Transport triggered -architecture (TTA). The toolset provides a complete co-design flow from C/C++ -programs down to synthesizable VHDL and parallel program binaries. Processor -customization points include the register files, function units, supported -operations, and the interconnection network.

      - -

      TCE uses llvm-gcc/Clang and LLVM for C/C++ language support, target -independent optimizations and also for parts of code generation. It generates -new LLVM-based code generators "on the fly" for the designed TTA processors and -loads them in to the compiler backend as runtime libraries to avoid per-target -recompilation of larger parts of the compiler chain.

      - +

      Pure

      + +
      +

      Pure is an + algebraic/functional + programming language based on term rewriting. Programs are collections + of equations which are used to evaluate expressions in a symbolic + fashion. The interpreter uses LLVM as a backend to JIT-compile Pure + programs to fast native code. Pure offers dynamic typing, eager and lazy + evaluation, lexical closures, a hygienic macro system (also based on + term rewriting), built-in list and matrix support (including list and + matrix comprehensions) and an easy-to-use interface to C and other + programming languages (including the ability to load LLVM bitcode + modules, and inline C, C++, Fortran and Faust code in Pure programs if + the corresponding LLVM-enabled compilers are installed).

      + +

      Pure version 0.47 has been tested and is known to work with LLVM 2.9 + (and continues to work with older LLVM releases >= 2.5).

      - +

      IcedTea Java Virtual Machine Implementation

      -
      +

      -Horizon is a bytecode -language and compiler written on top of LLVM, intended for producing -single-address-space managed code operating systems that -run faster than the equivalent multiple-address-space C systems. -More in-depth blurb is available on the wiki.

      - -
      - - - - -
      -

      -Clam AntiVirus is an open source (GPL) -anti-virus toolkit for UNIX, designed especially for e-mail scanning on mail -gateways. Since version 0.96 it has bytecode -signatures that allow writing detections for complex malware. It -uses LLVM's JIT to speed up the execution of bytecode on -X86, X86-64, PPC32/64, falling back to its own interpreter otherwise. -The git version was updated to work with LLVM 2.8. +IcedTea provides a +harness to build OpenJDK using only free software build tools and to provide +replacements for the not-yet free parts of OpenJDK. One of the extensions that +IcedTea provides is a new JIT compiler named Shark which uses LLVM +to provide native code generation without introducing processor-dependent +code.

      -

      The -ClamAV bytecode compiler uses Clang and LLVM to compile a C-like -language, insert runtime checks, and generate ClamAV bytecode.

      - +

      OpenJDK 7 b112, IcedTea6 1.9 and IcedTea7 1.13 and later have been tested +and are known to work with LLVM 2.9 (and continue to work with older LLVM +releases >= 2.6 as well).

      -
      -Pure -
      - -
      -

      -Pure -is an algebraic/functional -programming language based on term rewriting. Programs are collections -of equations which are used to evaluate expressions in a symbolic -fashion. Pure offers dynamic typing, eager and lazy evaluation, lexical -closures, a hygienic macro system (also based on term rewriting), -built-in list and matrix support (including list and matrix -comprehensions) and an easy-to-use C interface. The interpreter uses -LLVM as a backend to JIT-compile Pure programs to fast native code.

      - -

      Pure versions 0.44 and later have been tested and are known to work with -LLVM 2.8 (and continue to work with older LLVM releases >= 2.5).

      - -
      - - - - -
      -

      -GHC is an open source, -state-of-the-art programming suite for -Haskell, a standard lazy functional programming language. It includes -an optimizing static compiler generating good code for a variety of +

      Glasgow Haskell Compiler (GHC)

      + +
      +

      GHC is an open source, state-of-the-art programming suite for Haskell, +a standard lazy functional programming language. It includes an +optimizing static compiler generating good code for a variety of platforms, together with an interactive system for convenient, quick development.

      In addition to the existing C and native code generators, GHC 7.0 now -supports an LLVM -code generator. GHC supports LLVM 2.7 and later.

      - +supports an LLVM code generator. GHC supports LLVM 2.7 and later.

      - - -
      -

      -Clay is a new systems programming -language that is specifically designed for generic programming. It makes -generic programming very concise thanks to whole program type propagation. It -uses LLVM as its backend.

      - +

      Polly - Polyhedral optimizations for LLVM

      + +
      +

      Polly is a project that aims to provide advanced memory access optimizations +to better take advantage of SIMD units, cache hierarchies, multiple cores or +even vector accelerators for LLVM. Built around an abstract mathematical +description based on Z-polyhedra, it provides the infrastructure to develop +advanced optimizations in LLVM and to connect complex external optimizers. In +its first year of existence Polly already provides an exact value-based +dependency analysis as well as basic SIMD and OpenMP code generation support. +Furthermore, Polly can use PoCC (Pluto), an advanced optimizer for data-locality +and parallelism.

      - - -
      -

      -llvm-py has been updated to work -with LLVM 2.8. llvm-py provides Python bindings for LLVM, allowing you to write a -compiler backend or a VM in Python.

      +

      Rubinius

      +
      +

      Rubinius is an environment + for running Ruby code which strives to write as much of the implementation in + Ruby as possible. Combined with a bytecode interpreting VM, it uses LLVM to + optimize and compile ruby code down to machine code. Techniques such as type + feedback, method inlining, and deoptimization are all used to remove dynamism + from ruby execution and increase performance.

      - + -
      +

      FAUST is a compiled language for real-time audio signal processing. The name FAUST stands for Functional AUdio STream. Its programming model combines two approaches: functional programming and block diagram composition. In addition with the C, C++, JAVA output formats, the -Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7 and -2.8.

      +Faust compiler can now generate LLVM bitcode, and works with LLVM 2.7-2.9.

      - - - -
      -

      Jade -(Just-in-time Adaptive Decoder Engine) is a generic video decoder engine using -LLVM for just-in-time compilation of video decoder configurations. Those -configurations are designed by MPEG Reconfigurable Video Coding (RVC) committee. -MPEG RVC standard is built on a stream-based dataflow representation of -decoders. It is composed of a standard library of coding tools written in -RVC-CAL language and a dataflow configuration — block diagram — -of a decoder.

      - -

      Jade project is hosted as part of the Open -RVC-CAL Compiler and requires it to translate the RVC-CAL standard library -of video coding tools into an LLVM assembly code.

      - -
      - - - - -
      -

      Neko LLVM JIT -replaces the standard Neko JIT with an LLVM-based implementation. While not -fully complete, it is already providing a 1.5x speedup on 64-bit systems. -Neko LLVM JIT requires LLVM 2.8 or later.

      - -
      - - - - -
      -

      -Crack aims to provide -the ease of development of a scripting language with the performance of a -compiled language. The language derives concepts from C++, Java and Python, -incorporating object-oriented programming, operator overloading and strong -typing. Crack 0.2 works with LLVM 2.7, and the forthcoming Crack 0.2.1 release -builds on LLVM 2.8.

      - -
      - - - - -
      -

      -DTMC provides support for -Transactional Memory, which is an easy-to-use and efficient way to synchronize -accesses to shared memory. Transactions can contain normal C/C++ code (e.g., -__transaction { list.remove(x); x.refCount--; }) and will be executed -virtually atomically and isolated from other transactions.

      - -
      - - - - -
      -

      -Kai (Japanese 会 for -meeting/gathering) is an experimental interpreter that provides a highly -extensible runtime environment and explicit control over the compilation -process. Programs are defined using nested symbolic expressions, which are all -parsed into first-class values with minimal intrinsic semantics. Kai can -generate optimised code at run-time (using LLVM) in order to exploit the nature -of the underlying hardware and to integrate with external software libraries. -It is a unique exploration into world of dynamic code compilation, and the -interaction between high level and low level semantics.

      - -
      - - - - -
      -

      -OSL is a shading -language designed for use in physically based renderers and in particular -production rendering. By using LLVM instead of the interpreter, it was able to -meet its performance goals (>= C-code) while retaining the benefits of -runtime specialization and a portable high-level language. -

      - -
      - - - - +

      + What's New in LLVM 2.9? +

      -
      +

      This release includes a huge number of bug fixes, performance tweaks and minor improvements. Some of the major improvements and new features are listed in this section.

      -
      - - + -
      +
      -

      LLVM 2.8 includes several major new capabilities:

      +

      LLVM 2.9 includes several major new capabilities:

        -
      • As mentioned above, libc++ and LLDB are major new additions to the LLVM collective.
      • -
      • LLVM 2.8 now has pretty decent support for debugging optimized code. You - should be able to reliably get debug info for function arguments, assuming - that the value is actually available where you have stopped.
      • -
      • A new 'llvm-diff' tool is available that does a semantic diff of .ll - files.
      • -
      • The MC subproject has made major progress in this release. - Direct .o file writing support for darwin/x86[-64] is now reliable and - support for other targets and object file formats are in progress.
      • -
      + +
    13. Type Based Alias Analysis (TBAA) is now implemented and turned on by default + in Clang. This allows substantially better load/store optimization in some + cases. TBAA can be disabled by passing -fno-strict-aliasing. +
    14. +
    15. This release has seen a continued focus on quality of debug information. + LLVM now generates much higher fidelity debug information, particularly when + debugging optimized code.
    16. + +
    17. Inline assembly now supports multiple alternative constraints.
    18. + +
    19. A new backend for the NVIDIA PTX virtual ISA (used to target its GPUs) is + under rapid development. It is not generally useful in 2.9, but is making + rapid progress.
    20. + + +
      - + -
      +

      LLVM IR has several new features for better support of new targets and that expose new optimization opportunities:

        -
      • The memcpy, memmove, and memset - intrinsics now take address space qualified pointers and a bit to indicate - whether the transfer is "volatile" or not. -
      • -
      • Per-instruction debug info metadata is much faster and uses less memory by - using the new DebugLoc class.
      • -
      • LLVM IR now has a more formalized concept of "trap values", which allow the optimizer - to optimize more aggressively in the presence of undefined behavior, while - still producing predictable results.
      • -
      • LLVM IR now supports two new linkage - types (linker_private_weak and linker_private_weak_def_auto) which map - onto some obscure MachO concepts.
      • +
      • The udiv, ashr, lshr, and shl + instructions now support exact and nuw/nsw bits to indicate that they + don't overflow or shift out bits. This is useful for optimization of pointer differences and other cases.
      • + +
      • LLVM IR now supports the unnamed_addr + attribute to indicate that constant global variables with identical + initializers can be merged. This fixed an + issue where LLVM would incorrectly merge two globals which were supposed + to have distinct addresses.
      • + +
      • The new hotpatch attribute has been added + to allow runtime patching of functions.
      - + -
      +

      In addition to a large array of minor performance tweaks and bug fixes, this release includes a few major enhancements and additions to the optimizers:

        -
      • As mentioned above, the optimizer now has support for updating debug - information as it goes. A key aspect of this is the new llvm.dbg.value - intrinsic. This intrinsic represents debug info for variables that are - promoted to SSA values (typically by mem2reg or the -scalarrepl passes).
      • +
      • Link Time Optimization (LTO) has been improved to use MC for parsing inline + assembly and now can build large programs like Firefox 4 on both Mac OS X and + Linux.
      • + +
      • The new -loop-idiom pass recognizes memset/memcpy loops (and memset_pattern + on darwin), turning them into library calls, which are typically better + optimized than inline code. If you are building a libc and notice that your + memcpy and memset functions are compiled into infinite recursion, please build + with -ffreestanding or -fno-builtin to disable this pass.
      • + +
      • A new -early-cse pass does a fast pass over functions to fold constants, + simplify expressions, perform simple dead store elimination, and perform + common subexpression elimination. It does a good job at catching some of the + trivial redundancies that exist in unoptimized code, making later passes more + effective.
      • -
      • The JumpThreading pass is now much more aggressive about implied value - relations, allowing it to thread conditions like "a == 4" when a is known to - be 13 in one of the predecessors of a block. It does this in conjunction - with the new LazyValueInfo analysis pass.
      • -
      • The new RegionInfo analysis pass identifies single-entry single-exit regions - in the CFG. You can play with it with the "opt -regions -analyze" or - "opt -view-regions" commands.
      • -
      • The loop optimizer has significantly improved strength reduction and analysis - capabilities. Notably it is able to build on the trap value and signed - integer overflow information to optimize <= and >= loops.
      • -
      • The CallGraphSCCPassManager now has some basic support for iterating within - an SCC when a optimizer devirtualizes a function call. This allows inlining - through indirect call sites that are devirtualized by store-load forwarding - and other optimizations.
      • -
      • The new -loweratomic pass is available - to lower atomic instructions into their non-atomic form. This can be useful - to optimize generic code that expects to run in a single-threaded - environment.
      • +
      • A new -loop-instsimplify pass is used to clean up loop bodies in the loop + optimizer.
      • + +
      • The new TargetLibraryInfo interface allows mid-level optimizations to know + whether the current target's runtime library has certain functions. For + example, the optimizer can now transform integer-only printf calls to call + iprintf, allowing reduced code size for embedded C libraries (e.g. newlib). +
      • + +
      • LLVM has a new RegionPass + infrastructure for region-based optimizations.
      • + +
      • Several optimizer passes have been substantially sped up: + GVN is much faster on functions with deep dominator trees and lots of basic + blocks. The dominator tree and dominance frontier passes are much faster to + compute, and preserved by more passes (so they are computed less often). The + -scalar-repl pass is also much faster and doesn't use DominanceFrontier. +
      • + +
      • The Dead Store Elimination pass is more aggressive optimizing stores of + different types: e.g. a large store following a small one to the same address. + The MemCpyOptimizer pass handles several new forms of memcpy elimination.
      • + +
      • LLVM now optimizes various idioms for overflow detection into checks of the + flag register on various CPUs. For example, we now compile: + +
        +   unsigned long t = a+b;
        +   if (t < a) ...
        +  
        + into: +
        +   addq %rdi, %rbx
        +   jno  LBB0_2
        +  
        +
      • +
      - -
      - + -
      +

      The LLVM Machine Code (aka MC) subsystem was created to solve a number of problems in the realm of assembly, disassembly, object file format handling, and a number of other related areas that CPU instruction-set level tools work in.

      -

      The MC subproject has made great leaps in LLVM 2.8. For example, support for - directly writing .o files from LLC (and clang) now works reliably for - darwin/x86[-64] (including inline assembly support) and the integrated - assembler is turned on by default in Clang for these targets. This provides - improved compile times among other things.

      -
        -
      • The entire compiler has converted over to using the MCStreamer assembler API - instead of writing out a .s file textually.
      • -
      • The "assembler parser" is far more mature than in 2.7, supporting a full - complement of directives, now supports assembler macros, etc.
      • -
      • The "assembler backend" has been completed, including support for relaxation - relocation processing and all the other things that an assembler does.
      • -
      • The MachO file format support is now fully functional and works.
      • -
      • The MC disassembler now fully supports ARM and Thumb. ARM assembler support - is still in early development though.
      • -
      • The X86 MC assembler now supports the X86 AES and AVX instruction set.
      • -
      • Work on ELF and COFF object files and ARM target support is well underway, - but isn't useful yet in LLVM 2.8. Please contact the llvmdev mailing list - if you're interested in this.
      • +
      • ELF MC support has matured enough for the integrated assembler to be turned + on by default in Clang on X86-32 and X86-64 ELF systems.
      • + +
      • MC supports and CodeGen uses the .file and .loc directives + for producing line number debug info. This produces more compact line + tables and easier to read .s files.
      • + +
      • MC supports the .cfi_* directives for producing DWARF + frame information, but it is still not used by CodeGen by default.
      • + + +
      • The MC assembler now generates much better diagnostics for common errors, + is much faster at matching instructions, is much more bug-compatible with + the GAS assembler, and is now generally useful for a broad range of X86 + assembly.
      • + +
      • We now have some basic internals + documentation for MC.
      • + +
      • .td files can now specify assembler aliases directly with the MnemonicAlias and InstAlias + tblgen classes.
      • + +
      • LLVM now has an experimental format-independent object file manipulation + library (lib/Object). It supports both PE/COFF and ELF. The llvm-nm tool has + been extended to work with native object files, and the new llvm-objdump tool + supports disassembly of object files (but no relocations are displayed yet). +
      • + +
      • Win32 PE-COFF support in the MC assembler has made a lot of progress in the + 2.9 timeframe, but is still not generally useful.
      • +

      For more information, please see the Intro to the LLVM MC Project Blog Post.

      -
      - - - - -
      + +

      +Target Independent Code Generator Improvements +

      + +

      We have put a significant amount of work into the code generator infrastructure, which allows us to implement more aggressive algorithms and make it run faster:

        -
      • The clang/gcc -momit-leaf-frame-pointer argument is now supported.
      • -
      • The clang/gcc -ffunction-sections and -fdata-sections arguments are now - supported on ELF targets (like GCC).
      • -
      • The MachineCSE pass is now tuned and on by default. It eliminates common - subexpressions that are exposed when lowering to machine instructions.
      • -
      • The "local" register allocator was replaced by a new "fast" register - allocator. This new allocator (which is often used at -O0) is substantially - faster and produces better code than the old local register allocator.
      • -
      • A new LLC "-regalloc=default" option is available, which automatically - chooses a register allocator based on the -O optimization level.
      • -
      • The common code generator code was modified to promote illegal argument and - return value vectors to wider ones when possible instead of scalarizing - them. For example, <3 x float> will now pass in one SSE register - instead of 3 on X86. This generates substantially better code since the - rest of the code generator was already expecting this.
      • -
      • The code generator uses a new "COPY" machine instruction. This speeds up - the code generator and eliminates the need for targets to implement the - isMoveInstr hook. Also, the copyRegToReg hook was renamed to copyPhysReg - and simplified.
      • -
      • The code generator now has a "LocalStackSlotPass", which optimizes stack - slot access for targets (like ARM) that have limited stack displacement - addressing.
      • -
      • A new "PeepholeOptimizer" is available, which eliminates sign and zero - extends, and optimizes away compare instructions when the condition result - is available from a previous instruction.
      • -
      • Atomic operations now get legalized into simpler atomic operations if not - natively supported, easing the implementation burden on targets.
      • -
      • We have added two new bottom-up pre-allocation register pressure aware schedulers: -
          -
        1. The hybrid scheduler schedules aggressively to minimize schedule length when registers are available and avoid overscheduling in high pressure situations.
        2. -
        3. The instruction-level-parallelism scheduler schedules for maximum ILP when registers are available and avoid overscheduling in high pressure situations.
        4. -
      • -
      • The tblgen type inference algorithm was rewritten to be more consistent and - diagnose more target bugs. If you have an out-of-tree backend, you may - find that it finds bugs in your target description. This support also - allows limited support for writing patterns for instructions that return - multiple results (e.g. a virtual register and a flag result). The - 'parallel' modifier in tblgen was removed, you should use the new support - for multiple results instead.
      • -
      • A new (experimental) "-rendermf" pass is available which renders a - MachineFunction into HTML, showing live ranges and other useful - details.
      • -
      • The new SubRegIndex tablegen class allows subregisters to be indexed - symbolically instead of numerically. If your target uses subregisters you - will need to adapt to use SubRegIndex when you upgrade to 2.8.
      • - +
      • The pre-register-allocation (preRA) instruction scheduler models register + pressure much more accurately in some cases. This allows the adoption of more + aggressive scheduling heuristics without causing spills to be generated. +
      • + +
      • LiveDebugVariables is a new pass that keeps track of debugging information + for user variables that are promoted to registers in optimized builds.
      • -
      • The -fast-isel instruction selection path (used at -O0 on X86) was rewritten - to work bottom-up on basic blocks instead of top down. This makes it - slightly faster (because the MachineDCE pass is not needed any longer) and - allows it to generate better code in some cases.
      • +
      • The scheduler now models operand latency and pipeline forwarding.
      • +
      • A major register allocator infrastructure rewrite is underway. It is not on + by default for 2.9 and you are not advised to use it, but it has made + substantial progress in the 2.9 timeframe: +
          +
        • A new -regalloc=basic "basic" register allocator can be used as a simple + fallback when debugging. It uses the new infrastructure.
        • +
        • New infrastructure is in place for live range splitting. "SplitKit" can + break a live interval into smaller pieces while preserving SSA form, and + SpillPlacement can help find the best split points. This is a work in + progress so the API is changing quickly.
        • +
        • The inline spiller has learned to clean up after live range splitting. It + can hoist spills out of loops, and it can eliminate redundant spills.
        • +
        • Rematerialization works with live range splitting.
        • +
        • The new "greedy" register allocator uses live range splitting. This will + be the default register allocator in the next LLVM release, but it is not + turned on by default in 2.9.
        • +
        +
      - + -
      +

      New features and major changes in the X86 target include:

        -
      • The X86 backend now supports holding X87 floating point stack values - in registers across basic blocks, dramatically improving performance of code - that uses long double, and when targeting CPUs that don't support SSE.
      • - -
      • The X86 backend now uses a SSEDomainFix pass to optimize SSE operations. On - Nehalem ("Core i7") and newer CPUs there is a 2 cycle latency penalty on - using a register in a different domain than where it was defined. This pass - optimizes away these stalls.
      • - -
      • The X86 backend now promotes 16-bit integer operations to 32-bits when - possible. This avoids 0x66 prefixes, which are slow on some - microarchitectures and bloat the code on all of them.
      • - -
      • The X86 backend now supports the Microsoft "thiscall" calling convention, - and a calling convention to support - ghc.
      • - -
      • The X86 backend supports a new "llvm.x86.int" intrinsic, which maps onto - the X86 "int $42" and "int3" instructions.
      • - -
      • At the IR level, the <2 x float> datatype is now promoted and passed - around as a <4 x float> instead of being passed and returned as an MMX - vector. If you have a frontend that uses this, please pass and return a - <2 x i32> instead (using bitcasts).
      • - -
      • When printing .s files in verbose assembly mode (the default for clang -S), - the X86 backend now decodes X86 shuffle instructions and prints human - readable comments after the most inscrutable of them, e.g.: - -
        -  insertps $113, %xmm3, %xmm0 # xmm0 = zero,xmm0[1,2],xmm3[1]
        -  unpcklps %xmm1, %xmm0       # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
        -  pshufd   $1, %xmm1, %xmm1   # xmm1 = xmm1[1,0,0,0]
        -
        +
      • LLVM 2.9 includes a complete reimplementation of the MMX instruction set. + The reimplementation uses a new LLVM IR x86_mmx type to ensure that MMX operations + are only generated from source that uses MMX builtin operations. With + this, random types like <2 x i32> are not turned into MMX operations + (which can be catastrophic without proper "emms" insertion). Because the X86 + code generator always generates reliable code, the -disable-mmx flag is now + removed.
      • - + +
      • X86 support for FS/GS relative loads and stores using address space 256/257 works reliably + now.
      • + +
      • LLVM 2.9 generates much better code in several cases by using adc/sbb to + avoid generation of conditional move instructions for conditional increment + and other idioms.
      • + +
      • The X86 backend has adopted a new preRA scheduling mode, "list-ilp", to + shorten the height of instruction schedules without inducing register spills. +
      • + +
      • The MC assembler supports 3dNow! and 3DNowA instructions.
      • + +
      • Several bugs have been fixed for the Windows x64 code generator.
      - + -
      +

      New features of the ARM target include:

        -
      • The ARM backend now optimizes tail calls into jumps.
      • -
      • Scheduling is improved through the new list-hybrid scheduler as well - as through better modeling of structural hazards.
      • -
      • Half float instructions are now - supported.
      • -
      • NEON support has been improved to model instructions which operate onto - multiple consecutive registers more aggressively. This avoids lots of - extraneous register copies.
      • -
      • The ARM backend now uses a new "ARMGlobalMerge" pass, which merges several - global variables into one, saving extra address computation (all the global - variables can be accessed via same base address) and potentially reducing - register pressure.
      • +
      • The ARM backend now has a fast instruction selector, which dramatically + improves -O0 compile times.
      • +
      • The ARM backend has new tuning for Cortex-A8 and Cortex-A9 CPUs.
      • +
      • The __builtin_prefetch builtin (and llvm.prefetch intrinsic) is compiled + into prefetch instructions instead of being discarded.
      • -
      • The ARM backend has received many minor improvements and tweaks which lead - to substantially better performance in a wide range of different scenarios. -
      • +
      • The ARM backend preRA scheduler now models machine resources at cycle + granularity. This allows the scheduler to both accurately model + instruction latency and avoid overcommitting functional units.
      • -
      • The ARM NEON intrinsics have been substantially reworked to reduce - redundancy and improve code generation. Some of the major changes are: -
          -
        1. - All of the NEON load and store intrinsics (llvm.arm.neon.vld* and - llvm.arm.neon.vst*) take an extra parameter to specify the alignment in bytes - of the memory being accessed. -
        2. -
        3. - The llvm.arm.neon.vaba intrinsic (vector absolute difference and - accumulate) has been removed. This operation is now represented using - the llvm.arm.neon.vabd intrinsic (vector absolute difference) followed by a - vector add. -
        4. -
        5. - The llvm.arm.neon.vabdl and llvm.arm.neon.vabal intrinsics (lengthening - vector absolute difference with and without accumulation) have been removed. - They are represented using the llvm.arm.neon.vabd intrinsic (vector absolute - difference) followed by a vector zero-extend operation, and for vabal, - a vector add. -
        6. -
        7. - The llvm.arm.neon.vmovn intrinsic has been removed. Calls of this intrinsic - are now replaced by vector truncate operations. -
        8. -
        9. - The llvm.arm.neon.vmovls and llvm.arm.neon.vmovlu intrinsics have been - removed. They are now represented as vector sign-extend (vmovls) and - zero-extend (vmovlu) operations. -
        10. -
        11. - The llvm.arm.neon.vaddl*, llvm.arm.neon.vaddw*, llvm.arm.neon.vsubl*, and - llvm.arm.neon.vsubw* intrinsics (lengthening vector add and subtract) have - been removed. They are replaced by vector add and vector subtract operations - where one (vaddw, vsubw) or both (vaddl, vsubl) of the operands are either - sign-extended or zero-extended. -
        12. -
        13. - The llvm.arm.neon.vmulls, llvm.arm.neon.vmullu, llvm.arm.neon.vmlal*, and - llvm.arm.neon.vmlsl* intrinsics (lengthening vector multiply with and without - accumulation and subtraction) have been removed. These operations are now - represented as vector multiplications where the operands are either - sign-extended or zero-extended, followed by a vector add for vmlal or a - vector subtract for vmlsl. Note that the polynomial vector multiply - intrinsic, llvm.arm.neon.vmullp, remains unchanged. -
        14. -
        -
      • +
      • Countless ARM microoptimizations have landed in LLVM 2.9.
      • +
      +
      + + +

      +Other Target Specific Improvements +

      + +
      +
        +
      • MicroBlaze: major updates for aggressive delay slot filler, MC-based + assembly printing, assembly instruction parsing, ELF .o file emission, and MC + instruction disassembler have landed.
      • + +
      • SPARC: Many improvements, including using the Y registers for + multiplications and addition of a simple delay slot filler.
      • + +
      • PowerPC: The backend has been largely MC'ized and is ready to support + directly writing out mach-o object files. No one seems interested in finishing + this final step though.
      • + +
      • Mips: Improved o32 ABI support, including better varargs handling. +More instructions supported in codegen: madd, msub, rotr, rotrv and clo. +It also now supports lowering block addresses.
      - - + -
      +

      If you're already an LLVM user or developer with out-of-tree changes based -on LLVM 2.7, this section lists some "gotchas" that you may run into upgrading +on LLVM 2.8, this section lists some "gotchas" that you may run into upgrading from the previous release.

        -
      • The build configuration machinery changed the output directory names. It - wasn't clear to many people that a "Release-Asserts" build was a release build - without asserts. To make this more clear, "Release" does not include - assertions and "Release+Asserts" does (likewise, "Debug" and - "Debug+Asserts").
      • -
      • The MSIL Backend was removed, it was unsupported and broken.
      • -
      • The ABCD, SSI, and SCCVN passes were removed. These were not fully - functional and their behavior has been or will be subsumed by the - LazyValueInfo pass.
      • -
      • The LLVM IR 'Union' feature was removed. While this is a desirable feature - for LLVM IR to support, the existing implementation was half baked and - barely useful. We'd really like anyone interested to resurrect the work and - finish it for a future release.
      • -
      • If you're used to reading .ll files, you'll probably notice that .ll file - dumps don't produce #uses comments anymore. To get them, run a .bc file - through "llvm-dis --show-annotations".
      • -
      • Target triples are now stored in a normalized form, and all inputs from - humans are expected to be normalized by Triple::normalize before being - stored in a module triple or passed to another library.
      • -
      +
    21. This is the last release to support the llvm-gcc frontend.
    22. - - -

      In addition, many APIs have changed in this release. Some of the major LLVM -API changes are:

      -
        -
      • LLVM 2.8 changes the internal order of operands in InvokeInst - and CallInst. - To be portable across releases, please use the CallSite class and the - high-level accessors, such as getCalledValue and - setUnwindDest. -
      • -
      • - You can no longer pass use_iterators directly to cast<> (and similar), - because these routines tend to perform costly dereference operations more - than once. You have to dereference the iterators yourself and pass them in. -
      • -
      • - llvm.memcpy.*, llvm.memset.*, llvm.memmove.* intrinsics take an extra - parameter now ("i1 isVolatile"), totaling 5 parameters, and the pointer - operands are now address-space qualified. - If you were creating these intrinsic calls and prototypes yourself (as opposed - to using Intrinsic::getDeclaration), you can use - UpgradeIntrinsicFunction/UpgradeIntrinsicCall to be portable across releases. -
      • -
      • - SetCurrentDebugLocation takes a DebugLoc now instead of a MDNode. - Change your code to use - SetCurrentDebugLocation(DebugLoc::getFromDILocation(...)). -
      • -
      • - The RegisterPass and RegisterAnalysisGroup templates are - considered deprecated, but continue to function in LLVM 2.8. Clients are - strongly advised to use the upcoming INITIALIZE_PASS() and - INITIALIZE_AG_PASS() macros instead. -
      • -
      • - The constructor for the Triple class no longer tries to understand odd triple - specifications. Frontends should ensure that they only pass valid triples to - LLVM. The Triple::normalize utility method has been added to help front-ends - deal with funky triples. -
      • -
      • - The signature of the GCMetadataPrinter::finishAssembly virtual - function changed: the raw_ostream and MCAsmInfo arguments - were dropped. GC plugins which compute stack maps must be updated to avoid - having the old definition overload the new signature. -
      • -
      • - The signature of MemoryBuffer::getMemBuffer changed. Unfortunately - calls intended for the old version still compile, but will not work correctly, - leading to a confusing error about an invalid header in the bitcode. -
      • +
      • LLVM has a new naming + convention standard, though the codebase hasn't fully adopted it yet.
      • -
      • - Some APIs were renamed: -
          -
        • llvm_report_error -> report_fatal_error
        • -
        • llvm_install_error_handler -> install_fatal_error_handler
        • -
        • llvm::DwarfExceptionHandling -> llvm::JITExceptionHandling
        • -
        • VISIBILITY_HIDDEN -> LLVM_LIBRARY_VISIBILITY
        • -
        -
      • +
      • The new DIBuilder class provides a simpler interface for front ends to + encode debug info in LLVM IR, and has replaced DIFactory.
      • -
      • - Some public headers were renamed: -
          -
        • llvm/Assembly/AsmAnnotationWriter.h was renamed - to llvm/Assembly/AssemblyAnnotationWriter.h -
        • -
        +
      • LLVM IR and other tools always work on normalized target triples (which have + been run through Triple::normalize).
      • + +
      • The target triple x86_64--mingw64 is obsoleted. Use x86_64--mingw32 + instead.
      • + +
      • The PointerTracking pass has been removed from mainline, and moved to The + ClamAV project (its only client).
      • + +
      • The LoopIndexSplit, LiveValues, SimplifyHalfPowrLibCalls, GEPSplitter, and + PartialSpecialization passes were removed. They were unmaintained, + buggy, or deemed to be a bad idea.
      - +

      +Internal API Changes +

      -
      +
      -

      This section lists changes to the LLVM development infrastructure. This -mostly impacts users who actively work on LLVM or follow development on -mainline, but may also impact users who leverage the LLVM build infrastructure -or are interested in LLVM qualification.

      +

      In addition, many APIs have changed in this release. Some of the major + LLVM API changes are:

        -
      • The default for make check is now to use - the lit testing tool, which is - part of LLVM itself. You can use lit directly as well, or use - the llvm-lit tool which is created as part of a Makefile or CMake - build (and knows how to find the appropriate tools). See the lit - documentation and the blog - post, and PR5217 - for more information.
      • +
      • include/llvm/System merged into include/llvm/Support.
      • +
      • The llvm::APInt API was significantly + cleaned up.
      • -
      • The LLVM test-suite infrastructure has a new "simple" test format - (make TEST=simple). The new format is intended to require only a - compiler and not a full set of LLVM tools. This makes it useful for testing - released compilers, for running the test suite with other compilers (for - performance comparisons), and makes sure that we are testing the compiler as - users would see it. The new format is also designed to work using reference - outputs instead of comparison to a baseline compiler, which makes it run much - faster and makes it less system dependent.
      • +
      • In the code generator, MVT::Flag was renamed to MVT::Glue to more accurately + describe its behavior.
      • -
      • Significant progress has been made on a new interface to running the - LLVM test-suite (aka the LLVM "nightly tests") using - the LNT infrastructure. The LNT - interface to the test-suite brings significantly improved reporting - capabilities for monitoring the correctness and generated code quality - produced by LLVM over time.
      • +
      • The system_error header from C++0x was added, and is now pervasively used to + capture and handle i/o and other errors in LLVM.
      • + +
      • The old sys::Path API has been deprecated in favor of the new PathV2 API, + which is more efficient and flexible.
      - - + + +

      + Known Problems +

      -
      +

      This section contains significant known problems with the LLVM system, listed by component. If you run into a problem, please check the LLVM bug database and submit a bug if there isn't already one.

      -
      - - + -
      +

      The following components of this LLVM release are either untested, known to be broken or unreliable, or are in early development. These components should @@ -1114,43 +841,54 @@ components, please contact us on the LLVMdev list.

        -
      • The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, SystemZ +
      • The Alpha, Blackfin, CellSPU, MicroBlaze, MSP430, MIPS, PTX, SystemZ and XCore backends are experimental.
      • llc "-filetype=obj" is experimental on all targets - other than darwin-i386 and darwin-x86_64.
      • + other than darwin and ELF X86 systems. +
      - + -
      +
      • The X86 backend does not yet support all inline assembly that uses the X86 floating point stack. It supports the 'f' and 't' constraints, but not 'u'.
      • -
      • Win64 code generation wasn't widely tested. Everything should work, but we - expect small issues to happen. Also, llvm-gcc cannot build the mingw64 - runtime currently due to lack of support for the 'u' inline assembly - constraint and for X87 floating point inline assembly.
      • The X86-64 backend does not yet support the LLVM IR instruction va_arg. Currently, front-ends support variadic argument constructs on X86-64 by lowering them manually.
      • +
      • Windows x64 (aka Win64) code generator has a few issues. +
          +
        • llvm-gcc cannot build the mingw-w64 runtime currently + due to lack of support for the 'u' inline assembly + constraint and for X87 floating point inline assembly.
        • +
        • On mingw-w64, you will see unresolved symbol __chkstk + due to Bug 8919. + It is fixed in r128206.
        • +
        • Misaligned MOVDQA might crash your program. It is due to + Bug 9483, + lack of handling aligned internal globals.
        • +
        +
      • +
      - + -
      +
      • The Linux PPC32/ABI support needs testing for the interpreter and static @@ -1160,11 +898,11 @@ compilation, and lacks support for debug information.
      - + -
      +
      • Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6 @@ -1177,11 +915,11 @@ results (PR1388).
      - + -
      +
      • The SPARC backend only supports the 32-bit SPARC ABI (-m32); it does not @@ -1191,11 +929,11 @@ results (PR1388).
      - + -
      +
      • 64-bit MIPS targets are not supported yet.
      • @@ -1204,11 +942,11 @@ results (PR1388).
      - + -
      +
        @@ -1219,11 +957,11 @@ appropriate nops inserted to ensure restartability.
      - + -
      +

      The C backend has numerous problems and is not being actively maintained. Depending on it for anything serious is not advised.

      @@ -1242,11 +980,13 @@ Depending on it for anything serious is not advised.

      - + -
      +
      + +

      LLVM 2.9 will be the last release of llvm-gcc.

      llvm-gcc is generally very stable for the C family of languages. The only major language feature of GCC not supported by llvm-gcc is the @@ -1267,16 +1007,18 @@ actively maintained. If you are interested in Ada, we recommend that you consider using dragonegg instead.

      - - + + +

      + Additional Information +

      -
      +

      A wide variety of additional information is available on the LLVM web page, in particular in the LLVM web page, in particular in the documentation section. The web page also contains versions of the API documentation which is up-to-date with the Subversion version of the source code. @@ -1299,7 +1041,7 @@ lists.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-10-26 14:43:36 +0200 (Tue, 26 Oct 2010) $ + Last modified: $Date: 2011-04-21 03:52:00 +0200 (Thu, 21 Apr 2011) $ diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html index 186ea4abe732..4cfb52ee3a60 100644 --- a/docs/SourceLevelDebugging.html +++ b/docs/SourceLevelDebugging.html @@ -8,7 +8,7 @@ -
      Source Level Debugging with LLVM
      +

      Source Level Debugging with LLVM

      @@ -68,10 +68,10 @@ height="369"> - +

      Introduction

      -
      +

      This document is the central repository for all information pertaining to debug information in LLVM. It describes the actual format @@ -80,14 +80,12 @@ height="369"> Further, this document provides specific examples of what debug information for C/C++ looks like.

      -
      - - + -
      +

      The idea of the LLVM debugging information is to capture how the important pieces of the source-language's Abstract Syntax Tree map onto LLVM code. @@ -133,11 +131,11 @@ height="369">

      - + -
      +

      The role of debug information is to provide meta information normally stripped away during the compilation process. This meta information provides @@ -157,11 +155,11 @@ height="369">

      - + -
      +

      An extremely high priority of LLVM debugging information is to make it interact well with optimizations and analysis. In particular, the LLVM debug @@ -226,13 +224,15 @@ height="369">

      - - + + +

      + Debugging information format +

      -
      +

      LLVM debugging information has been carefully designed to make it possible for the optimizer to optimize the program and debugging information without @@ -265,14 +265,12 @@ height="369"> common to any source-language. The next section describes the data layout conventions used by the C and C++ front-ends.

      -
      - - + -
      +

      In consideration of the complexity and volume of debug information, LLVM provides a specification for well formed debug descriptors.

      @@ -312,14 +310,12 @@ height="369">

      The details of the various descriptors follow.

      -
      - - + -
      +
      @@ -351,11 +347,11 @@ height="369">
       
      - + -
      +
      @@ -380,11 +376,11 @@ height="369">
       
      - + -
      +
      @@ -407,16 +403,17 @@ height="369">
       

      These descriptors provide debug information about globals variables. The -provide details such as name, type and where the variable is defined.

      +provide details such as name, type and where the variable is defined. All +global variables are collected by named metadata !llvm.dbg.gv.

      - + -
      +
      @@ -432,30 +429,35 @@ provide details such as name, type and where the variable is defined.

      i32, ;; Line number where defined metadata, ;; Reference to type descriptor i1, ;; True if the global is local to compile unit (static) - i1 ;; True if the global is defined in the compile unit (not extern) - i32 ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual - i32 ;; Index into a virtual function + i1, ;; True if the global is defined in the compile unit (not extern) + i32, ;; Virtuality, e.g. dwarf::DW_VIRTUALITY__virtual + i32, ;; Index into a virtual function metadata, ;; indicates which base type contains the vtable pointer for the ;; derived class - i1 ;; isArtificial - i1 ;; isOptimized - Function *;; Pointer to LLVM function + i1, ;; isArtificial + i1, ;; isOptimized + Function *,;; Pointer to LLVM function + metadata, ;; Lists function template parameters + metadata ;; Function declaration descriptor }

      These descriptors provide debug information about functions, methods and subprograms. They provide details such as name, return types and the source - location where the subprogram is defined.

      + location where the subprogram is defined. + All subprogram descriptors are collected by a named metadata + !llvm.dbg.sp. +

      - + -
      +
      @@ -463,7 +465,9 @@ provide details such as name, type and where the variable is defined.

      i32, ;; Tag = 11 + LLVMDebugVersion (DW_TAG_lexical_block) metadata,;; Reference to context descriptor i32, ;; Line number - i32 ;; Column number + i32, ;; Column number + metadata,;; Reference to source file + i32 ;; Unique ID to identify blocks from a template function }
      @@ -475,11 +479,11 @@ provide details such as name, type and where the variable is defined.

      - + -
      +
      @@ -527,11 +531,11 @@ DW_ATE_unsigned_char = 8
       
      - + -
      +
      @@ -544,7 +548,12 @@ DW_ATE_unsigned_char = 8
         i64,      ;; Size in bits
         i64,      ;; Alignment in bits
         i64,      ;; Offset in bits
      -  metadata  ;; Reference to type derived from
      +  metadata, ;; Reference to type derived from
      +  metadata, ;; (optional) Name of the Objective C property associated with 
      +            ;; an Objective-C ivar 
      +  metadata, ;; (optional) Name of the Objective C property getter selector.
      +  metadata, ;; (optional) Name of the Objective C property setter selector.
      +  i32       ;; (optional) Objective C property attributes.
       }
       
      @@ -594,11 +603,11 @@ DW_TAG_restrict_type = 55
      - + -
      +
      @@ -644,7 +653,8 @@ DW_TAG_inheritance      = 28
       
       

      The members of enumeration types (tag = DW_TAG_enumeration_type) are enumerator descriptors, each representing - the definition of enumeration value for the set.

      + the definition of enumeration value for the set. All enumeration type + descriptors are collected by named metadata !llvm.dbg.enum.

      The members of structure (tag = DW_TAG_structure_type) or union (tag = DW_TAG_union_type) types are any one of @@ -680,11 +690,11 @@ DW_TAG_inheritance = 28

      - + -
      +
      @@ -700,16 +710,17 @@ DW_TAG_inheritance      = 28
          composite type.  The low value defines
          the lower bounds typically zero for C/C++.  The high value is the upper
          bounds.  Values are 64 bit.  High - low + 1 is the size of the array.  If low
      -   == high the array will be unbounded.

      + > high the array bounds are not included in generated debugging information. +

      - + -
      +
      @@ -729,11 +740,11 @@ DW_TAG_inheritance      = 28
       
      - + -
      +
      @@ -742,7 +753,8 @@ DW_TAG_inheritance      = 28
         metadata, ;; Context
         metadata, ;; Name
         metadata, ;; Reference to file where defined
      -  i32,      ;; Line number where defined
      +  i32,      ;; 24 bit - Line number where defined
      +            ;; 8 bit - Argument number. 1 indicates 1st argument.
         metadata  ;; Type descriptor
       }
       
      @@ -771,39 +783,39 @@ DW_TAG_return_variable = 258
      - - -
      + +

      + Debugger intrinsic functions +

      + +

      LLVM uses several intrinsic functions (name prefixed with "llvm.dbg") to provide debug information at various points in generated code.

      -
      - - + -
      +
         void %llvm.dbg.declare(metadata, metadata)
       

      This intrinsic provides information about a local element (ex. variable.) The - first argument is metadata holding alloca for the variable.. The + first argument is metadata holding alloca for the variable. The second argument is metadata containing description of the variable.

      - + -
      +
         void %llvm.dbg.value(metadata, i64, metadata)
       
      @@ -815,12 +827,14 @@ DW_TAG_return_variable = 258 user source variable.

      - - -
      + +

      + Object lifetimes and scoping +

      + +

      In many languages, the local variables in functions can have their lifetimes or scopes limited to a subset of a function. In the C family of languages, for example, variables are only live (readable and writable) within the @@ -978,13 +992,15 @@ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14

      - - + + +

      + C/C++ front-end specific debug information +

      -
      +

      The C and C++ front-ends represent information about the program in a format that is effectively identical @@ -1005,14 +1021,12 @@ call void @llvm.dbg.declare(metadata, metadata !12), !dbg !14

      The following sections provide examples of various C/C++ constructs and the debug information that would best describe those constructs.

      -
      - - + -
      +

      Given the source files MySource.cpp and MyHeader.h located in the directory /Users/mine/sources, the following code:

      @@ -1086,11 +1100,11 @@ using Instruction::getMetadata() and
      - + -
      +

      Given an integer global variable declared as follows:

      @@ -1156,11 +1170,11 @@ int MyGlobal = 100;
      - + -
      +

      Given a function declared as follows:

      @@ -1192,7 +1206,14 @@ int main(int argc, char *argv[]) { i32 1, ;; Line number metadata !4, ;; Type i1 false, ;; Is local - i1 true ;; Is definition + i1 true, ;; Is definition + i32 0, ;; Virtuality attribute, e.g. pure virtual function + i32 0, ;; Index into virtual table for C++ methods + i32 0, ;; Type that holds virtual table. + i32 0, ;; Flags + i1 false, ;; True if this function is optimized + Function *, ;; Pointer to llvm::Function + null ;; Function template parameters } ;; ;; Define the subprogram itself. @@ -1206,22 +1227,20 @@ define i32 @main(i32 %argc, i8** %argv) {
      - + -
      +

      The following are the basic type descriptors for C/C++ core types:

      -
      - -
      +

      bool -

      + -
      +
      @@ -1243,11 +1262,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      -
      +

      char -

      + -
      +
      @@ -1269,11 +1288,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      - + -
      +
      @@ -1295,11 +1314,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      -
      +

      short -

      + -
      +
      @@ -1321,11 +1340,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      - + -
      +
      @@ -1347,11 +1366,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      -
      +

      int -

      + -
      +
      @@ -1372,11 +1391,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      - + -
      +
      @@ -1398,11 +1417,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      - + -
      +
      @@ -1424,11 +1443,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      - + -
      +
      @@ -1450,11 +1469,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      -
      +

      float -

      + -
      +
      @@ -1476,11 +1495,11 @@ define i32 @main(i32 %argc, i8** %argv) {
       
      -
      +

      double -

      + -
      +
      @@ -1501,12 +1520,14 @@ define i32 @main(i32 %argc, i8** %argv) {
       
       
      - - -
      + +

      + C/C++ derived types +

      + +

      Given the following as an example of C/C++ derived type:

      @@ -1587,11 +1608,11 @@ typedef const int *IntPtr;
      - + -
      +

      Given the following as an example of C/C++ struct type:

      @@ -1700,11 +1721,11 @@ struct Color {
      - + -
      +

      Given the following as an example of C/C++ enumeration type:

      @@ -1765,6 +1786,8 @@ enum Trees {
      +
      +
      @@ -1775,8 +1798,8 @@ enum Trees { src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
      - LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-03 01:22:17 +0100 (Thu, 03 Feb 2011) $ + LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/SystemLibrary.html b/docs/SystemLibrary.html index b81b1a80d25d..57dc2391d535 100644 --- a/docs/SystemLibrary.html +++ b/docs/SystemLibrary.html @@ -7,7 +7,7 @@ -
      System Library
      +

      System Library

      • Abstract
      • Keeping LLVM Portable @@ -36,8 +36,8 @@ - -
        +

        Abstract

        +

        This document provides some details on LLVM's System Library, located in the source at lib/System and include/llvm/System. The library's purpose is to shield LLVM from the differences between operating @@ -63,21 +63,19 @@

        - -
        + +

        In order to keep LLVM portable, LLVM developers should adhere to a set of portability rules associated with the System Library. Adherence to these rules should help the System Library achieve its goal of shielding LLVM from the variations in operating system interfaces and doing so efficiently. The following sections define the rules needed to fulfill this objective.

        -
        - -
        +

        Don't Include System Headers

        +

        Except in lib/System, no LLVM source code should directly #include a system header. Care has been taken to remove all such #includes from LLVM while lib/System was being @@ -91,9 +89,8 @@

        - -
        +

        Don't Expose System Headers

        +

        The System Library must shield LLVM from all system headers. To obtain system level functionality, LLVM source must #include "llvm/System/Thing.h" and nothing else. This means that @@ -103,8 +100,8 @@

        - -
        +

        Use Standard C Headers

        +

        The standard C headers (the ones beginning with "c") are allowed to be exposed through the lib/System interface. These headers and the things they declare are considered to be platform agnostic. LLVM source @@ -113,9 +110,8 @@

        - -
        +

        Use Standard C++ Headers

        +

        The standard C++ headers from the standard C++ library and standard template library may be exposed through the lib/System interface. These headers and the things they declare are considered to be @@ -124,8 +120,8 @@

        - -
        +

        High Level Interface

        +

        The entry points specified in the interface of lib/System must be aimed at completing some reasonably high level task needed by LLVM. We do not want to simply wrap each operating system call. It would be preferable to wrap several @@ -143,8 +139,8 @@

        - -
        +

        No Unused Functionality

        +

        There must be no functionality specified in the interface of lib/System that isn't actually used by LLVM. We're not writing a general purpose operating system wrapper here, just enough to satisfy LLVM's needs. And, LLVM @@ -153,9 +149,8 @@

        - -
        +

        No Duplicate Implementations

        +

        The implementation of a function for a given platform must be written exactly once. This implies that it must be possible to apply a function's implementation to multiple operating systems if those operating systems can @@ -165,8 +160,8 @@

        - -
        +

        No Virtual Methods

        +

        The System Library interfaces can be called quite frequently by LLVM. In order to make those calls as efficient as possible, we discourage the use of virtual methods. There is no need to use inheritance for implementation @@ -175,8 +170,8 @@

        - -
        +

        No Exposed Functions

        +

        Any functions defined by system libraries (i.e. not defined by lib/System) must not be exposed through the lib/System interface, even if the header file for that function is not exposed. This prevents inadvertent use of system @@ -191,8 +186,8 @@

        - -
        +

        No Exposed Data

        +

        Any data defined by system libraries (i.e. not defined by lib/System) must not be exposed through the lib/System interface, even if the header file for that function is not exposed. As with functions, this prevents inadvertent use @@ -200,8 +195,8 @@

        - -
        +

        Minimize Soft Errors

        +

        Operating system interfaces will generally provide error results for every little thing that could go wrong. In almost all cases, you can divide these error results into two groups: normal/good/soft and abnormal/bad/hard. That @@ -239,9 +234,8 @@

        - -
        +

        No throw Specifications

        +

        None of the lib/System interface functions may be declared with C++ throw() specifications on them. This requirement makes sure that the compiler does not insert additional exception handling code into the interface @@ -252,8 +246,8 @@

        - -
        +

        Code Organization

        +

        Implementations of the System Library interface are separated by their general class of operating system. Currently only Unix and Win32 classes are defined but more could be added for other operating system classifications. @@ -281,8 +275,8 @@

        - -
        +

        Consistent Semantics

        +

        The implementation of a lib/System interface can vary drastically between platforms. That's okay as long as the end result of the interface function is the same. For example, a function to create a directory is pretty straight @@ -296,12 +290,14 @@

        - -
        +

        Bug 351

        +

        See bug 351 for further details on the progress of this work

        +
        +
        @@ -312,8 +308,8 @@ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Reid Spencer
        - LLVM Compiler Infrastructure
        - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + LLVM Compiler Infrastructure
        + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/TableGenFundamentals.html b/docs/TableGenFundamentals.html index d118332c9939..37ca04621a34 100644 --- a/docs/TableGenFundamentals.html +++ b/docs/TableGenFundamentals.html @@ -7,9 +7,9 @@ -
        TableGen Fundamentals
        +

        TableGen Fundamentals

        -
        +
        - +

        Introduction

        -
        +

        TableGen's purpose is to help a human develop and maintain records of domain-specific information. Because there may be a large number of these @@ -72,12 +72,10 @@ find an emacs "TableGen mode" and a vim language file in the llvm/utils/emacs and llvm/utils/vim directories of your LLVM distribution, respectively.

        -
        - - +

        Basic concepts

        -
        +

        TableGen files consist of two key parts: 'classes' and 'definitions', both of which are considered 'records'.

        @@ -112,9 +110,9 @@ multiclass, as if they were declared in the current multiclass.

        - +

        An example record

        -
        +

        With no other arguments, TableGen parses the specified file and prints out all of the classes, then all of the definitions. This is a good way to see what @@ -212,9 +210,9 @@ abstractions they prefer to use when describing their information.

        - +

        Running TableGen

        -
        +

        TableGen runs just like any other LLVM tool. The first (optional) argument specifies the file to read. If a filename is not specified, tblgen @@ -256,27 +254,28 @@ what you need and formats it in the appropriate way.

        +
        - +

        TableGen syntax

        -
        +

        TableGen doesn't care about the meaning of data (that is up to the backend to define), but it does care about syntax, and it enforces a simple type system. This section describes the syntax and the constructs allowed in a TableGen file.

        -
        - - +

        TableGen primitives

        + +
        - +

        TableGen comments

        -
        +

        TableGen supports BCPL style "//" comments, which run to the end of the line, and it also supports nestable "/* */" comments.

        @@ -284,11 +283,11 @@ the line, and it also supports nestable "/* */" comments.

        - + -
        +

        TableGen files are strongly typed, in a simple (but complete) type-system. These types are used to perform automatic conversions, check for errors, and to @@ -344,11 +343,11 @@ needed.

        - + -
        +

        TableGen allows for a pretty reasonable number of different expression forms when building up values. These forms allow the TableGen file to be written in a @@ -433,12 +432,14 @@ to a "bits<4>" value, for example.

        - - -
        + +

        + Classes and definitions +

        + +

        As mentioned in the intro, classes and definitions (collectively known as 'records') in TableGen are the main high-level unit of @@ -473,14 +474,12 @@ between a group of records and isolating it in a single place. Also, classes permit the specification of default values for their subclasses, allowing the subclasses to override them as they wish.

        -
        - - + -
        +

        Value definitions define named entries in records. A value must be defined before it can be referred to as the operand for another value definition or @@ -492,11 +491,11 @@ equal sign. Value definitions require terminating semicolons.

        - + -
        +

        A record-level let expression is used to change the value of a value definition in a record. This is primarily useful when a superclass defines a @@ -519,11 +518,11 @@ because the D class overrode its value.

        - + -
        +

        TableGen permits the definition of parameterized classes as well as normal concrete classes. Parameterized TableGen classes specify a list of variable @@ -610,11 +609,11 @@ X86 backend.

        - + -
        +

        While classes with template arguments are a good way to factor commonality @@ -772,17 +771,21 @@ before them.

        - - + +

        + File scope entities +

        + +
        + - + -
        +

        TableGen supports the 'include' token, which textually substitutes the specified file in place of the include directive. The filename should be specified as a double quoted string immediately after the 'include' @@ -797,11 +800,11 @@ keyword. Example:

        - + -
        +

        "Let" expressions at file scope are similar to "let" expressions within a record, except they can specify a value binding for @@ -864,11 +867,15 @@ several levels of multiclass instanciations. This also avoids the need of using

        +
        + +
        + - +

        Code Generator backend info

        -
        +

        Expressions used by code generator to describe instructions and isel patterns:

        @@ -882,10 +889,10 @@ patterns:

        - +

        TableGen backends

        -
        +

        TODO: How they work, how to write one. This section should not contain details about any particular backend, except maybe -print-enums as an example. @@ -903,8 +910,8 @@ This should highlight the APIs in TableGen/Record.h.

        src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
        - LLVM Compiler Infrastructure
        - Last modified: $Date: 2011-01-07 18:05:37 +0100 (Fri, 07 Jan 2011) $ + LLVM Compiler Infrastructure
        + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html index 964bdc31247d..4fc4c70011ab 100644 --- a/docs/TestingGuide.html +++ b/docs/TestingGuide.html @@ -7,9 +7,9 @@ -
        +

        LLVM Testing Infrastructure Guide -

        +
        1. Overview
        2. @@ -52,10 +52,10 @@
        - +

        Overview

        -
        +

        This document is the reference manual for the LLVM testing infrastructure. It documents the structure of the LLVM testing infrastructure, the tools needed to @@ -64,10 +64,10 @@ use it, and how to add and run tests.

        - +

        Requirements

        -
        +

        In order to use the LLVM testing infrastructure, you will need all of the software required to build LLVM, as well @@ -76,10 +76,10 @@ as Python 2.4 or later.

        - +

        LLVM testing infrastructure organization

        -
        +

        The LLVM testing infrastructure contains two major categories of tests: regression tests and whole programs. The regression tests are contained inside @@ -89,13 +89,11 @@ referred to as the "LLVM test suite" and are in the test-suite module in subversion.

        -
        - - +

        Regression tests

        -
        +

        The regression tests are small pieces of code that test a specific feature of LLVM or trigger a specific bug in LLVM. They are usually written in LLVM @@ -119,10 +117,10 @@ application or benchmark.

        - +

        Test suite

        -
        +

        The test suite contains whole programs, which are pieces of code which can be compiled and linked into a stand-alone program that can be @@ -144,11 +142,10 @@ generates code.

        - +

        Debugging Information tests

        -
        +

        The test suite contains tests to check quality of debugging information. The tests are written in C-based languages or in LLVM assembly language.

        @@ -160,11 +157,13 @@ test suite for more information . This test suite is located in the
        +
        + - +

        Quick start

        -
        +

        The tests are located in two separate Subversion modules. The regression tests are in the main "llvm" module under the directory @@ -179,7 +178,8 @@ the test-suite directory will be automatically configured. Alternatively, you can configure the test-suite module manually.

        - +

        Regression tests

        +

        To run all of the LLVM regression tests, use master Makefile in the llvm/test directory:

        @@ -198,7 +198,7 @@ Alternatively, you can configure the test-suite module manually.

        -

        If you have Clang checked out and built, +

        If you have Clang checked out and built, you can run the LLVM and Clang tests simultaneously using:

        or

        @@ -239,10 +239,14 @@ script which is built as part of LLVM. For example, to run the

        For more information on using the 'lit' tool, see 'llvm-lit --help' or the 'lit' man page.

        +
        + - +

        Test suite

        +
        +

        To run the comprehensive test suite (tests that compile and execute whole programs), first checkout and setup the test-suite module:

        @@ -292,9 +296,10 @@ that subdirectory.

        - +

        Debugging Information tests

        +
        +

        To run debugging information tests simply checkout the tests inside clang/test directory.

        @@ -310,10 +315,14 @@ clang/test directory.

        +
        + +
        + - +

        Regression test structure

        -
        +

        The LLVM regression tests are driven by 'lit' and are located in the llvm/test directory. @@ -335,12 +344,10 @@ clang/test directory.

      • Verifier: tests the IR verifier.
      -
      - - +

      Writing new regression tests

      -
      +

      The regression test structure is very simple, but does require some information to be set. This information is gathered via configure and is written to a file, lit.site.cfg @@ -492,10 +499,10 @@ negatives).

      - +

      The FileCheck utility

      -
      +

      A powerful feature of the RUN: lines is that it allows any arbitrary commands to be executed as part of the test harness. While standard (portable) unix @@ -561,13 +568,12 @@ is a "subl" in between those labels. If it existed somewhere else in the file, that would not count: "grep subl" matches if subl exists anywhere in the file.

      -
      - - +

      + The FileCheck -check-prefix option +

      -
      +

      The FileCheck -check-prefix option allows multiple test configurations to be driven from one .ll file. This is useful in many circumstances, for example, @@ -598,10 +604,11 @@ both 32-bit and 64-bit code generation.

      - +

      + The "CHECK-NEXT:" directive +

      -
      +

      Sometimes you want to match lines and would like to verify that matches happen on exactly consecutive lines with no other lines in between them. In @@ -638,10 +645,11 @@ directive in a file.

      - +

      + The "CHECK-NOT:" directive +

      -
      +

      The CHECK-NOT: directive is used to verify that a string doesn't occur between two matches (or the first match and the beginning of the file). For @@ -668,10 +676,11 @@ define i8 @coerce_offset0(i32 %V, i32* %P) {

      - +

      + FileCheck Pattern Matching Syntax +

      -
      +

      The CHECK: and CHECK-NOT: directives both take a pattern to match. For most uses of FileCheck, fixed string matching is perfectly sufficient. For some @@ -700,10 +709,11 @@ braces explicitly from the input, you can use something ugly like

      - +

      + FileCheck Variables +

      -
      +

      It is often useful to match a pattern and then verify that it occurs again later in the file. For codegen tests, this can be useful to allow any register, @@ -738,11 +748,12 @@ define two separate CHECK lines that match on the same line.

      +
      + - +

      Variables and substitutions

      -
      +

      With a RUN line there are a number of substitutions that are permitted. In general, any Tcl variable that is available in the substitute function (in test/lib/llvm.exp) can be substituted into a RUN line. @@ -835,9 +846,9 @@ substitutions

      - +

      Other Features

      -
      +

      To make RUN line writing easier, there are several shell scripts located in the llvm/test/Scripts directory. This directory is in the PATH when running tests, so you can just call these scripts using their name. For @@ -892,12 +903,13 @@ substitutions

      +
      + - +

      Test suite Structure

      -
      +

      The test-suite module contains a number of programs that can be compiled with LLVM and executed. These programs are compiled using the native compiler @@ -962,10 +974,10 @@ will help you separate benign warnings from actual test failures.

      - +

      Running the test suite

      -
      +

      First, all tests are executed within the LLVM object directory tree. They are not executed inside of the LLVM source tree. This is because the @@ -1020,14 +1032,13 @@ test suite creates temporary files during execution.

      have the suite checked out and configured, you don't need to do it again (unless the test code or configure script changes).

      -
      - - +

      + Configuring External Tests +

      -
      +

      In order to run the External tests in the test-suite module, you must specify --with-externals. This must be done during the re-configuration step (see above), @@ -1055,10 +1066,11 @@ the test code or configure script changes).

      - +

      + Running different tests +

      -
      +

      In addition to the regular "whole program" tests, the test-suite module also provides a mechanism for compiling the programs in different ways. If the variable TEST is defined on the gmake command line, the test system will @@ -1078,10 +1090,11 @@ LLVM.

      - +

      + Generating test output +

      -
      +

      There are a number of ways to run the tests and generate output. The most simple one is simply running gmake with no arguments. This will compile and run all programs in the tree using a number of different methods @@ -1109,11 +1122,12 @@ LLVM.

      - +

      + Writing custom tests for the test suite +

      -
      +

      Assuming you can run the test suite, (e.g. "gmake TEST=nightly report" should work), it is really easy to run optimizations or code generator @@ -1179,6 +1193,8 @@ example reports that can do fancy stuff.

      +
      +
      @@ -1189,8 +1205,8 @@ example reports that can do fancy stuff.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> John T. Criswell, Daniel Dunbar, Reid Spencer, and Tanya Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-15 10:23:02 +0100 (Tue, 15 Feb 2011) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/UsingLibraries.html b/docs/UsingLibraries.html index ea28dbec0cc4..2c1c69a69a5f 100644 --- a/docs/UsingLibraries.html +++ b/docs/UsingLibraries.html @@ -5,7 +5,7 @@ -
      Using The LLVM Libraries
      +

      Using The LLVM Libraries

      1. Abstract
      2. Introduction
      3. @@ -26,12 +26,12 @@

        Warning: This document is out of date, for more information please see llvm-config or, - if you use CMake, the CMake LLVM + if you use CMake, the CMake LLVM guide.

        - -
        +

        Abstract

        +

        Amongst other things, LLVM is a toolkit for building compilers, linkers, runtime executives, virtual machines, and other program execution related tools. In addition to the LLVM tool set, the functionality of LLVM is @@ -45,8 +45,8 @@

        - -
        +

        Introduction

        +

        If you're writing a compiler, virtual machine, or any other utility based on LLVM, you'll need to figure out which of the many libraries files you will need to link with to be successful. An understanding of the contents of these @@ -74,8 +74,8 @@ correct for your tool can sometimes be challenging.

        -
        Library Descriptions
        -
        +

        Library Descriptions

        +

        The table below categorizes each library

      @@ -152,8 +152,8 @@ -
      Using llvm-config
      -
      +

      Using llvm-config

      +

      The llvm-config tool is a perl script that produces on its output various kinds of information. For example, the source or object directories used to build LLVM can be accessed by passing options to llvm-config. @@ -187,16 +187,16 @@ -

      Dependency Relationships Of Libraries

      +

      Dependency Relationships Of Libraries

      This graph shows the dependency of archive libraries on other archive libraries or objects. Where a library has both archive and object forms, only the archive form is shown.

      - Library Dependencies -

      Dependency Relationships Of Object Files

      + Library Dependencies +

      Dependency Relationships Of Object Files

      This graph shows the dependency of object files on archive libraries or other objects. Where a library has both object and archive forms, only the dependency to the archive form is shown.

      - Object File Dependencies + Object File Dependencies

      The following list shows the dependency relationships between libraries in textual form. The information is the same as shown on the graphs but arranged alphabetically.

      @@ -280,8 +280,8 @@
    23. libLLVMSystem.a
    24. libLLVMbzip2.a
    25. -
      libLLVMSystem.a
        -
      +
      libLLVMSystem.a
      +
      libLLVMTarget.a
      • libLLVMCore.a
      • libLLVMSupport.a
      • @@ -295,8 +295,8 @@
      • libLLVMTarget.a
      • libLLVMipa.a
      -
      libLLVMbzip2.a
        -
      +
      libLLVMbzip2.a
      +
      libLLVMipa.a
      • libLLVMAnalysis.a
      • libLLVMCore.a
      • @@ -401,42 +401,46 @@
      - -
      +

      Linkage Rules Of Thumb

      +

      This section contains various "rules of thumb" about what files you should link into your programs.

      -
      - -
      +

      + Always Link LLVMCore, LLVMSupport, and LLVMSystem +

      +

      No matter what you do with LLVM, the last three entries in the value of your LLVMLIBS make variable should always be: LLVMCore LLVMSupport.a LLVMSystem.a. There are no LLVM programs that don't depend on these three.

      - -
      +

      + Never link both archive and re-linked library +

      +

      There is never any point to linking both the re-linked (.o) and the archive (.a) versions of a library. Since the re-linked version includes the entire library, the archive version will not resolve any symbols. You could even end up with link error if you place the archive version before the re-linked version on the linker's command line.

      + +
      +
      +The LLVM Compiler Infrastructure +
      Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
      - + -
      +

      This document describes techniques for writing compiler backends that convert @@ -91,13 +91,11 @@ characteristics, such as a RISC instruction set and straightforward calling conventions.

      -
      - - + -
      +

      The audience for this document is anyone who needs to write an LLVM backend to @@ -106,21 +104,21 @@ generate code for a specific hardware or software target.

      - + -
      +

      These essential documents must be read before reading this document:

        -
      • LLVM Language Reference +
      • LLVM Language Reference Manual — a reference manual for the LLVM assembly language.
      • -
      • The LLVM +
      • The LLVM Target-Independent Code Generator — a guide to the components (classes and code generation algorithms) for translating the LLVM internal representation into machine code for a specified target. Pay particular @@ -129,14 +127,14 @@ These essential documents must be read before reading this document: Allocation, Prolog/Epilog Code Insertion, Late Machine Code Optimizations, and Code Emission.
      • -
      • TableGen +
      • TableGen Fundamentals —a document that describes the TableGen (tblgen) application that manages domain-specific information to support LLVM code generation. TableGen processes input from a target description file (.td suffix) and generates C++ code that can be used for code generation.
      • -
      • Writing an LLVM +
      • Writing an LLVM Pass — The assembly printer is a FunctionPass, as are several SelectionDAG processing steps.
      @@ -155,11 +153,11 @@ machine dependent features.
      - + -
      +

      To write a compiler backend for LLVM that converts the LLVM IR to code for a @@ -220,17 +218,17 @@ that the class will need and which components will need to be subclassed.

      - + -
      +

      To actually create your compiler backend, you need to create and modify a few files. The absolute minimum is discussed here. But to actually use the LLVM target-independent code generator, you must perform the steps described in -the LLVM +the LLVM Target-Independent Code Generator document.

      @@ -281,13 +279,15 @@ regenerate configure by running ./autoconf/AutoRegen.sh.
      - - + + +

      + Target Machine +

      -
      +

      LLVMTargetMachine is designed as a base class for targets implemented @@ -360,11 +360,6 @@ public:

      -
      - - -
      -
      • getInstrInfo()
      • getRegisterInfo()
      • @@ -398,10 +393,6 @@ SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &a
      -
      - -
      -

      Hyphens separate portions of the TargetDescription string.

        @@ -424,12 +415,12 @@ SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &a
      - + -
      +

      You must also register your target with the TargetRegistry, which is @@ -480,12 +471,12 @@ For more information, see

      - + -
      +

      You should describe a concrete target-specific class that represents the @@ -514,14 +505,12 @@ input files and placed in XXXGenRegisterInfo.h.inc and implementation of XXXRegisterInfo requires hand-coding.

      -
      - - + -
      +

      The XXXRegisterInfo.td file typically starts with register definitions @@ -700,11 +689,11 @@ fields of a register's TargetRegisterDesc.

      - + -
      +

      The RegisterClass class (specified in Target.td) is used to @@ -894,12 +883,12 @@ namespace SP { // Register class instances

      - + TargetRegisterInfo + -
      +

      The final step is to hand code portions of XXXRegisterInfo, which @@ -933,13 +922,15 @@ implementation in SparcRegisterInfo.cpp:

      - - -
      +

      + Instruction Set +

      + + +

      During the early stages of code generation, the LLVM IR code is converted to a @@ -1103,7 +1094,7 @@ The fifth parameter is a string that is used by the assembly printer and can be left as an empty string until the assembly printer interface is implemented. The sixth and final parameter is the pattern used to match the instruction during the SelectionDAG Select Phase described in -(The LLVM +(The LLVM Target-Independent Code Generator). This parameter is detailed in the next section, Instruction Selector.

      @@ -1188,14 +1179,12 @@ correspond to the values in SparcInstrInfo.td. I.e., SPCC::ICC_NE = 9, SPCC::FCC_U = 23 and so on.)

      -
      - - + -
      +

      The code generator backend maps instruction operands to fields in the @@ -1283,12 +1272,12 @@ the rd, rs1, and rs2 fields respectively.

      - + TargetInstrInfo + -
      +

      The final step is to hand code portions of XXXInstrInfo, which @@ -1327,10 +1316,10 @@ implementation in SparcInstrInfo.cpp:

      - -
      + +

      Performance can be improved by combining instructions or by eliminating @@ -1485,13 +1474,15 @@ branch.

      - - + + +

      + Instruction Selector +

      -
      +

      LLVM uses a SelectionDAG to represent LLVM IR instructions, and nodes @@ -1533,7 +1524,7 @@ selection pass into the queue of passes to run. The LLVM static compiler (llc) is an excellent tool for visualizing the contents of DAGs. To display the SelectionDAG before or after specific processing phases, use the command line options for llc, described -at +at SelectionDAG Instruction Selection Process.

      @@ -1642,14 +1633,12 @@ SDNode *Select_ISD_STORE(const SDValue &N) {
      -
      - - + -
      +

      The Legalize phase converts a DAG to use types and operations that are natively @@ -1716,14 +1705,12 @@ a LegalAction type enum value: Promote, Expand, contains examples of all four LegalAction values.

      -
      - -
      +

      Promote -

      + -
      +

      For an operation without native support for a given type, the specified type may @@ -1742,11 +1729,11 @@ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

      -
      +

      Expand -

      + -
      +

      For a type without native support, a value may need to be broken down further, @@ -1767,11 +1754,11 @@ setOperationAction(ISD::FCOS, MVT::f32, Expand);

      -
      +

      Custom -

      + -
      +

      For some operations, simple type promotion or operation expansion may be @@ -1833,11 +1820,11 @@ static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {

      -
      +

      Legal -

      + -
      +

      The Legal LegalizeAction enum value simply indicates that an @@ -1865,12 +1852,14 @@ if (TM.getSubtarget<SparcSubtarget>().isV9())

      - - -
      + +

      + Calling Conventions +

      + +

      To support target-specific calling conventions, XXXGenCallingConv.td @@ -2015,13 +2004,15 @@ def RetCC_X86_32 : CallingConv<[

      - - + + +

      + Assembly Printer +

      -
      +

      During the code emission stage, the code generator may utilize an LLVM pass to @@ -2171,12 +2162,12 @@ output.

      - + -
      +

      Subtarget support is used to inform the code generation process of instruction @@ -2289,12 +2280,12 @@ XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) {

      - + -
      +

      The implementation of a target machine optionally includes a Just-In-Time (JIT) @@ -2333,14 +2324,12 @@ Both XXXJITInfo.cpp and XXXCodeEmitter.cpp must include the that write data (in bytes, words, strings, etc.) to the output stream.

      -
      - - + -
      +

      In XXXCodeEmitter.cpp, a target-specific of the Emitter class @@ -2478,11 +2467,11 @@ enum RelocationType {

      - + -
      +

      XXXJITInfo.cpp implements the JIT interfaces for target-specific @@ -2537,6 +2526,8 @@ with assembler.

      +
      +
      @@ -2547,9 +2538,9 @@ with assembler. src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Mason Woo and Misha Brukman
      - The LLVM Compiler Infrastructure + The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-11-23 04:31:01 +0100 (Tue, 23 Nov 2010) $ + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/WritingAnLLVMPass.html b/docs/WritingAnLLVMPass.html index 80258e428352..75426e051d35 100644 --- a/docs/WritingAnLLVMPass.html +++ b/docs/WritingAnLLVMPass.html @@ -8,9 +8,9 @@ -
      +

      Writing an LLVM Pass -

      +
      1. Introduction - What is a pass?
      2. @@ -121,12 +121,12 @@
      - + -
      +

      The LLVM Pass Framework is an important part of the LLVM system, because LLVM passes are where most of the interesting parts of the compiler exist. Passes @@ -156,12 +156,12 @@ more advanced features are discussed.

      - + -
      +

      Here we describe how to write the "hello world" of passes. The "Hello" pass is designed to simply print out the name of non-external functions that exist in @@ -169,14 +169,12 @@ the program being compiled. It does not modify the program at all, it just inspects it. The source code and files for this pass are available in the LLVM source tree in the lib/Transforms/Hello directory.

      -
      - - + -
      +

      First, configure and build LLVM. This needs to be done directly inside the LLVM source tree rather than in a separate objects directory. @@ -185,7 +183,7 @@ source tree in the lib/Transforms/Hello directory.

      lib/Transforms/Hello. Finally, you must set up a build script (Makefile) that will compile the source code for the new pass. To do this, copy the following into Makefile:

      -
      +
       # Makefile for hello pass
      @@ -211,17 +209,20 @@ the opt or bugpoint tools via their -load options.
       If your operating system uses a suffix other than .so (such as windows or 
       Mac OS/X), the appropriate extension will be used.

      +

      If you are using CMake to build LLVM, see +Developing an LLVM pass with CMake.

      +

      Now that we have the build scripts set up, we just need to write the code for the pass itself.

      - + -
      +

      Now that we have a way to compile our new pass, we just have to write it. Start out with:

      @@ -301,7 +302,7 @@ function.

      initialization value is not important.

      -  static RegisterPass X("hello", "Hello World Pass",
      +  static RegisterPass<Hello> X("hello", "Hello World Pass",
                               false /* Only looks at CFG */,
                               false /* Analysis Pass */);
       }  // end of anonymous namespace
      @@ -337,7 +338,7 @@ is supplied as fourth argument. 

      }; char Hello::ID = 0; - static RegisterPass X("hello", "Hello World Pass", false, false); + static RegisterPass<Hello> X("hello", "Hello World Pass", false, false); }
      @@ -353,11 +354,11 @@ them) to be useful.

      - + -
      +

      Now that you have a brand new shiny shared object file, we can use the opt command to run an LLVM program through your pass. Because you @@ -443,13 +444,15 @@ about some more details of how they work and how to use them.

      - - + + +

      + Pass classes and requirements +

      -
      +

      One of the first things that you should do when designing a new pass is to decide what class you should subclass for your pass. The -

      - - + -
      +

      The most plain and boring type of pass is the "ImmutablePass" @@ -490,11 +491,11 @@ invalidated, and are never "run".

      - + -
      +

      The "ModulePass" @@ -516,14 +517,12 @@ DominatorTree for function definitions, not declarations.

      ModulePass and overload the runOnModule method with the following signature:

      -
      - - + -
      +
         virtual bool runOnModule(Module &M) = 0;
      @@ -535,12 +534,14 @@ false otherwise.

      - - -
      + +

      + The CallGraphSCCPass class +

      + +

      The "CallGraphSCCPass" @@ -581,15 +582,14 @@ because it has to handle SCCs with more than one node in it. All of the virtual methods described below should return true if they modified the program, or false if they didn't.

      -
      - - +

      + + The doInitialization(CallGraph &) method + +

      -
      +
         virtual bool doInitialization(CallGraph &CG);
      @@ -606,11 +606,11 @@ fast).

      - + -
      +
         virtual bool runOnSCC(CallGraphSCC &SCC) = 0;
      @@ -623,12 +623,13 @@ otherwise.

      - +

      + + The doFinalization(CallGraph &) method + +

      -
      +
         virtual bool doFinalization(CallGraph &CG);
      @@ -641,12 +642,14 @@ program being compiled.

      - - -
      + +

      + The FunctionPass class +

      + +

      In contrast to ModulePass subclasses, FunctionPass @@ -671,15 +674,14 @@ href="#basiccode">Hello World pass for example). FunctionPass's may overload three virtual methods to do their work. All of these methods should return true if they modified the program, or false if they didn't.

      -
      - - +

      + + The doInitialization(Module &) method + +

      -
      +
         virtual bool doInitialization(Module &M);
      @@ -703,11 +705,11 @@ free functions that it needs, adding prototypes to the module if necessary.

      - + -
      +
         virtual bool runOnFunction(Function &F) = 0;
      @@ -720,12 +722,13 @@ be returned if the function is modified.

      - +

      + + The doFinalization(Module &) method + +

      -
      +
         virtual bool doFinalization(Module &M);
      @@ -738,12 +741,14 @@ program being compiled.

      - - -
      + +

      + The LoopPass class +

      + +

      All LoopPass execute on each loop in the function independent of all of the other loops in the function. LoopPass processes loops in @@ -751,19 +756,18 @@ loop nest order such that outer most loop is processed last.

      LoopPass subclasses are allowed to update loop nest using LPPassManager interface. Implementing a loop pass is usually -straightforward. Looppass's may overload three virtual methods to +straightforward. LoopPass's may overload three virtual methods to do their work. All these methods should return true if they modified the program, or false if they didn't.

      -
      - +

      + + The doInitialization(Loop *,LPPassManager &) method + +

      -
      +
         virtual bool doInitialization(Loop *, LPPassManager &LPM);
      @@ -780,11 +784,11 @@ information.

      - + -
      +
         virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0;
      @@ -798,11 +802,11 @@ should be used to update loop nest.

      - + -
      +
         virtual bool doFinalization();
      @@ -815,12 +819,14 @@ program being compiled. 

      - - -
      + +

      + The RegionPass class +

      + +

      RegionPass is similar to LoopPass, but executes on each single entry single exit region in the function. @@ -829,19 +835,18 @@ region is processed last.

      RegionPass subclasses are allowed to update the region tree by using the RGPassManager interface. You may overload three virtual methods of -RegionPass to implementing your own region pass is usually. All these +RegionPass to implement your own region pass. All these methods should return true if they modified the program, or false if they did not.

      -
      - +

      + + The doInitialization(Region *, RGPassManager &) method + +

      -
      +
         virtual bool doInitialization(Region *, RGPassManager &RGM);
      @@ -858,11 +863,11 @@ information.

      - + -
      +
         virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0;
      @@ -876,11 +881,11 @@ should be used to update region tree.

      - + -
      +
         virtual bool doFinalization();
      @@ -893,14 +898,14 @@ program being compiled. 

      - - - - -
      + +

      + The BasicBlockPass class +

      + +

      BasicBlockPass's are just like FunctionPass's, except that they must limit @@ -922,15 +927,14 @@ href="#doInitialization_mod">doInitialization(Module &) and doFinalization(Module &) methods that FunctionPass's have, but also have the following virtual methods that may also be implemented:

      -
      - - +

      + + The doInitialization(Function &) method + +

      -
      +
         virtual bool doInitialization(Function &F);
      @@ -947,11 +951,11 @@ fast).

      - + -
      +
         virtual bool runOnBasicBlock(BasicBlock &BB) = 0;
      @@ -965,12 +969,13 @@ if the basic block is modified.

      - +

      + + The doFinalization(Function &) method + +

      -
      +
         virtual bool doFinalization(Function &F);
      @@ -984,12 +989,14 @@ finalization.

      - - -
      + +

      + The MachineFunctionPass class +

      + +

      A MachineFunctionPass is a part of the LLVM code generator that executes on the machine-dependent representation of each LLVM function in the @@ -1014,15 +1021,14 @@ href="#runOnMachineFunction">runOnMachineFunction (including global data) -

      - - +

      + + The runOnMachineFunction(MachineFunction &MF) method + +

      -
      +
         virtual bool runOnMachineFunction(MachineFunction &MF) = 0;
      @@ -1043,13 +1049,17 @@ remember, you may not modify the LLVM Function or its contents from a
       
       
      - - + +
      + + +

      + Pass registration +

      -
      +

      In the Hello World example pass we illustrated how pass registration works, and discussed some of the reasons that it is used and @@ -1066,14 +1076,12 @@ well as for debug output generated by the --debug-pass option.

      If you want your pass to be easily dumpable, you should implement the virtual print method:

      -
      - - + -
      +
         virtual void print(std::ostream &O, const Module *M) const;
      @@ -1093,13 +1101,15 @@ depended on.

      - - + + +

      + Specifying interactions between passes +

      -
      +

      One of the main responsibilities of the PassManager is to make sure that passes interact with each other correctly. Because PassManager @@ -1116,14 +1126,12 @@ specifies. If a pass does not implement the getAnalysisUsage method, it defaults to not having any prerequisite passes, and invalidating all other passes.

      -
      - - + -
      +
         virtual void getAnalysisUsage(AnalysisUsage &Info) const;
      @@ -1139,11 +1147,14 @@ object:

      - +

      + + The AnalysisUsage::addRequired<> + and AnalysisUsage::addRequiredTransitive<> methods + +

      -
      +

      If your pass requires a previous pass to be executed (an analysis for example), it can use one of these methods to arrange for it to be run before your pass. @@ -1165,11 +1176,13 @@ pass is.

      - +

      + + The AnalysisUsage::addPreserved<> method + +

      -
      +

      One of the jobs of the PassManager is to optimize how and when analyses are run. In particular, it attempts to avoid recomputing data unless it needs to. For @@ -1200,22 +1213,13 @@ the fact that it hacks on the CFG.

      - +

      + + Example implementations of getAnalysisUsage + +

      -
      - -
      -  // This is an example implementation from an analysis, which does not modify
      -  // the program at all, yet has a prerequisite.
      -  void PostDominanceFrontier::getAnalysisUsage(AnalysisUsage &AU) const {
      -    AU.setPreservesAll();
      -    AU.addRequired<PostDominatorTree>();
      -  }
      -
      - -

      and:

      +
         // This example modifies the program, but does not modify the CFG
      @@ -1228,12 +1232,14 @@ the fact that it hacks on the CFG.
       
      - +

      + + The getAnalysis<> and + getAnalysisIfAvailable<> methods + +

      -
      +

      The Pass::getAnalysis<> method is automatically inherited by your class, providing you with access to the passes that you declared that you @@ -1285,13 +1291,15 @@ if it is active. For example:

      - - + + +

      + Implementing Analysis Groups +

      -
      +

      Now that we understand the basics of how passes are defined, how they are used, and how they are required from other passes, it's time to get a little bit @@ -1310,14 +1318,12 @@ between these two extremes for other implementations). To cleanly support situations like this, the LLVM Pass Infrastructure supports the notion of Analysis Groups.

      -
      - - + -
      +

      An Analysis Group is a single simple interface that may be implemented by multiple different passes. Analysis Groups can be given human readable names @@ -1364,11 +1370,11 @@ hypothetical example) instead.

      - + -
      +

      The RegisterAnalysisGroup template is used to register the analysis group itself, while the INITIALIZE_AG_PASS is used to add pass @@ -1425,13 +1431,15 @@ pass is the default implementation for the interface.

      - - + + +

      + Pass Statistics +

      -
      +

      The Statistic class is designed to be an easy way to expose various success @@ -1443,12 +1451,12 @@ line. See the St -

      + -
      +

      The PassManager @@ -1615,14 +1623,12 @@ Hello: main

      Which shows that we don't accidentally invalidate dominator information anymore, and therefore do not have to compute it twice.

      -
      - - + -
      +
         virtual void releaseMemory();
      @@ -1643,13 +1649,15 @@ class, before the next call of run* in your pass.

      - - + + +

      + Registering dynamically loaded passes +

      -
      +

      Size matters when constructing production quality tools using llvm, both for the purposes of distribution, and for regulating the resident code size @@ -1676,14 +1684,12 @@ the static destructor unregisters. Thus a pass that is statically linked in the tool will be registered at start up. A dynamically loaded pass will register on load and unregister at unload.

      -
      - - + -
      +

      There are predefined registries to track instruction scheduling (RegisterScheduler) and register allocation (RegisterRegAlloc) @@ -1744,11 +1750,11 @@ call line to llvm/Codegen/LinkAllCodegenComponents.h.

      - + -
      +

      The easiest way to get started is to clone one of the existing registries; we recommend llvm/CodeGen/RegAllocRegistry.h. The key things to modify @@ -1776,13 +1782,15 @@ creator.

      - - + + +

      + Using GDB with dynamically loaded passes +

      -
      +

      Unfortunately, using GDB with dynamically loaded passes is not as easy as it should be. First of all, you can't set a breakpoint in a shared object that has @@ -1794,14 +1802,12 @@ GDB.

      transformation invoked by opt, although nothing described here depends on that.

      -
      - - + -
      +

      First thing you do is start gdb on the opt process:

      @@ -1842,11 +1848,11 @@ or do other standard debugging stuff.

      - + -
      +

      Once you have the basics down, there are a couple of problems that GDB has, some with solutions, some without.

      @@ -1874,26 +1880,26 @@ href="mailto:sabre@nondot.org">Chris.

      - - + + +

      + Future extensions planned +

      -
      +

      Although the LLVM Pass Infrastructure is very capable as it stands, and does some nifty stuff, there are things we'd like to add in the future. Here is where we are going:

      -
      - - + -
      +

      Multiple CPU machines are becoming more common and compilation can never be fast enough: obviously we should allow for a multithreaded compiler. Because of @@ -1911,6 +1917,8 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.

      +
      +
      @@ -1920,8 +1928,8 @@ Despite that, we have kept the LLVM passes SMP ready, and you should too.

      src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-15 10:23:02 +0100 (Tue, 15 Feb 2011) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
      diff --git a/docs/doxygen.css b/docs/doxygen.css index 83b049b5f1b7..80c6cad558c0 100644 --- a/docs/doxygen.css +++ b/docs/doxygen.css @@ -370,9 +370,39 @@ H2 { H3 { font-size: 100%; } + +H2, H3 { + border-bottom: 2px solid; + margin-top: 2em; +} + A.qindex {} A.qindexRef {} A.el { text-decoration: none; font-weight: bold } A.elRef { font-weight: bold } A.code { text-decoration: none; font-weight: normal; color: #4444ee } A.codeRef { font-weight: normal; color: #4444ee } + +div.memitem { + border: 1px solid #999999; + margin-top: 1.0em; + margin-bottom: 1.0em; + -webkit-border-radius: 0.5em; + -webkit-box-shadow: 3px 3px 6px #777777; + -moz-border-radius: 0.5em; + -moz-box-shadow: black 3px 3px 3px; +} + +div.memproto { + background-color: #E3E4E5; + padding: 0.25em 0.5em; + -webkit-border-top-left-radius: 0.5em; + -webkit-border-top-right-radius: 0.5em; + -moz-border-radius-topleft: 0.5em; + -moz-border-radius-topright: 0.5em; +} + +div.memdoc { + padding-left: 1em; + padding-right: 1em; +} diff --git a/docs/doxygen.footer b/docs/doxygen.footer index d75fff5e4ccc..15585b8da733 100644 --- a/docs/doxygen.footer +++ b/docs/doxygen.footer @@ -1,6 +1,6 @@
      Documentation for the LLVM System at SVN head
      +

      Documentation for the LLVM System at SVN head

      If you are using a released version of LLVM, see the download page to find your documentation.

      -
      LibraryFormsDescription
      -
      -

      Written by The LLVM Team

      +

      Written by The LLVM Team

      - +

      LLVM Design & Overview

        @@ -57,7 +55,7 @@ frequent questions about LLVM's most frequently misunderstood instruction.
      - +

      LLVM User Guides

        @@ -75,7 +73,7 @@ LLVM for a custom language, and the facilities LLVM offers in tutorial form.Developer Policy - The LLVM project's policy towards developers and their contributions. -
      • LLVM Command Guide - A reference +
      • LLVM Command Guide - A reference manual for the LLVM command line utilities ("man" pages for LLVM tools).
        Current tools: llvm-ar, @@ -131,7 +129,7 @@ href="irc://irc.oftc.net/llvm">join #llvm on irc.oftc.net directly.
      • - +

        General LLVM Programming Documentation

          @@ -179,7 +177,7 @@ href="http://llvm.org/doxygen/inherits.html">classes)
        - +

        LLVM Subsystem Documentation

          @@ -246,7 +244,7 @@ JITed code with GDB. - +

          LLVM Mailing Lists

            @@ -286,8 +284,8 @@ times each day, making it a high volume list. Valid HTML 4.01 - LLVM Compiler Infrastructure
            - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + LLVM Compiler Infrastructure
            + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/llvm.css b/docs/llvm.css index f572b5e57141..1222cf12bcb1 100644 --- a/docs/llvm.css +++ b/docs/llvm.css @@ -23,7 +23,7 @@ th { border: 2px solid gray; font-weight: bold; font-size: 105%; * Documentation */ /* Common for title and header */ -.doc_title, .doc_section, .doc_subsection, h1, h2 { +.doc_title, .doc_section, .doc_subsection, h1, h2, h3 { color: black; background: url("img/lines.gif"); font-family: "Georgia,Palatino,Times,Roman,SanSerif"; font-weight: bold; border-width: 1px; @@ -35,17 +35,17 @@ th { border: 2px solid gray; font-weight: bold; font-size: 105%; padding-bottom: 2px } -h1, .doc_section { text-align: center; font-size: 22pt; +h1, .doc_title, .title { text-align: left; font-size: 25pt } + +h2, .doc_section { text-align: center; font-size: 22pt; margin: 20pt 0pt 5pt 0pt; } -.doc_title, .title { text-align: left; font-size: 25pt } - -h2, .doc_subsection { width: 75%; +h3, .doc_subsection { width: 75%; text-align: left; font-size: 12pt; padding: 4pt 4pt 4pt 4pt; margin: 1.5em 0.5em 0.5em 0.5em } -h3, .doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em; +h4, .doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em; font-weight: bold; font-style: oblique; border-bottom: 1px solid #999999; font-size: 12pt; width: 75%; } @@ -70,6 +70,10 @@ h3, .doc_subsubsection { margin: 2.0em 0.5em 0.5em 0.5em; display: table; } +h2+div, h2+p {text-align: left; padding-left: 20pt; padding-right: 10pt;} +h3+div, h3+p {text-align: left; padding-left: 20pt; padding-right: 10pt;} +h4+div, h4+p {text-align: left; padding-left: 20pt; padding-right: 10pt;} + /* It is preferrable to use
             everywhere instead of the
              * 
            ...
            construct. * diff --git a/docs/tutorial/LangImpl1.html b/docs/tutorial/LangImpl1.html index c256af488b7f..22a2b127b466 100644 --- a/docs/tutorial/LangImpl1.html +++ b/docs/tutorial/LangImpl1.html @@ -11,7 +11,7 @@ -
            Kaleidoscope: Tutorial Introduction and the Lexer
            +

            Kaleidoscope: Tutorial Introduction and the Lexer

      - +

      Tutorial Introduction

      -
      +

      Welcome to the "Implementing a language with LLVM" tutorial. This tutorial runs through the implementation of a simple language, showing how fun and @@ -123,10 +123,10 @@ languages!

      - +

      The Basic Language

      -
      +

      This tutorial will be illustrated with a toy language that we'll call "Kaleidoscope" (derived @@ -181,10 +181,10 @@ a Mandelbrot Set at various levels of magnification.

      - +

      The Lexer

      -
      +

      When it comes to implementing a language, the first thing needed is the ability to process a text file and recognize what it says. The traditional @@ -341,8 +341,8 @@ so that you can use the lexer and parser together. src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/LangImpl2.html b/docs/tutorial/LangImpl2.html index f39ed6ccffef..c6a9bb1ec1c3 100644 --- a/docs/tutorial/LangImpl2.html +++ b/docs/tutorial/LangImpl2.html @@ -11,7 +11,7 @@ -

      Kaleidoscope: Implementing a Parser and AST
      +

      Kaleidoscope: Implementing a Parser and AST

      - +

      Chapter 2 Introduction

      -
      +

      Welcome to Chapter 2 of the "Implementing a language with LLVM" tutorial. This chapter shows you how to use the lexer, built in @@ -61,10 +61,10 @@ Tree.

      - +

      The Abstract Syntax Tree (AST)

      -
      +

      The AST for a program captures its behavior in such a way that it is easy for later stages of the compiler (e.g. code generation) to interpret. We basically @@ -178,10 +178,10 @@ bodies in Kaleidoscope.

      - +

      Parser Basics

      -
      +

      Now that we have an AST to build, we need to define the parser code to build it. The idea here is that we want to parse something like "x+y" (which is @@ -239,11 +239,10 @@ piece of our grammar: numeric literals.

      - +

      Basic Expression Parsing

      -
      +

      We start with numeric literals, because they are the simplest to process. For each production in our grammar, we'll define a function which parses that @@ -394,11 +393,10 @@ They are a bit more complex.

      - +

      Binary Expression Parsing

      -
      +

      Binary expressions are significantly harder to parse because they are often ambiguous. For example, when given the string "x+y*z", the parser can choose @@ -617,10 +615,10 @@ handle function definitions, etc.

      - +

      Parsing the Rest

      -
      +

      The next thing missing is handling of function prototypes. In Kaleidoscope, @@ -714,10 +712,10 @@ actually execute this code we've built!

      - +

      The Driver

      -
      +

      The driver for this simply invokes all of the parsing pieces with a top-level dispatch loop. There isn't much interesting here, so I'll just include the @@ -753,10 +751,10 @@ type "4+5;", and the parser will know you are done.

      - +

      Conclusions

      -
      +

      With just under 400 lines of commented code (240 lines of non-comment, non-blank code), we fully defined our minimal language, including a lexer, @@ -790,10 +788,10 @@ Representation (IR) from the AST.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for this and the previous chapter. @@ -1226,8 +1224,8 @@ int main() { src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/LangImpl3.html b/docs/tutorial/LangImpl3.html index a320ff7e9064..47406ca36e41 100644 --- a/docs/tutorial/LangImpl3.html +++ b/docs/tutorial/LangImpl3.html @@ -11,7 +11,7 @@ -

      Kaleidoscope: Code generation to LLVM IR
      +

      Kaleidoscope: Code generation to LLVM IR

      - +

      Chapter 3 Introduction

      -
      +

      Welcome to Chapter 3 of the "Implementing a language with LLVM" tutorial. This chapter shows you how to transform the .

      - +

      Code Generation Setup

      -
      +

      In order to generate LLVM IR, we want some simple setup to get started. First @@ -147,10 +147,10 @@ has already been done, and we'll just use it to emit code.

      - +

      Expression Code Generation

      -
      +

      Generating LLVM code for expression nodes is very straightforward: less than 45 lines of commented code for all four of our expression nodes. First @@ -293,10 +293,10 @@ basic framework.

      - +

      Function Code Generation

      -
      +

      Code generation for prototypes and functions must handle a number of details, which make their code less beautiful than expression code @@ -515,11 +515,10 @@ def bar() foo(1, 2); # error, unknown function "foo"

      - +

      Driver Changes and Closing Thoughts

      -
      +

      For now, code generation to LLVM doesn't really get us much, except that we can @@ -657,10 +656,10 @@ support to this so we can actually start running code!

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1262,8 +1261,8 @@ int main() { src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2011-02-15 01:24:32 +0100 (Tue, 15 Feb 2011) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/LangImpl4.html b/docs/tutorial/LangImpl4.html index a2511d959e7b..5b8990e441e4 100644 --- a/docs/tutorial/LangImpl4.html +++ b/docs/tutorial/LangImpl4.html @@ -11,7 +11,7 @@ -

      Kaleidoscope: Adding JIT and Optimizer Support
      +

      Kaleidoscope: Adding JIT and Optimizer Support

      - +

      Chapter 4 Introduction

      -
      +

      Welcome to Chapter 4 of the "Implementing a language with LLVM" tutorial. Chapters 1-3 described the implementation of a simple @@ -48,11 +48,10 @@ for the Kaleidoscope language.

      - +

      Trivial Constant Folding

      -
      +

      Our demonstration for Chapter 3 is elegant and easy to extend. Unfortunately, @@ -134,11 +133,10 @@ range of optimizations that you can use, in the form of "passes".

      - +

      LLVM Optimization Passes

      -
      +

      LLVM provides many optimization passes, which do many different sorts of things and have different tradeoffs. Unlike other systems, LLVM doesn't hold @@ -266,10 +264,10 @@ executing it!

      - +

      Adding a JIT Compiler

      -
      +

      Code that is available in LLVM IR can have a wide variety of tools applied to it. For example, you can run optimizations on it (as we did above), @@ -474,10 +472,10 @@ tackling some interesting LLVM IR issues along the way.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1078,7 +1076,7 @@ int main() { // Create the JIT. This takes ownership of the module. std::string ErrStr; - TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); +TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); if (!TheExecutionEngine) { fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); exit(1); @@ -1130,8 +1128,8 @@ int main() { src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/LangImpl5.html b/docs/tutorial/LangImpl5.html index d2c3bd03dc4e..4fc23a149429 100644 --- a/docs/tutorial/LangImpl5.html +++ b/docs/tutorial/LangImpl5.html @@ -11,7 +11,7 @@ -

      Kaleidoscope: Extending the Language: Control Flow
      +

      Kaleidoscope: Extending the Language: Control Flow

      - +

      Chapter 5 Introduction

      -
      +

      Welcome to Chapter 5 of the "Implementing a language with LLVM" tutorial. Parts 1-4 described the implementation of the simple @@ -65,14 +65,14 @@ have an if/then/else expression plus a simple 'for' loop.

      - +

      If/Then/Else

      -
      +

      Extending Kaleidoscope to support if/then/else is quite straightforward. It -basically requires adding lexer support for this "new" concept to the lexer, +basically requires adding support for this "new" concept to the lexer, parser, AST, and LLVM code emitter. This example is nice, because it shows how easy it is to "grow" a language over time, incrementally extending it as new ideas are discovered.

      @@ -108,15 +108,12 @@ Since Kaleidoscope allows side-effects, this behavior is important to nail down.

      Now that we know what we "want", let's break this down into its constituent pieces.

      -
      - - +

      Lexer Extensions for If/Then/Else

      -
      +

      The lexer extensions are straightforward. First we add new enum values for the relevant tokens:

      @@ -146,11 +143,10 @@ stuff:

      - +

      AST Extensions for If/Then/Else

      -
      +

      To represent the new expression we add a new AST node for it:

      @@ -172,11 +168,10 @@ public:
      - +

      Parser Extensions for If/Then/Else

      -
      +

      Now that we have the relevant tokens coming from the lexer and we have the AST node to build, our parsing logic is relatively straightforward. First we @@ -231,10 +226,10 @@ static ExprAST *ParsePrimary() {

      - +

      LLVM IR for If/Then/Else

      -
      +

      Now that we have it parsing and building the AST, the final piece is adding LLVM code generation support. This is the most interesting part of the @@ -347,11 +342,10 @@ directly.

      - +

      Code Generation for If/Then/Else

      -
      +

      In order to generate code for this, we implement the Codegen method for IfExprAST:

      @@ -472,7 +466,7 @@ are emitted, we can finish up with the merge code:

      // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); @@ -500,11 +494,13 @@ another useful expression that is familiar from non-functional languages...

      +
      + - +

      'for' Loop Expression

      -
      +

      Now that we know how to add basic control flow constructs to the language, we have the tools to add more powerful things. Lets add something more @@ -533,14 +529,11 @@ variables, it will get more useful.

      As before, let's talk about the changes that we need to make to Kaleidoscope to support this.

      -
      - - +

      Lexer Extensions for the 'for' Loop

      -
      +

      The lexer extensions are the same sort of thing as for if/then/else:

      @@ -566,11 +559,10 @@ the 'for' Loop
      - +

      AST Extensions for the 'for' Loop

      -
      +

      The AST node is just as simple. It basically boils down to capturing the variable name and the constituent expressions in the node.

      @@ -593,11 +585,10 @@ public:
      - +

      Parser Extensions for the 'for' Loop

      -
      +

      The parser code is also fairly standard. The only interesting thing here is handling of the optional step value. The parser code handles it by checking to @@ -653,11 +644,10 @@ static ExprAST *ParseForExpr() {

      - +

      LLVM IR for the 'for' Loop

      -
      +

      Now we get to the good part: the LLVM IR we want to generate for this thing. With the simple example above, we get this LLVM IR (note that this dump is @@ -699,11 +689,10 @@ expressions, and some basic blocks. Lets see how this fits together.

      - +

      Code Generation for the 'for' Loop

      -
      +

      The first part of Codegen is very simple: we just output the start expression for the loop value:

      @@ -746,7 +735,7 @@ create an unconditional branch for the fall-through between the two blocks.

      Builder.SetInsertPoint(LoopBB); // Start the PHI node with an entry for Start. - PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); Variable->addIncoming(StartVal, PreheaderBB);
      @@ -876,11 +865,13 @@ language.

      +
      + - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1452,7 +1443,7 @@ Value *IfExprAST::Codegen() { // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); @@ -1494,7 +1485,7 @@ Value *ForExprAST::Codegen() { Builder.SetInsertPoint(LoopBB); // Start the PHI node with an entry for Start. - PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); Variable->addIncoming(StartVal, PreheaderBB); // Within the loop, the variable is defined equal to the PHI node. If it @@ -1721,7 +1712,7 @@ int main() { // Create the JIT. This takes ownership of the module. std::string ErrStr; - TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); if (!TheExecutionEngine) { fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); exit(1); @@ -1773,8 +1764,8 @@ int main() { src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/LangImpl6.html b/docs/tutorial/LangImpl6.html index 7ddf3a099cbc..31d7ff4cd215 100644 --- a/docs/tutorial/LangImpl6.html +++ b/docs/tutorial/LangImpl6.html @@ -11,7 +11,7 @@ -

      Kaleidoscope: Extending the Language: User-defined Operators
      +

      Kaleidoscope: Extending the Language: User-defined Operators

      - +

      Chapter 6 Introduction

      -
      +

      Welcome to Chapter 6 of the "Implementing a language with LLVM" tutorial. At this point in our tutorial, we now have a fully @@ -60,10 +60,10 @@ an example of what you can build with Kaleidoscope and its feature set.

      - +

      User-defined Operators: the Idea

      -
      +

      The "operator overloading" that we will add to Kaleidoscope is more general than @@ -125,10 +125,10 @@ operators.

      - +

      User-defined Binary Operators

      -
      +

      Adding support for user-defined binary operators is pretty simple with our current framework. We'll first add support for the unary/binary keywords:

      @@ -342,10 +342,10 @@ see what it takes.

      - +

      User-defined Unary Operators

      -
      +

      Since we don't currently support unary operators in the Kaleidoscope language, we'll need to add everything to support them. Above, we added simple @@ -491,10 +491,10 @@ is simpler primarily because it doesn't need to handle any predefined operators.

      - +

      Kicking the Tires

      -
      +

      It is somewhat hard to believe, but with a few simple extensions we've covered in the last chapters, we have grown a real-ish language. With this, we @@ -796,10 +796,10 @@ add variable mutation without building SSA in your front-end.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1475,7 +1475,7 @@ Value *IfExprAST::Codegen() { // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); @@ -1517,7 +1517,7 @@ Value *ForExprAST::Codegen() { Builder.SetInsertPoint(LoopBB); // Start the PHI node with an entry for Start. - PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); Variable->addIncoming(StartVal, PreheaderBB); // Within the loop, the variable is defined equal to the PHI node. If it @@ -1758,7 +1758,7 @@ int main() { // Create the JIT. This takes ownership of the module. std::string ErrStr; - TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); + TheExecutionEngine = EngineBuilder(TheModule).setErrorStr(&ErrStr).create(); if (!TheExecutionEngine) { fprintf(stderr, "Could not create ExecutionEngine: %s\n", ErrStr.c_str()); exit(1); @@ -1810,8 +1810,8 @@ int main() { src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/LangImpl7.html b/docs/tutorial/LangImpl7.html index 3b36129d6716..a4a21f1aed22 100644 --- a/docs/tutorial/LangImpl7.html +++ b/docs/tutorial/LangImpl7.html @@ -12,7 +12,7 @@ -

      Kaleidoscope: Extending the Language: Mutable Variables
      +

      Kaleidoscope: Extending the Language: Mutable Variables

      - +

      Chapter 7 Introduction

      -
      +

      Welcome to Chapter 7 of the "Implementing a language with LLVM" tutorial. In chapters 1 through 6, we've built a very @@ -66,10 +66,10 @@ support for this, though the way it works is a bit unexpected for some.

      - +

      Why is this a hard problem?

      -
      +

      To understand why mutable variables cause complexities in SSA construction, @@ -140,10 +140,10 @@ logic.

      - +

      Memory in LLVM

      -
      +

      The 'trick' here is that while LLVM does require all register values to be in SSA form, it does not require (or permit) memory objects to be in SSA form. @@ -321,11 +321,10 @@ variables now!

      - +

      Mutable Variables in Kaleidoscope

      -
      +

      Now that we know the sort of problem we want to tackle, lets see what this looks like in the context of our little Kaleidoscope language. We're going to @@ -378,11 +377,10 @@ Kaleidoscope to support new variable definitions.

      - +

      Adjusting Existing Variables for Mutation

      -
      +

      The symbol table in Kaleidoscope is managed at code generation time by the @@ -648,10 +646,10 @@ we'll add the assignment operator.

      - +

      New Assignment Operator

      -
      +

      With our current framework, adding a new assignment operator is really simple. We will parse it just like any other binary operator, but handle it @@ -745,11 +743,10 @@ add this next!

      - +

      User-defined Local Variables

      -
      +

      Adding var/in is just like any other other extensions we made to Kaleidoscope: we extend the lexer, the parser, the AST and the code generator. @@ -979,10 +976,10 @@ anywhere in sight.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with mutable @@ -1755,7 +1752,7 @@ Value *IfExprAST::Codegen() { // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); @@ -2160,8 +2157,8 @@ int main() { src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-11-16 18:28:22 +0100 (Tue, 16 Nov 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/LangImpl8.html b/docs/tutorial/LangImpl8.html index fe42a22e0304..cc55d401a4c7 100644 --- a/docs/tutorial/LangImpl8.html +++ b/docs/tutorial/LangImpl8.html @@ -11,8 +11,7 @@ -

      Kaleidoscope: Conclusion and other useful LLVM - tidbits
      +

      Kaleidoscope: Conclusion and other useful LLVM tidbits

      - +

      Tutorial Conclusion

      -
      +

      Welcome to the the final chapter of the "Implementing a language with LLVM" tutorial. In the course of this tutorial, we have grown @@ -154,23 +153,19 @@ are very useful if you want to take advantage of LLVM's capabilities.

      - +

      Properties of the LLVM IR

      -
      +

      We have a couple common questions about code in the LLVM IR form - lets just get these out of the way right now, shall we?

      -
      - - +

      Target Independence

      -
      +

      Kaleidoscope is an example of a "portable language": any program written in Kaleidoscope will work the same way on any target that it runs on. Many other @@ -221,10 +216,10 @@ in-kernel language.

      - +

      Safety Guarantees

      -
      +

      Many of the languages above are also "safe" languages: it is impossible for a program written in Java to corrupt its address space and crash the process @@ -243,11 +238,10 @@ list if you are interested in more details.

      - +

      Language-Specific Optimizations

      -
      +

      One thing about LLVM that turns off many people is that it does not solve all the world's problems in one system (sorry 'world hunger', someone else will have @@ -297,24 +291,23 @@ language-specific AST.

      +
      + - +

      Tips and Tricks

      -
      +

      There is a variety of useful tips and tricks that you come to know after working on/with LLVM that aren't obvious at first glance. Instead of letting everyone rediscover them, this section talks about some of these issues.

      -
      - - +

      Implementing portable offsetof/sizeof

      -
      +

      One interesting thing that comes up, if you are trying to keep the code generated by your compiler "target independent", is that you often need to know @@ -331,11 +324,10 @@ in a portable way.

      - +

      Garbage Collected Stack Frames

      -
      +

      Some languages want to explicitly manage their stack frames, often so that they are garbage collected or to allow easy implementation of closures. There @@ -349,6 +341,8 @@ Passing Style and the use of tail calls (which LLVM also supports).

      +
      +
      @@ -358,8 +352,8 @@ Passing Style and the use of tail calls (which LLVM also supports).

      src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
      diff --git a/docs/tutorial/OCamlLangImpl1.html b/docs/tutorial/OCamlLangImpl1.html index 44106132fbe8..7cae68c988ab 100644 --- a/docs/tutorial/OCamlLangImpl1.html +++ b/docs/tutorial/OCamlLangImpl1.html @@ -12,7 +12,7 @@ -
      Kaleidoscope: Tutorial Introduction and the Lexer
      +

      Kaleidoscope: Tutorial Introduction and the Lexer

      - +

      Tutorial Introduction

      -
      +

      Welcome to the "Implementing a language with LLVM" tutorial. This tutorial runs through the implementation of a simple language, showing how fun and @@ -130,10 +130,10 @@ languages!

      - +

      The Basic Language

      -
      +

      This tutorial will be illustrated with a toy language that we'll call "Kaleidoscope" (derived @@ -188,10 +188,10 @@ a Mandelbrot Set at various levels of magnification.

      - +

      The Lexer

      -
      +

      When it comes to implementing a language, the first thing needed is the ability to process a text file and recognize what it says. The traditional @@ -358,8 +358,8 @@ include a driver so that you can use the lexer and parser together. Chris Lattner
      Erick Tryzelaar
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/OCamlLangImpl2.html b/docs/tutorial/OCamlLangImpl2.html index 41d0956e4620..e1bb87122506 100644 --- a/docs/tutorial/OCamlLangImpl2.html +++ b/docs/tutorial/OCamlLangImpl2.html @@ -12,7 +12,7 @@ -

      Kaleidoscope: Implementing a Parser and AST
      +

      Kaleidoscope: Implementing a Parser and AST

      - +

      Chapter 2 Introduction

      -
      +

      Welcome to Chapter 2 of the "Implementing a language with LLVM in Objective Caml" tutorial. This chapter shows you how to use @@ -65,10 +65,10 @@ Tree.

      - +

      The Abstract Syntax Tree (AST)

      -
      +

      The AST for a program captures its behavior in such a way that it is easy for later stages of the compiler (e.g. code generation) to interpret. We basically @@ -146,10 +146,10 @@ bodies in Kaleidoscope.

      - +

      Parser Basics

      -
      +

      Now that we have an AST to build, we need to define the parser code to build it. The idea here is that we want to parse something like "x+y" (which is @@ -181,11 +181,10 @@ piece of our grammar: numeric literals.

      - +

      Basic Expression Parsing

      -
      +

      We start with numeric literals, because they are the simplest to process. For each production in our grammar, we'll define a function which parses that @@ -303,11 +302,10 @@ They are a bit more complex.

      - +

      Binary Expression Parsing

      -
      +

      Binary expressions are significantly harder to parse because they are often ambiguous. For example, when given the string "x+y*z", the parser can choose @@ -517,10 +515,10 @@ handle function definitions, etc.

      - +

      Parsing the Rest

      -
      +

      The next thing missing is handling of function prototypes. In Kaleidoscope, @@ -596,10 +594,10 @@ actually execute this code we've built!

      - +

      The Driver

      -
      +

      The driver for this simply invokes all of the parsing pieces with a top-level dispatch loop. There isn't much interesting here, so I'll just include the @@ -652,10 +650,10 @@ type "4+5;", and the parser will know you are done.

      - +

      Conclusions

      -
      +

      With just under 300 lines of commented code (240 lines of non-comment, non-blank code), we fully defined our minimal language, including a lexer, @@ -689,10 +687,10 @@ Representation (IR) from the AST.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for this and the previous chapter. @@ -1038,8 +1036,8 @@ main () Chris Lattner Erick Tryzelaar
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-07 02:28:04 +0200 (Fri, 07 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html index c7c53709a060..e52bb6c9008d 100644 --- a/docs/tutorial/OCamlLangImpl3.html +++ b/docs/tutorial/OCamlLangImpl3.html @@ -12,7 +12,7 @@ -

      Kaleidoscope: Code generation to LLVM IR
      +

      Kaleidoscope: Code generation to LLVM IR

      - +

      Chapter 3 Introduction

      -
      +

      Welcome to Chapter 3 of the "Implementing a language with LLVM" tutorial. This chapter shows you how to transform the

      - +

      Code Generation Setup

      -
      +

      In order to generate LLVM IR, we want some simple setup to get started. First @@ -128,10 +128,10 @@ that this has already been done, and we'll just use it to emit code.

      - +

      Expression Code Generation

      -
      +

      Generating LLVM code for expression nodes is very straightforward: less than 30 lines of commented code for all four of our expression nodes. First @@ -263,10 +263,10 @@ basic framework.

      - +

      Function Code Generation

      -
      +

      Code generation for prototypes and functions must handle a number of details, which make their code less beautiful than expression code @@ -466,11 +466,10 @@ def bar() foo(1, 2); # error, unknown function "foo"

      - +

      Driver Changes and Closing Thoughts

      -
      +

      For now, code generation to LLVM doesn't really get us much, except that we can @@ -607,10 +606,10 @@ support to this so we can actually start running code!

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1086,8 +1085,8 @@ main () Chris Lattner
      Erick Tryzelaar
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/OCamlLangImpl4.html b/docs/tutorial/OCamlLangImpl4.html index a86184c94520..db164d5e6a1a 100644 --- a/docs/tutorial/OCamlLangImpl4.html +++ b/docs/tutorial/OCamlLangImpl4.html @@ -12,7 +12,7 @@ -

      Kaleidoscope: Adding JIT and Optimizer Support
      +

      Kaleidoscope: Adding JIT and Optimizer Support

      - +

      Chapter 4 Introduction

      -
      +

      Welcome to Chapter 4 of the "Implementing a language with LLVM" tutorial. Chapters 1-3 described the implementation of a simple @@ -52,11 +52,10 @@ for the Kaleidoscope language.

      - +

      Trivial Constant Folding

      -
      +

      Note: the default IRBuilder now always includes the constant folding optimisations below.

      @@ -148,11 +147,10 @@ range of optimizations that you can use, in the form of "passes".

      - +

      LLVM Optimization Passes

      -
      +

      LLVM provides many optimization passes, which do many different sorts of things and have different tradeoffs. Unlike other systems, LLVM doesn't hold @@ -283,10 +281,10 @@ executing it!

      - +

      Adding a JIT Compiler

      -
      +

      Code that is available in LLVM IR can have a wide variety of tools applied to it. For example, you can run optimizations on it (as we did above), @@ -486,10 +484,10 @@ constructs, tackling some interesting LLVM IR issues along the way.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1022,8 +1020,8 @@ extern double putchard(double X) { Chris Lattner
      Erick Tryzelaar
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/OCamlLangImpl5.html b/docs/tutorial/OCamlLangImpl5.html index 3173803cfa6b..ca796916ef14 100644 --- a/docs/tutorial/OCamlLangImpl5.html +++ b/docs/tutorial/OCamlLangImpl5.html @@ -12,7 +12,7 @@ -

      Kaleidoscope: Extending the Language: Control Flow
      +

      Kaleidoscope: Extending the Language: Control Flow

      - +

      Chapter 5 Introduction

      -
      +

      Welcome to Chapter 5 of the "Implementing a language with LLVM" tutorial. Parts 1-4 described the implementation of the simple @@ -69,10 +69,10 @@ have an if/then/else expression plus a simple 'for' loop.

      - +

      If/Then/Else

      -
      +

      Extending Kaleidoscope to support if/then/else is quite straightforward. It @@ -112,15 +112,12 @@ Since Kaleidoscope allows side-effects, this behavior is important to nail down.

      Now that we know what we "want", lets break this down into its constituent pieces.

      -
      - - +

      Lexer Extensions for If/Then/Else

      -
      +

      The lexer extensions are straightforward. First we add new variants for the relevant tokens:

      @@ -153,11 +150,10 @@ stuff:

      - +

      AST Extensions for If/Then/Else

      -
      +

      To represent the new expression we add a new AST variant for it:

      @@ -175,11 +171,10 @@ type expr =
      - +

      Parser Extensions for If/Then/Else

      -
      +

      Now that we have the relevant tokens coming from the lexer and we have the AST node to build, our parsing logic is relatively straightforward. First we @@ -214,10 +209,10 @@ let rec parse_primary = parser

      - +

      LLVM IR for If/Then/Else

      -
      +

      Now that we have it parsing and building the AST, the final piece is adding LLVM code generation support. This is the most interesting part of the @@ -331,11 +326,10 @@ directly.

      - +

      Code Generation for If/Then/Else

      -
      +

      In order to generate code for this, we implement the Codegen method for IfExprAST:

      @@ -492,11 +486,13 @@ another useful expression that is familiar from non-functional languages...

      +
      + - +

      'for' Loop Expression

      -
      +

      Now that we know how to add basic control flow constructs to the language, we have the tools to add more powerful things. Lets add something more @@ -525,14 +521,11 @@ variables, it will get more useful.

      As before, lets talk about the changes that we need to Kaleidoscope to support this.

      -
      - - +

      Lexer Extensions for the 'for' Loop

      -
      +

      The lexer extensions are the same sort of thing as for if/then/else:

      @@ -559,11 +552,10 @@ the 'for' Loop
      - +

      AST Extensions for the 'for' Loop

      -
      +

      The AST variant is just as simple. It basically boils down to capturing the variable name and the constituent expressions in the node.

      @@ -580,11 +572,10 @@ type expr =
      - +

      Parser Extensions for the 'for' Loop

      -
      +

      The parser code is also fairly standard. The only interesting thing here is handling of the optional step value. The parser code handles it by checking to @@ -628,11 +619,10 @@ let rec parse_primary = parser

      - +

      LLVM IR for the 'for' Loop

      -
      +

      Now we get to the good part: the LLVM IR we want to generate for this thing. With the simple example above, we get this LLVM IR (note that this dump is @@ -674,11 +664,10 @@ expressions, and some basic blocks. Lets see how this fits together.

      - +

      Code Generation for the 'for' Loop

      -
      +

      The first part of Codegen is very simple: we just output the start expression for the loop value:

      @@ -851,11 +840,13 @@ to our poor innocent language.

      +
      + - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1562,8 +1553,8 @@ operators Chris Lattner
      Erick Tryzelaar
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-05-28 19:07:41 +0200 (Fri, 28 May 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/OCamlLangImpl6.html b/docs/tutorial/OCamlLangImpl6.html index 1d4f8c7509f8..bde429b5afcf 100644 --- a/docs/tutorial/OCamlLangImpl6.html +++ b/docs/tutorial/OCamlLangImpl6.html @@ -12,7 +12,7 @@ -

      Kaleidoscope: Extending the Language: User-defined Operators
      +

      Kaleidoscope: Extending the Language: User-defined Operators

      - +

      Chapter 6 Introduction

      -
      +

      Welcome to Chapter 6 of the "Implementing a language with LLVM" tutorial. At this point in our tutorial, we now have a fully @@ -64,10 +64,10 @@ an example of what you can build with Kaleidoscope and its feature set.

      - +

      User-defined Operators: the Idea

      -
      +

      The "operator overloading" that we will add to Kaleidoscope is more general than @@ -129,10 +129,10 @@ operators.

      - +

      User-defined Binary Operators

      -
      +

      Adding support for user-defined binary operators is pretty simple with our current framework. We'll first add support for the unary/binary keywords:

      @@ -320,10 +320,10 @@ see what it takes.

      - +

      User-defined Unary Operators

      -
      +

      Since we don't currently support unary operators in the Kaleidoscope language, we'll need to add everything to support them. Above, we added simple @@ -472,10 +472,10 @@ is simpler primarily because it doesn't need to handle any predefined operators.

      - +

      Kicking the Tires

      -
      +

      It is somewhat hard to believe, but with a few simple extensions we've covered in the last chapters, we have grown a real-ish language. With this, we @@ -778,10 +778,10 @@ add variable mutation without building SSA in your front-end.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with the @@ -1567,8 +1567,8 @@ SSA construction Chris Lattner
      Erick Tryzelaar
      - The LLVM Compiler Infrastructure
      - Last modified: $Date: 2010-06-21 22:31:30 +0200 (Mon, 21 Jun 2010) $ + The LLVM Compiler Infrastructure
      + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/OCamlLangImpl7.html b/docs/tutorial/OCamlLangImpl7.html index a9fcd704cf8b..a48e679cecef 100644 --- a/docs/tutorial/OCamlLangImpl7.html +++ b/docs/tutorial/OCamlLangImpl7.html @@ -13,7 +13,7 @@ -

      Kaleidoscope: Extending the Language: Mutable Variables
      +

      Kaleidoscope: Extending the Language: Mutable Variables

      - +

      Chapter 7 Introduction

      -
      +

      Welcome to Chapter 7 of the "Implementing a language with LLVM" tutorial. In chapters 1 through 6, we've built a very @@ -70,10 +70,10 @@ support for this, though the way it works is a bit unexpected for some.

      - +

      Why is this a hard problem?

      -
      +

      To understand why mutable variables cause complexities in SSA construction, @@ -144,10 +144,10 @@ logic.

      - +

      Memory in LLVM

      -
      +

      The 'trick' here is that while LLVM does require all register values to be in SSA form, it does not require (or permit) memory objects to be in SSA form. @@ -325,11 +325,10 @@ variables now!

      - +

      Mutable Variables in Kaleidoscope

      -
      +

      Now that we know the sort of problem we want to tackle, lets see what this looks like in the context of our little Kaleidoscope language. We're going to @@ -382,11 +381,10 @@ Kaleidoscope to support new variable definitions.

      - +

      Adjusting Existing Variables for Mutation

      -
      +

      The symbol table in Kaleidoscope is managed at code generation time by the @@ -672,10 +670,10 @@ we'll add the assignment operator.

      - +

      New Assignment Operator

      -
      +

      With our current framework, adding a new assignment operator is really simple. We will parse it just like any other binary operator, but handle it @@ -773,11 +771,10 @@ add this next!

      - +

      User-defined Local Variables

      -
      +

      Adding var/in is just like any other other extensions we made to Kaleidoscope: we extend the lexer, the parser, the AST and the code generator. @@ -956,10 +953,10 @@ anywhere in sight.

      - +

      Full Code Listing

      -
      +

      Here is the complete code listing for our running example, enhanced with mutable @@ -1887,7 +1884,7 @@ extern double printd(double X) { -Next: Conclusion and other useful LLVM tidbits +Next: Conclusion and other useful LLVM tidbits

      @@ -1899,9 +1896,9 @@ extern double printd(double X) { src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      + The LLVM Compiler Infrastructure
      Erick Tryzelaar
      - Last modified: $Date: 2011-01-01 04:27:43 +0100 (Sat, 01 Jan 2011) $ + Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $ diff --git a/docs/tutorial/OCamlLangImpl8.html b/docs/tutorial/OCamlLangImpl8.html index 64a62002c4cc..eed8c03d21ca 100644 --- a/docs/tutorial/OCamlLangImpl8.html +++ b/docs/tutorial/OCamlLangImpl8.html @@ -11,8 +11,7 @@ -
      Kaleidoscope: Conclusion and other useful LLVM - tidbits
      +

      Kaleidoscope: Conclusion and other useful LLVM tidbits

      - +

      Tutorial Conclusion

      -
      +

      Welcome to the the final chapter of the "Implementing a language with LLVM" tutorial. In the course of this tutorial, we have grown @@ -154,23 +153,19 @@ are very useful if you want to take advantage of LLVM's capabilities.

      - +

      Properties of the LLVM IR

      -
      +

      We have a couple common questions about code in the LLVM IR form - lets just get these out of the way right now, shall we?

      -
      - - +

      Target Independence

      -
      +

      Kaleidoscope is an example of a "portable language": any program written in Kaleidoscope will work the same way on any target that it runs on. Many other @@ -221,10 +216,10 @@ in-kernel language.

      - +

      Safety Guarantees

      -
      +

      Many of the languages above are also "safe" languages: it is impossible for a program written in Java to corrupt its address space and crash the process @@ -243,11 +238,10 @@ list if you are interested in more details.

      - +

      Language-Specific Optimizations

      -
      +

      One thing about LLVM that turns off many people is that it does not solve all the world's problems in one system (sorry 'world hunger', someone else will have @@ -297,24 +291,23 @@ language-specific AST.

      +
      + - +

      Tips and Tricks

      -
      +

      There is a variety of useful tips and tricks that you come to know after working on/with LLVM that aren't obvious at first glance. Instead of letting everyone rediscover them, this section talks about some of these issues.

      -
      - - +

      Implementing portable offsetof/sizeof

      -
      +

      One interesting thing that comes up, if you are trying to keep the code generated by your compiler "target independent", is that you often need to know @@ -331,11 +324,10 @@ in a portable way.

      - +

      Garbage Collected Stack Frames

      -
      +

      Some languages want to explicitly manage their stack frames, often so that they are garbage collected or to allow easy implementation of closures. There @@ -349,6 +341,8 @@ Passing Style and the use of tail calls (which LLVM also supports).

      +
      +
      @@ -358,7 +352,7 @@ Passing Style and the use of tail calls (which LLVM also supports).

      src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!"> Chris Lattner
      - The LLVM Compiler Infrastructure
      + The LLVM Compiler Infrastructure
      Last modified: $Date$
      diff --git a/docs/tutorial/index.html b/docs/tutorial/index.html index 11dd5e2d732a..0a8cae2c2c9b 100644 --- a/docs/tutorial/index.html +++ b/docs/tutorial/index.html @@ -12,7 +12,7 @@ -
      LLVM Tutorial: Table of Contents
      +

      LLVM Tutorial: Table of Contents

      1. Kaleidoscope: Implementing a Language with LLVM diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp index 8536915993ef..54f35535b57d 100644 --- a/examples/BrainF/BrainF.cpp +++ b/examples/BrainF/BrainF.cpp @@ -294,8 +294,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb, // Make part of PHI instruction now, wait until end of loop to finish PHINode *phi_0 = PHINode::Create(PointerType::getUnqual(IntegerType::getInt8Ty(C)), - headreg, testbb); - phi_0->reserveOperandSpace(2); + 2, headreg, testbb); phi_0->addIncoming(curhead, bb_0); curhead = phi_0; @@ -449,8 +448,8 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb, //%head.%d = phi i8 *[%head.%d, %main.%d] PHINode *phi_1 = builder-> - CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), headreg); - phi_1->reserveOperandSpace(1); + CreatePHI(PointerType::getUnqual(IntegerType::getInt8Ty(C)), 1, + headreg); phi_1->addIncoming(head_0, testbb); curhead = phi_1; } diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp index 95ccd24a6894..e5bd3777703b 100644 --- a/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/examples/ExceptionDemo/ExceptionDemo.cpp @@ -1,12 +1,11 @@ -//===-- examples/ExceptionDemo/ExceptionDemo.cpp - -// An example use of the llvm Exception mechanism --===// +//===-- ExceptionDemo.cpp - An example using llvm Exceptions --------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -//===--------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// // // Demo program which implements an example LLVM exception implementation, and // shows several test cases including the handling of foreign exceptions. 
@@ -46,8 +45,7 @@ // This code uses code from the llvm compiler-rt project and the llvm // Kaleidoscope project. // -//===--------------------------------------------------------------------===// - +//===----------------------------------------------------------------------===// #include "llvm/LLVMContext.h" #include "llvm/DerivedTypes.h" @@ -64,11 +62,17 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/Dwarf.h" +// FIXME: Although all systems tested with (Linux, OS X), do not need this +// header file included. A user on ubuntu reported, undefined symbols +// for stderr, and fprintf, and the addition of this include fixed the +// issue for them. Given that LLVM's best practices include the goal +// of reducing the number of redundant header files included, the +// correct solution would be to find out why these symbols are not +// defined for the system in question, and fix the issue by finding out +// which LLVM header file, if any, would include these symbols. #include -#include + #include -#include -#include #include @@ -80,8 +84,8 @@ // http://refspecs.freestandards.org/abi-eh-1.21.html extern "C" { - -typedef enum { + + typedef enum { _URC_NO_REASON = 0, _URC_FOREIGN_EXCEPTION_CAUGHT = 1, _URC_FATAL_PHASE2_ERROR = 2, @@ -91,43 +95,43 @@ typedef enum { _URC_HANDLER_FOUND = 6, _URC_INSTALL_CONTEXT = 7, _URC_CONTINUE_UNWIND = 8 -} _Unwind_Reason_Code; - -typedef enum { + } _Unwind_Reason_Code; + + typedef enum { _UA_SEARCH_PHASE = 1, _UA_CLEANUP_PHASE = 2, _UA_HANDLER_FRAME = 4, _UA_FORCE_UNWIND = 8, _UA_END_OF_STACK = 16 -} _Unwind_Action; - -struct _Unwind_Exception; - -typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, - struct _Unwind_Exception *); - -struct _Unwind_Exception { + } _Unwind_Action; + + struct _Unwind_Exception; + + typedef void (*_Unwind_Exception_Cleanup_Fn) (_Unwind_Reason_Code, + struct _Unwind_Exception *); + + struct _Unwind_Exception { uint64_t exception_class; _Unwind_Exception_Cleanup_Fn exception_cleanup; 
- + uintptr_t private_1; uintptr_t private_2; - + // @@@ The IA-64 ABI says that this structure must be double-word aligned. // Taking that literally does not make much sense generically. Instead // we provide the maximum alignment required by any type for the machine. -} __attribute__((__aligned__)); - -struct _Unwind_Context; -typedef struct _Unwind_Context* _Unwind_Context_t; - -extern const uint8_t* _Unwind_GetLanguageSpecificData (_Unwind_Context_t c); -extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i); -extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n); -extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value); -extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context); -extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context); - + } __attribute__((__aligned__)); + + struct _Unwind_Context; + typedef struct _Unwind_Context *_Unwind_Context_t; + + extern const uint8_t *_Unwind_GetLanguageSpecificData (_Unwind_Context_t c); + extern uintptr_t _Unwind_GetGR (_Unwind_Context_t c, int i); + extern void _Unwind_SetGR (_Unwind_Context_t c, int i, uintptr_t n); + extern void _Unwind_SetIP (_Unwind_Context_t, uintptr_t new_value); + extern uintptr_t _Unwind_GetIP (_Unwind_Context_t context); + extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context); + } // extern "C" // @@ -136,8 +140,8 @@ extern uintptr_t _Unwind_GetRegionStart (_Unwind_Context_t context); /// This is our simplistic type info struct OurExceptionType_t { - /// type info type - int type; + /// type info type + int type; }; @@ -148,10 +152,10 @@ struct OurExceptionType_t { /// on a double word boundary. 
This is necessary to match the standard: /// http://refspecs.freestandards.org/abi-eh-1.21.html struct OurBaseException_t { - struct OurExceptionType_t type; - - // Note: This is properly aligned in unwind.h - struct _Unwind_Exception unwindException; + struct OurExceptionType_t type; + + // Note: This is properly aligned in unwind.h + struct _Unwind_Exception unwindException; }; @@ -169,7 +173,7 @@ static std::map namedValues; int64_t ourBaseFromUnwindOffset; const unsigned char ourBaseExcpClassChars[] = - {'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'}; +{'o', 'b', 'j', '\0', 'b', 'a', 's', '\0'}; static uint64_t ourBaseExceptionClass = 0; @@ -177,13 +181,13 @@ static uint64_t ourBaseExceptionClass = 0; static std::vector ourTypeInfoNames; static std::map ourTypeInfoNamesIndex; -static llvm::StructType* ourTypeInfoType; -static llvm::StructType* ourExceptionType; -static llvm::StructType* ourUnwindExceptionType; +static llvm::StructType *ourTypeInfoType; +static llvm::StructType *ourExceptionType; +static llvm::StructType *ourUnwindExceptionType; -static llvm::ConstantInt* ourExceptionNotThrownState; -static llvm::ConstantInt* ourExceptionThrownState; -static llvm::ConstantInt* ourExceptionCaughtState; +static llvm::ConstantInt *ourExceptionNotThrownState; +static llvm::ConstantInt *ourExceptionThrownState; +static llvm::ConstantInt *ourExceptionCaughtState; typedef std::vector ArgNames; typedef std::vector ArgTypes; @@ -204,35 +208,32 @@ typedef std::vector ArgTypes; /// @param declarationOnly for function declarations /// @param isVarArg function uses vararg arguments /// @returns function instance -llvm::Function *createFunction(llvm::Module& module, - const llvm::Type* retType, - const ArgTypes& theArgTypes, - const ArgNames& theArgNames, - const std::string& functName, +llvm::Function *createFunction(llvm::Module &module, + const llvm::Type *retType, + const ArgTypes &theArgTypes, + const ArgNames &theArgNames, + const std::string &functName, 
llvm::GlobalValue::LinkageTypes linkage, bool declarationOnly, bool isVarArg) { - llvm::FunctionType* functType = llvm::FunctionType::get(retType, - theArgTypes, - isVarArg); - llvm::Function* ret = llvm::Function::Create(functType, - linkage, - functName, - &module); - if (!ret || declarationOnly) - return(ret); - - namedValues.clear(); - unsigned i = 0; - for (llvm::Function::arg_iterator argIndex = ret->arg_begin(); - i != theArgNames.size(); - ++argIndex, ++i) { - - argIndex->setName(theArgNames[i]); - namedValues[theArgNames[i]] = argIndex; - } - + llvm::FunctionType *functType = + llvm::FunctionType::get(retType, theArgTypes, isVarArg); + llvm::Function *ret = + llvm::Function::Create(functType, linkage, functName, &module); + if (!ret || declarationOnly) return(ret); + + namedValues.clear(); + unsigned i = 0; + for (llvm::Function::arg_iterator argIndex = ret->arg_begin(); + i != theArgNames.size(); + ++argIndex, ++i) { + + argIndex->setName(theArgNames[i]); + namedValues[theArgNames[i]] = argIndex; + } + + return(ret); } @@ -243,18 +244,18 @@ llvm::Function *createFunction(llvm::Module& module, /// @param type stack variable type /// @param initWith optional constant initialization value /// @returns AllocaInst instance -static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function& function, - const std::string &varName, - const llvm::Type* type, - llvm::Constant* initWith = NULL) { - llvm::BasicBlock& block = function.getEntryBlock(); - llvm::IRBuilder<> tmp(&block, block.begin()); - llvm::AllocaInst* ret = tmp.CreateAlloca(type, 0, varName.c_str()); - - if (initWith) - tmp.CreateStore(initWith, ret); - - return(ret); +static llvm::AllocaInst *createEntryBlockAlloca(llvm::Function &function, + const std::string &varName, + const llvm::Type *type, + llvm::Constant *initWith = 0) { + llvm::BasicBlock &block = function.getEntryBlock(); + llvm::IRBuilder<> tmp(&block, block.begin()); + llvm::AllocaInst *ret = tmp.CreateAlloca(type, 0, varName.c_str()); + + 
if (initWith) + tmp.CreateStore(initWith, ret); + + return(ret); } @@ -274,15 +275,15 @@ extern "C" { /// Prints a 32 bit number, according to the format, to stderr. /// @param intToPrint integer to print /// @param format printf like format to use when printing -void print32Int(int intToPrint, const char* format) { - if (format) { - // Note: No NULL check - fprintf(stderr, format, intToPrint); - } - else { - // Note: No NULL check - fprintf(stderr, "::print32Int(...):NULL arg.\n"); - } +void print32Int(int intToPrint, const char *format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print32Int(...):NULL arg.\n"); + } } @@ -291,27 +292,27 @@ void print32Int(int intToPrint, const char* format) { /// Prints a 64 bit number, according to the format, to stderr. /// @param intToPrint integer to print /// @param format printf like format to use when printing -void print64Int(long int intToPrint, const char* format) { - if (format) { - // Note: No NULL check - fprintf(stderr, format, intToPrint); - } - else { - // Note: No NULL check - fprintf(stderr, "::print64Int(...):NULL arg.\n"); - } +void print64Int(long int intToPrint, const char *format) { + if (format) { + // Note: No NULL check + fprintf(stderr, format, intToPrint); + } + else { + // Note: No NULL check + fprintf(stderr, "::print64Int(...):NULL arg.\n"); + } } /// Prints a C string to stderr /// @param toPrint string to print -void printStr(char* toPrint) { - if (toPrint) { - fprintf(stderr, "%s", toPrint); - } - else { - fprintf(stderr, "::printStr(...):NULL arg.\n"); - } +void printStr(char *toPrint) { + if (toPrint) { + fprintf(stderr, "%s", toPrint); + } + else { + fprintf(stderr, "::printStr(...):NULL arg.\n"); + } } @@ -319,17 +320,17 @@ void printStr(char* toPrint) { /// is calculated from the supplied OurBaseException_t::unwindException /// member address. Handles (ignores), NULL pointers. 
/// @param expToDelete exception to delete -void deleteOurException(OurUnwindException* expToDelete) { +void deleteOurException(OurUnwindException *expToDelete) { #ifdef DEBUG - fprintf(stderr, - "deleteOurException(...).\n"); + fprintf(stderr, + "deleteOurException(...).\n"); #endif - - if (expToDelete && - (expToDelete->exception_class == ourBaseExceptionClass)) { - - free(((char*) expToDelete) + ourBaseFromUnwindOffset); - } + + if (expToDelete && + (expToDelete->exception_class == ourBaseExceptionClass)) { + + free(((char*) expToDelete) + ourBaseFromUnwindOffset); + } } @@ -340,27 +341,27 @@ void deleteOurException(OurUnwindException* expToDelete) { /// @unlink /// @param expToDelete exception instance to delete void deleteFromUnwindOurException(_Unwind_Reason_Code reason, - OurUnwindException* expToDelete) { + OurUnwindException *expToDelete) { #ifdef DEBUG - fprintf(stderr, - "deleteFromUnwindOurException(...).\n"); + fprintf(stderr, + "deleteFromUnwindOurException(...).\n"); #endif - - deleteOurException(expToDelete); + + deleteOurException(expToDelete); } /// Creates (allocates on the heap), an exception (OurException instance), /// of the supplied type info type. 
/// @param type type info type -OurUnwindException* createOurException(int type) { - size_t size = sizeof(OurException); - OurException* ret = (OurException*) memset(malloc(size), 0, size); - (ret->type).type = type; - (ret->unwindException).exception_class = ourBaseExceptionClass; - (ret->unwindException).exception_cleanup = deleteFromUnwindOurException; - - return(&(ret->unwindException)); +OurUnwindException *createOurException(int type) { + size_t size = sizeof(OurException); + OurException *ret = (OurException*) memset(malloc(size), 0, size); + (ret->type).type = type; + (ret->unwindException).exception_class = ourBaseExceptionClass; + (ret->unwindException).exception_cleanup = deleteFromUnwindOurException; + + return(&(ret->unwindException)); } @@ -369,22 +370,22 @@ OurUnwindException* createOurException(int type) { /// @link http://dwarfstd.org/Dwarf3.pdf @unlink /// @param data reference variable holding memory pointer to decode from /// @returns decoded value -static uintptr_t readULEB128(const uint8_t** data) { - uintptr_t result = 0; - uintptr_t shift = 0; - unsigned char byte; - const uint8_t* p = *data; - - do { - byte = *p++; - result |= (byte & 0x7f) << shift; - shift += 7; - } - while (byte & 0x80); - - *data = p; - - return result; +static uintptr_t readULEB128(const uint8_t **data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t *p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + return result; } @@ -393,26 +394,26 @@ static uintptr_t readULEB128(const uint8_t** data) { /// @link http://dwarfstd.org/Dwarf3.pdf @unlink /// @param data reference variable holding memory pointer to decode from /// @returns decoded value -static uintptr_t readSLEB128(const uint8_t** data) { - uintptr_t result = 0; - uintptr_t shift = 0; - unsigned char byte; - const uint8_t* p = *data; - - do { - byte = *p++; - result |= (byte & 0x7f) << shift; - shift 
+= 7; - } - while (byte & 0x80); - - *data = p; - - if ((byte & 0x40) && (shift < (sizeof(result) << 3))) { - result |= (~0 << shift); - } - - return result; +static uintptr_t readSLEB128(const uint8_t **data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t *p = *data; + + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } + while (byte & 0x80); + + *data = p; + + if ((byte & 0x40) && (shift < (sizeof(result) << 3))) { + result |= (~0 << shift); + } + + return result; } @@ -422,82 +423,82 @@ static uintptr_t readSLEB128(const uint8_t** data) { /// @param data reference variable holding memory pointer to decode from /// @param encoding dwarf encoding type /// @returns decoded value -static uintptr_t readEncodedPointer(const uint8_t** data, uint8_t encoding) { - uintptr_t result = 0; - const uint8_t* p = *data; - - if (encoding == llvm::dwarf::DW_EH_PE_omit) - return(result); - - // first get value - switch (encoding & 0x0F) { - case llvm::dwarf::DW_EH_PE_absptr: - result = *((uintptr_t*)p); - p += sizeof(uintptr_t); - break; - case llvm::dwarf::DW_EH_PE_uleb128: - result = readULEB128(&p); - break; - // Note: This case has not been tested - case llvm::dwarf::DW_EH_PE_sleb128: - result = readSLEB128(&p); - break; - case llvm::dwarf::DW_EH_PE_udata2: - result = *((uint16_t*)p); - p += sizeof(uint16_t); - break; - case llvm::dwarf::DW_EH_PE_udata4: - result = *((uint32_t*)p); - p += sizeof(uint32_t); - break; - case llvm::dwarf::DW_EH_PE_udata8: - result = *((uint64_t*)p); - p += sizeof(uint64_t); - break; - case llvm::dwarf::DW_EH_PE_sdata2: - result = *((int16_t*)p); - p += sizeof(int16_t); - break; - case llvm::dwarf::DW_EH_PE_sdata4: - result = *((int32_t*)p); - p += sizeof(int32_t); - break; - case llvm::dwarf::DW_EH_PE_sdata8: - result = *((int64_t*)p); - p += sizeof(int64_t); - break; - default: - // not supported - abort(); - break; - } - - // then add relative offset - switch (encoding & 0x70) { - 
case llvm::dwarf::DW_EH_PE_absptr: - // do nothing - break; - case llvm::dwarf::DW_EH_PE_pcrel: - result += (uintptr_t)(*data); - break; - case llvm::dwarf::DW_EH_PE_textrel: - case llvm::dwarf::DW_EH_PE_datarel: - case llvm::dwarf::DW_EH_PE_funcrel: - case llvm::dwarf::DW_EH_PE_aligned: - default: - // not supported - abort(); - break; - } - - // then apply indirection - if (encoding & llvm::dwarf::DW_EH_PE_indirect) { - result = *((uintptr_t*)result); - } - - *data = p; - - return result; +static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) { + uintptr_t result = 0; + const uint8_t *p = *data; + + if (encoding == llvm::dwarf::DW_EH_PE_omit) + return(result); + + // first get value + switch (encoding & 0x0F) { + case llvm::dwarf::DW_EH_PE_absptr: + result = *((uintptr_t*)p); + p += sizeof(uintptr_t); + break; + case llvm::dwarf::DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + // Note: This case has not been tested + case llvm::dwarf::DW_EH_PE_sleb128: + result = readSLEB128(&p); + break; + case llvm::dwarf::DW_EH_PE_udata2: + result = *((uint16_t*)p); + p += sizeof(uint16_t); + break; + case llvm::dwarf::DW_EH_PE_udata4: + result = *((uint32_t*)p); + p += sizeof(uint32_t); + break; + case llvm::dwarf::DW_EH_PE_udata8: + result = *((uint64_t*)p); + p += sizeof(uint64_t); + break; + case llvm::dwarf::DW_EH_PE_sdata2: + result = *((int16_t*)p); + p += sizeof(int16_t); + break; + case llvm::dwarf::DW_EH_PE_sdata4: + result = *((int32_t*)p); + p += sizeof(int32_t); + break; + case llvm::dwarf::DW_EH_PE_sdata8: + result = *((int64_t*)p); + p += sizeof(int64_t); + break; + default: + // not supported + abort(); + break; + } + + // then add relative offset + switch (encoding & 0x70) { + case llvm::dwarf::DW_EH_PE_absptr: + // do nothing + break; + case llvm::dwarf::DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case llvm::dwarf::DW_EH_PE_textrel: + case llvm::dwarf::DW_EH_PE_datarel: + case llvm::dwarf::DW_EH_PE_funcrel: + case 
llvm::dwarf::DW_EH_PE_aligned: + default: + // not supported + abort(); + break; + } + + // then apply indirection + if (encoding & llvm::dwarf::DW_EH_PE_indirect) { + result = *((uintptr_t*)result); + } + + *data = p; + + return result; } @@ -524,74 +525,74 @@ static bool handleActionValue(int64_t *resultAction, uintptr_t actionEntry, uint64_t exceptionClass, struct _Unwind_Exception *exceptionObject) { - bool ret = false; - - if (!resultAction || - !exceptionObject || - (exceptionClass != ourBaseExceptionClass)) - return(ret); - - struct OurBaseException_t* excp = (struct OurBaseException_t*) - (((char*) exceptionObject) + ourBaseFromUnwindOffset); - struct OurExceptionType_t *excpType = &(excp->type); - int type = excpType->type; - + bool ret = false; + + if (!resultAction || + !exceptionObject || + (exceptionClass != ourBaseExceptionClass)) + return(ret); + + struct OurBaseException_t *excp = (struct OurBaseException_t*) + (((char*) exceptionObject) + ourBaseFromUnwindOffset); + struct OurExceptionType_t *excpType = &(excp->type); + int type = excpType->type; + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...): exceptionObject = <%p>, " + "excp = <%p>.\n", + exceptionObject, + excp); +#endif + + const uint8_t *actionPos = (uint8_t*) actionEntry, + *tempActionPos; + int64_t typeOffset = 0, + actionOffset; + + for (int i = 0; true; ++i) { + // Each emitted dwarf action corresponds to a 2 tuple of + // type info address offset, and action offset to the next + // emitted action. 
+ typeOffset = readSLEB128(&actionPos); + tempActionPos = actionPos; + actionOffset = readSLEB128(&tempActionPos); + #ifdef DEBUG fprintf(stderr, - "handleActionValue(...): exceptionObject = <%p>, " - "excp = <%p>.\n", - exceptionObject, - excp); + "handleActionValue(...):typeOffset: <%lld>, " + "actionOffset: <%lld>.\n", + typeOffset, + actionOffset); #endif - - const uint8_t *actionPos = (uint8_t*) actionEntry, - *tempActionPos; - int64_t typeOffset = 0, - actionOffset; - - for (int i = 0; true; ++i) { - // Each emitted dwarf action corresponds to a 2 tuple of - // type info address offset, and action offset to the next - // emitted action. - typeOffset = readSLEB128(&actionPos); - tempActionPos = actionPos; - actionOffset = readSLEB128(&tempActionPos); - + assert((typeOffset >= 0) && + "handleActionValue(...):filters are not supported."); + + // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector + // argument has been matched. + if ((typeOffset > 0) && + (type == (classInfo[-typeOffset])->type)) { #ifdef DEBUG - fprintf(stderr, - "handleActionValue(...):typeOffset: <%lld>, " - "actionOffset: <%lld>.\n", - typeOffset, - actionOffset); + fprintf(stderr, + "handleActionValue(...):actionValue <%d> found.\n", + i); #endif - assert((typeOffset >= 0) && - "handleActionValue(...):filters are not supported."); - - // Note: A typeOffset == 0 implies that a cleanup llvm.eh.selector - // argument has been matched. 
- if ((typeOffset > 0) && - (type == (classInfo[-typeOffset])->type)) { -#ifdef DEBUG - fprintf(stderr, - "handleActionValue(...):actionValue <%d> found.\n", - i); -#endif - *resultAction = i + 1; - ret = true; - break; - } - -#ifdef DEBUG - fprintf(stderr, - "handleActionValue(...):actionValue not found.\n"); -#endif - if (!actionOffset) - break; - - actionPos += actionOffset; + *resultAction = i + 1; + ret = true; + break; } - - return(ret); + +#ifdef DEBUG + fprintf(stderr, + "handleActionValue(...):actionValue not found.\n"); +#endif + if (!actionOffset) + break; + + actionPos += actionOffset; + } + + return(ret); } @@ -607,180 +608,177 @@ static bool handleActionValue(int64_t *resultAction, /// @param context unwind system context /// @returns minimally supported unwinding control indicator static _Unwind_Reason_Code handleLsda(int version, - const uint8_t* lsda, - _Unwind_Action actions, - uint64_t exceptionClass, - struct _Unwind_Exception* exceptionObject, - _Unwind_Context_t context) { - _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND; - - if (!lsda) - return(ret); - + const uint8_t *lsda, + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, + _Unwind_Context_t context) { + _Unwind_Reason_Code ret = _URC_CONTINUE_UNWIND; + + if (!lsda) + return(ret); + #ifdef DEBUG - fprintf(stderr, - "handleLsda(...):lsda is non-zero.\n"); + fprintf(stderr, + "handleLsda(...):lsda is non-zero.\n"); #endif - - // Get the current instruction pointer and offset it before next - // instruction in the current frame which threw the exception. - uintptr_t pc = _Unwind_GetIP(context)-1; - - // Get beginning current frame's code (as defined by the - // emitted dwarf code) - uintptr_t funcStart = _Unwind_GetRegionStart(context); - uintptr_t pcOffset = pc - funcStart; - struct OurExceptionType_t** classInfo = NULL; - - // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding - // dwarf emission - - // Parse LSDA header. 
- uint8_t lpStartEncoding = *lsda++; - - if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) { - readEncodedPointer(&lsda, lpStartEncoding); - } - - uint8_t ttypeEncoding = *lsda++; - uintptr_t classInfoOffset; - - if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) { - // Calculate type info locations in emitted dwarf code which - // were flagged by type info arguments to llvm.eh.selector - // intrinsic - classInfoOffset = readULEB128(&lsda); - classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset); - } - - // Walk call-site table looking for range that - // includes current PC. - - uint8_t callSiteEncoding = *lsda++; - uint32_t callSiteTableLength = readULEB128(&lsda); - const uint8_t* callSiteTableStart = lsda; - const uint8_t* callSiteTableEnd = callSiteTableStart + - callSiteTableLength; - const uint8_t* actionTableStart = callSiteTableEnd; - const uint8_t* callSitePtr = callSiteTableStart; - - bool foreignException = false; - - while (callSitePtr < callSiteTableEnd) { - uintptr_t start = readEncodedPointer(&callSitePtr, - callSiteEncoding); - uintptr_t length = readEncodedPointer(&callSitePtr, + + // Get the current instruction pointer and offset it before next + // instruction in the current frame which threw the exception. + uintptr_t pc = _Unwind_GetIP(context)-1; + + // Get beginning current frame's code (as defined by the + // emitted dwarf code) + uintptr_t funcStart = _Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + struct OurExceptionType_t **classInfo = NULL; + + // Note: See JITDwarfEmitter::EmitExceptionTable(...) for corresponding + // dwarf emission + + // Parse LSDA header. 
+ uint8_t lpStartEncoding = *lsda++; + + if (lpStartEncoding != llvm::dwarf::DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + + uint8_t ttypeEncoding = *lsda++; + uintptr_t classInfoOffset; + + if (ttypeEncoding != llvm::dwarf::DW_EH_PE_omit) { + // Calculate type info locations in emitted dwarf code which + // were flagged by type info arguments to llvm.eh.selector + // intrinsic + classInfoOffset = readULEB128(&lsda); + classInfo = (struct OurExceptionType_t**) (lsda + classInfoOffset); + } + + // Walk call-site table looking for range that + // includes current PC. + + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t *callSiteTableStart = lsda; + const uint8_t *callSiteTableEnd = callSiteTableStart + + callSiteTableLength; + const uint8_t *actionTableStart = callSiteTableEnd; + const uint8_t *callSitePtr = callSiteTableStart; + + bool foreignException = false; + + while (callSitePtr < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t length = readEncodedPointer(&callSitePtr, + callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&callSitePtr, callSiteEncoding); - uintptr_t landingPad = readEncodedPointer(&callSitePtr, - callSiteEncoding); - - // Note: Action value - uintptr_t actionEntry = readULEB128(&callSitePtr); - - if (exceptionClass != ourBaseExceptionClass) { - // We have been notified of a foreign exception being thrown, - // and we therefore need to execute cleanup landing pads - actionEntry = 0; - foreignException = true; - } - - if (landingPad == 0) { + + // Note: Action value + uintptr_t actionEntry = readULEB128(&callSitePtr); + + if (exceptionClass != ourBaseExceptionClass) { + // We have been notified of a foreign exception being thrown, + // and we therefore need to execute cleanup landing pads + actionEntry = 0; + foreignException = true; + } + + if (landingPad == 0) { #ifdef DEBUG - fprintf(stderr, - 
"handleLsda(...): No landing pad found.\n"); + fprintf(stderr, + "handleLsda(...): No landing pad found.\n"); #endif - - continue; // no landing pad for this entry - } - - if (actionEntry) { - actionEntry += ((uintptr_t) actionTableStart) - 1; + + continue; // no landing pad for this entry + } + + if (actionEntry) { + actionEntry += ((uintptr_t) actionTableStart) - 1; + } + else { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...):No action table found.\n"); +#endif + } + + bool exceptionMatched = false; + + if ((start <= pcOffset) && (pcOffset < (start + length))) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): Landing pad found.\n"); +#endif + int64_t actionValue = 0; + + if (actionEntry) { + exceptionMatched = handleActionValue(&actionValue, + classInfo, + actionEntry, + exceptionClass, + exceptionObject); + } + + if (!(actions & _UA_SEARCH_PHASE)) { +#ifdef DEBUG + fprintf(stderr, + "handleLsda(...): installed landing pad " + "context.\n"); +#endif + + // Found landing pad for the PC. + // Set Instruction Pointer to so we re-enter function + // at landing pad. The landing pad is created by the + // compiler to take two parameters in registers. + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + + // Note: this virtual register directly corresponds + // to the return of the llvm.eh.selector intrinsic + if (!actionEntry || !exceptionMatched) { + // We indicate cleanup only + _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + 0); } else { -#ifdef DEBUG - fprintf(stderr, - "handleLsda(...):No action table found.\n"); -#endif + // Matched type info index of llvm.eh.selector intrinsic + // passed here. 
+ _Unwind_SetGR(context, + __builtin_eh_return_data_regno(1), + actionValue); } - - bool exceptionMatched = false; - - if ((start <= pcOffset) && (pcOffset < (start + length))) { + + // To execute landing pad set here + _Unwind_SetIP(context, funcStart + landingPad); + ret = _URC_INSTALL_CONTEXT; + } + else if (exceptionMatched) { #ifdef DEBUG - fprintf(stderr, - "handleLsda(...): Landing pad found.\n"); + fprintf(stderr, + "handleLsda(...): setting handler found.\n"); #endif - int64_t actionValue = 0; - - if (actionEntry) { - exceptionMatched = handleActionValue - ( - &actionValue, - classInfo, - actionEntry, - exceptionClass, - exceptionObject - ); - } - - if (!(actions & _UA_SEARCH_PHASE)) { + ret = _URC_HANDLER_FOUND; + } + else { + // Note: Only non-clean up handlers are marked as + // found. Otherwise the clean up handlers will be + // re-found and executed during the clean up + // phase. #ifdef DEBUG - fprintf(stderr, - "handleLsda(...): installed landing pad " - "context.\n"); + fprintf(stderr, + "handleLsda(...): cleanup handler found.\n"); #endif - - // Found landing pad for the PC. - // Set Instruction Pointer to so we re-enter function - // at landing pad. The landing pad is created by the - // compiler to take two parameters in registers. - _Unwind_SetGR(context, - __builtin_eh_return_data_regno(0), - (uintptr_t)exceptionObject); - - // Note: this virtual register directly corresponds - // to the return of the llvm.eh.selector intrinsic - if (!actionEntry || !exceptionMatched) { - // We indicate cleanup only - _Unwind_SetGR(context, - __builtin_eh_return_data_regno(1), - 0); - } - else { - // Matched type info index of llvm.eh.selector intrinsic - // passed here. 
- _Unwind_SetGR(context, - __builtin_eh_return_data_regno(1), - actionValue); - } - - // To execute landing pad set here - _Unwind_SetIP(context, funcStart + landingPad); - ret = _URC_INSTALL_CONTEXT; - } - else if (exceptionMatched) { -#ifdef DEBUG - fprintf(stderr, - "handleLsda(...): setting handler found.\n"); -#endif - ret = _URC_HANDLER_FOUND; - } - else { - // Note: Only non-clean up handlers are marked as - // found. Otherwise the clean up handlers will be - // re-found and executed during the clean up - // phase. -#ifdef DEBUG - fprintf(stderr, - "handleLsda(...): cleanup handler found.\n"); -#endif - } - - break; - } + } + + break; } - - return(ret); + } + + return(ret); } @@ -796,38 +794,38 @@ static _Unwind_Reason_Code handleLsda(int version, /// @param context unwind system context /// @returns minimally supported unwinding control indicator _Unwind_Reason_Code ourPersonality(int version, - _Unwind_Action actions, - uint64_t exceptionClass, - struct _Unwind_Exception* exceptionObject, - _Unwind_Context_t context) { + _Unwind_Action actions, + uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, + _Unwind_Context_t context) { #ifdef DEBUG - fprintf(stderr, - "We are in ourPersonality(...):actions is <%d>.\n", - actions); - - if (actions & _UA_SEARCH_PHASE) { - fprintf(stderr, "ourPersonality(...):In search phase.\n"); - } - else { - fprintf(stderr, "ourPersonality(...):In non-search phase.\n"); - } + fprintf(stderr, + "We are in ourPersonality(...):actions is <%d>.\n", + actions); + + if (actions & _UA_SEARCH_PHASE) { + fprintf(stderr, "ourPersonality(...):In search phase.\n"); + } + else { + fprintf(stderr, "ourPersonality(...):In non-search phase.\n"); + } #endif - - const uint8_t* lsda = _Unwind_GetLanguageSpecificData(context); - + + const uint8_t *lsda = _Unwind_GetLanguageSpecificData(context); + #ifdef DEBUG - fprintf(stderr, - "ourPersonality(...):lsda = <%p>.\n", - lsda); + fprintf(stderr, + "ourPersonality(...):lsda = 
<%p>.\n", + lsda); #endif - - // The real work of the personality function is captured here - return(handleLsda(version, - lsda, - actions, - exceptionClass, - exceptionObject, - context)); + + // The real work of the personality function is captured here + return(handleLsda(version, + lsda, + actions, + exceptionClass, + exceptionObject, + context)); } @@ -840,14 +838,14 @@ _Unwind_Reason_Code ourPersonality(int version, /// @returns class value uint64_t genClass(const unsigned char classChars[], size_t classCharsSize) { - uint64_t ret = classChars[0]; - - for (unsigned i = 1; i < classCharsSize; ++i) { - ret <<= 8; - ret += classChars[i]; - } - - return(ret); + uint64_t ret = classChars[0]; + + for (unsigned i = 1; i < classCharsSize; ++i) { + ret <<= 8; + ret += classChars[i]; + } + + return(ret); } } // extern "C" @@ -869,36 +867,36 @@ uint64_t genClass(const unsigned char classChars[], size_t classCharsSize) /// generated, and is used to hold the constant string. A value of /// false indicates that the constant string will be stored on the /// stack. 
-void generateStringPrint(llvm::LLVMContext& context, - llvm::Module& module, - llvm::IRBuilder<>& builder, +void generateStringPrint(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, std::string toPrint, bool useGlobal = true) { - llvm::Function *printFunct = module.getFunction("printStr"); - - llvm::Value *stringVar; - llvm::Constant* stringConstant = - llvm::ConstantArray::get(context, toPrint); - - if (useGlobal) { - // Note: Does not work without allocation - stringVar = - new llvm::GlobalVariable(module, - stringConstant->getType(), - true, - llvm::GlobalValue::LinkerPrivateLinkage, - stringConstant, - ""); - } - else { - stringVar = builder.CreateAlloca(stringConstant->getType()); - builder.CreateStore(stringConstant, stringVar); - } - - llvm::Value* cast = - builder.CreatePointerCast(stringVar, - builder.getInt8Ty()->getPointerTo()); - builder.CreateCall(printFunct, cast); + llvm::Function *printFunct = module.getFunction("printStr"); + + llvm::Value *stringVar; + llvm::Constant *stringConstant = + llvm::ConstantArray::get(context, toPrint); + + if (useGlobal) { + // Note: Does not work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value *cast = + builder.CreatePointerCast(stringVar, + builder.getInt8Ty()->getPointerTo()); + builder.CreateCall(printFunct, cast); } @@ -914,35 +912,35 @@ void generateStringPrint(llvm::LLVMContext& context, /// generated, and is used to hold the constant string. A value of /// false indicates that the constant string will be stored on the /// stack. 
-void generateIntegerPrint(llvm::LLVMContext& context, - llvm::Module& module, - llvm::IRBuilder<>& builder, - llvm::Function& printFunct, - llvm::Value& toPrint, +void generateIntegerPrint(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::Function &printFunct, + llvm::Value &toPrint, std::string format, bool useGlobal = true) { - llvm::Constant *stringConstant = llvm::ConstantArray::get(context, format); - llvm::Value *stringVar; - - if (useGlobal) { - // Note: Does not seem to work without allocation - stringVar = - new llvm::GlobalVariable(module, - stringConstant->getType(), - true, - llvm::GlobalValue::LinkerPrivateLinkage, - stringConstant, - ""); - } - else { - stringVar = builder.CreateAlloca(stringConstant->getType()); - builder.CreateStore(stringConstant, stringVar); - } - - llvm::Value* cast = - builder.CreateBitCast(stringVar, - builder.getInt8Ty()->getPointerTo()); - builder.CreateCall2(&printFunct, &toPrint, cast); + llvm::Constant *stringConstant = llvm::ConstantArray::get(context, format); + llvm::Value *stringVar; + + if (useGlobal) { + // Note: Does not seem to work without allocation + stringVar = + new llvm::GlobalVariable(module, + stringConstant->getType(), + true, + llvm::GlobalValue::LinkerPrivateLinkage, + stringConstant, + ""); + } + else { + stringVar = builder.CreateAlloca(stringConstant->getType()); + builder.CreateStore(stringConstant, stringVar); + } + + llvm::Value *cast = + builder.CreateBitCast(stringVar, + builder.getInt8Ty()->getPointerTo()); + builder.CreateCall2(&printFunct, &toPrint, cast); } @@ -965,64 +963,61 @@ void generateIntegerPrint(llvm::LLVMContext& context, /// @param exceptionCaughtFlag reference exception caught/thrown status storage /// @param exceptionStorage reference to exception pointer storage /// @returns newly created block -static llvm::BasicBlock* createFinallyBlock(llvm::LLVMContext& context, - llvm::Module& module, - llvm::IRBuilder<>& builder, - llvm::Function& 
toAddTo, - std::string& blockName, - std::string& functionId, - llvm::BasicBlock& terminatorBlock, - llvm::BasicBlock& unwindResumeBlock, - llvm::Value** exceptionCaughtFlag, - llvm::Value** exceptionStorage) { - assert(exceptionCaughtFlag && - "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag " - "is NULL"); - assert(exceptionStorage && - "ExceptionDemo::createFinallyBlock(...):exceptionStorage " - "is NULL"); - - *exceptionCaughtFlag = - createEntryBlockAlloca(toAddTo, - "exceptionCaught", - ourExceptionNotThrownState->getType(), - ourExceptionNotThrownState); - - const llvm::PointerType* exceptionStorageType = - builder.getInt8Ty()->getPointerTo(); - *exceptionStorage = - createEntryBlockAlloca(toAddTo, - "exceptionStorage", - exceptionStorageType, - llvm::ConstantPointerNull::get( - exceptionStorageType)); - - llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, - blockName, - &toAddTo); - - builder.SetInsertPoint(ret); - - std::ostringstream bufferToPrint; - bufferToPrint << "Gen: Executing finally block " - << blockName - << " in " - << functionId - << std::endl; - generateStringPrint(context, - module, - builder, - bufferToPrint.str(), - USE_GLOBAL_STR_CONSTS); - - llvm::SwitchInst* theSwitch = - builder.CreateSwitch(builder.CreateLoad(*exceptionCaughtFlag), - &terminatorBlock, - 2); - theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock); - theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock); - - return(ret); +static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::Function &toAddTo, + std::string &blockName, + std::string &functionId, + llvm::BasicBlock &terminatorBlock, + llvm::BasicBlock &unwindResumeBlock, + llvm::Value **exceptionCaughtFlag, + llvm::Value **exceptionStorage) { + assert(exceptionCaughtFlag && + "ExceptionDemo::createFinallyBlock(...):exceptionCaughtFlag " + "is NULL"); + assert(exceptionStorage && + 
"ExceptionDemo::createFinallyBlock(...):exceptionStorage " + "is NULL"); + + *exceptionCaughtFlag = + createEntryBlockAlloca(toAddTo, + "exceptionCaught", + ourExceptionNotThrownState->getType(), + ourExceptionNotThrownState); + + const llvm::PointerType *exceptionStorageType = + builder.getInt8Ty()->getPointerTo(); + *exceptionStorage = + createEntryBlockAlloca(toAddTo, + "exceptionStorage", + exceptionStorageType, + llvm::ConstantPointerNull::get( + exceptionStorageType)); + + llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, + blockName, + &toAddTo); + + builder.SetInsertPoint(ret); + + std::ostringstream bufferToPrint; + bufferToPrint << "Gen: Executing finally block " + << blockName << " in " << functionId << "\n"; + generateStringPrint(context, + module, + builder, + bufferToPrint.str(), + USE_GLOBAL_STR_CONSTS); + + llvm::SwitchInst *theSwitch = + builder.CreateSwitch(builder.CreateLoad(*exceptionCaughtFlag), + &terminatorBlock, + 2); + theSwitch->addCase(ourExceptionCaughtState, &terminatorBlock); + theSwitch->addCase(ourExceptionThrownState, &unwindResumeBlock); + + return(ret); } @@ -1038,36 +1033,36 @@ static llvm::BasicBlock* createFinallyBlock(llvm::LLVMContext& context, /// @param terminatorBlock terminator "end" block /// @param exceptionCaughtFlag exception caught/thrown status /// @returns newly created block -static llvm::BasicBlock* createCatchBlock(llvm::LLVMContext& context, - llvm::Module& module, - llvm::IRBuilder<>& builder, - llvm::Function& toAddTo, - std::string& blockName, - std::string& functionId, - llvm::BasicBlock& terminatorBlock, - llvm::Value& exceptionCaughtFlag) { - - llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, - blockName, - &toAddTo); - - builder.SetInsertPoint(ret); - - std::ostringstream bufferToPrint; - bufferToPrint << "Gen: Executing catch block " - << blockName - << " in " - << functionId - << std::endl; - generateStringPrint(context, - module, - builder, - bufferToPrint.str(), - 
USE_GLOBAL_STR_CONSTS); - builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag); - builder.CreateBr(&terminatorBlock); - - return(ret); +static llvm::BasicBlock *createCatchBlock(llvm::LLVMContext &context, + llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::Function &toAddTo, + std::string &blockName, + std::string &functionId, + llvm::BasicBlock &terminatorBlock, + llvm::Value &exceptionCaughtFlag) { + + llvm::BasicBlock *ret = llvm::BasicBlock::Create(context, + blockName, + &toAddTo); + + builder.SetInsertPoint(ret); + + std::ostringstream bufferToPrint; + bufferToPrint << "Gen: Executing catch block " + << blockName + << " in " + << functionId + << std::endl; + generateStringPrint(context, + module, + builder, + bufferToPrint.str(), + USE_GLOBAL_STR_CONSTS); + builder.CreateStore(ourExceptionCaughtState, &exceptionCaughtFlag); + builder.CreateBr(&terminatorBlock); + + return(ret); } @@ -1091,275 +1086,269 @@ static llvm::BasicBlock* createCatchBlock(llvm::LLVMContext& context, /// @param exceptionTypesToCatch array of type info types to "catch" /// @returns generated function static -llvm::Function* createCatchWrappedInvokeFunction(llvm::Module& module, - llvm::IRBuilder<>& builder, - llvm::FunctionPassManager& fpm, - llvm::Function& toInvoke, - std::string ourId, - unsigned numExceptionsToCatch, - unsigned exceptionTypesToCatch[]) { - - llvm::LLVMContext& context = module.getContext(); - llvm::Function *toPrint32Int = module.getFunction("print32Int"); - - ArgTypes argTypes; - argTypes.push_back(builder.getInt32Ty()); - - ArgNames argNames; - argNames.push_back("exceptTypeToThrow"); - - llvm::Function* ret = createFunction(module, - builder.getVoidTy(), - argTypes, - argNames, - ourId, - llvm::Function::ExternalLinkage, - false, - false); - - // Block which calls invoke - llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, - "entry", - ret); - // Normal block for invoke - llvm::BasicBlock *normalBlock = 
llvm::BasicBlock::Create(context, - "normal", - ret); - // Unwind block for invoke - llvm::BasicBlock *exceptionBlock = - llvm::BasicBlock::Create(context, "exception", ret); - - // Block which routes exception to correct catch handler block - llvm::BasicBlock *exceptionRouteBlock = - llvm::BasicBlock::Create(context, "exceptionRoute", ret); - - // Foreign exception handler - llvm::BasicBlock *externalExceptionBlock = - llvm::BasicBlock::Create(context, "externalException", ret); - - // Block which calls _Unwind_Resume - llvm::BasicBlock *unwindResumeBlock = - llvm::BasicBlock::Create(context, "unwindResume", ret); - - // Clean up block which delete exception if needed - llvm::BasicBlock *endBlock = - llvm::BasicBlock::Create(context, "end", ret); - - std::string nextName; - std::vector catchBlocks(numExceptionsToCatch); - llvm::Value* exceptionCaughtFlag = NULL; - llvm::Value* exceptionStorage = NULL; - - // Finally block which will branch to unwindResumeBlock if - // exception is not caught. Initializes/allocates stack locations. 
- llvm::BasicBlock* finallyBlock = createFinallyBlock(context, - module, - builder, - *ret, - nextName = "finally", - ourId, - *endBlock, - *unwindResumeBlock, - &exceptionCaughtFlag, - &exceptionStorage); - - for (unsigned i = 0; i < numExceptionsToCatch; ++i) { - nextName = ourTypeInfoNames[exceptionTypesToCatch[i]]; - - // One catch block per type info to be caught - catchBlocks[i] = createCatchBlock(context, - module, - builder, - *ret, - nextName, - ourId, - *finallyBlock, - *exceptionCaughtFlag); - } - - // Entry Block - - builder.SetInsertPoint(entryBlock); - - std::vector args; - args.push_back(namedValues["exceptTypeToThrow"]); - builder.CreateInvoke(&toInvoke, - normalBlock, - exceptionBlock, - args.begin(), - args.end()); - - // End Block - - builder.SetInsertPoint(endBlock); - - generateStringPrint(context, - module, - builder, - "Gen: In end block: exiting in " + ourId + ".\n", - USE_GLOBAL_STR_CONSTS); - llvm::Function *deleteOurException = - module.getFunction("deleteOurException"); - - // Note: function handles NULL exceptions - builder.CreateCall(deleteOurException, - builder.CreateLoad(exceptionStorage)); - builder.CreateRetVoid(); - - // Normal Block - - builder.SetInsertPoint(normalBlock); - - generateStringPrint(context, - module, - builder, - "Gen: No exception in " + ourId + "!\n", - USE_GLOBAL_STR_CONSTS); - - // Finally block is always called - builder.CreateBr(finallyBlock); - - // Unwind Resume Block - - builder.SetInsertPoint(unwindResumeBlock); - - llvm::Function *resumeOurException = - module.getFunction("_Unwind_Resume"); - builder.CreateCall(resumeOurException, - builder.CreateLoad(exceptionStorage)); - builder.CreateUnreachable(); - - // Exception Block - - builder.SetInsertPoint(exceptionBlock); - - llvm::Function *ehException = module.getFunction("llvm.eh.exception"); - - // Retrieve thrown exception - llvm::Value* unwindException = builder.CreateCall(ehException); - - // Store exception and flag - 
builder.CreateStore(unwindException, exceptionStorage); - builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag); - llvm::Function *personality = module.getFunction("ourPersonality"); - llvm::Value* functPtr = - builder.CreatePointerCast(personality, - builder.getInt8Ty()->getPointerTo()); - - args.clear(); - args.push_back(unwindException); - args.push_back(functPtr); - - // Note: Skipping index 0 - for (unsigned i = 0; i < numExceptionsToCatch; ++i) { - // Set up type infos to be caught - args.push_back( - module.getGlobalVariable( - ourTypeInfoNames[exceptionTypesToCatch[i]])); - } - - args.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), 0)); - - llvm::Function *ehSelector = module.getFunction("llvm.eh.selector"); - - // Set up this exeption block as the landing pad which will handle - // given type infos. See case Intrinsic::eh_selector in - // SelectionDAGBuilder::visitIntrinsicCall(...) and AddCatchInfo(...) - // implemented in FunctionLoweringInfo.cpp to see how the implementation - // handles this call. This landing pad (this exception block), will be - // called either because it nees to cleanup (call finally) or a type - // info was found which matched the thrown exception. - llvm::Value* retTypeInfoIndex = builder.CreateCall(ehSelector, - args.begin(), - args.end()); - - // Retrieve exception_class member from thrown exception - // (_Unwind_Exception instance). This member tells us whether or not - // the exception is foreign. - llvm::Value* unwindExceptionClass = - builder.CreateLoad( - builder.CreateStructGEP( - builder.CreatePointerCast( - unwindException, - ourUnwindExceptionType->getPointerTo()), - 0)); - - // Branch to the externalExceptionBlock if the exception is foreign or - // to a catch router if not. Either way the finally block will be run. 
- builder.CreateCondBr( - builder.CreateICmpEQ(unwindExceptionClass, - llvm::ConstantInt::get(builder.getInt64Ty(), - ourBaseExceptionClass)), - exceptionRouteBlock, - externalExceptionBlock); - - // External Exception Block - - builder.SetInsertPoint(externalExceptionBlock); - - generateStringPrint(context, - module, - builder, - "Gen: Foreign exception received.\n", - USE_GLOBAL_STR_CONSTS); - - // Branch to the finally block - builder.CreateBr(finallyBlock); - - // Exception Route Block - - builder.SetInsertPoint(exceptionRouteBlock); - - // Casts exception pointer (_Unwind_Exception instance) to parent - // (OurException instance). - // - // Note: ourBaseFromUnwindOffset is usually negative - llvm::Value* typeInfoThrown = - builder.CreatePointerCast( - builder.CreateConstGEP1_64(unwindException, - ourBaseFromUnwindOffset), - ourExceptionType->getPointerTo()); - - // Retrieve thrown exception type info type - // - // Note: Index is not relative to pointer but instead to structure - // unlike a true getelementptr (GEP) instruction - typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0); - - llvm::Value* typeInfoThrownType = - builder.CreateStructGEP(typeInfoThrown, 0); - - generateIntegerPrint(context, - module, - builder, - *toPrint32Int, - *(builder.CreateLoad(typeInfoThrownType)), - "Gen: Exception type <%d> received (stack unwound) " - " in " + - ourId + - ".\n", - USE_GLOBAL_STR_CONSTS); - - // Route to matched type info catch block or run cleanup finally block - llvm::SwitchInst* switchToCatchBlock = - builder.CreateSwitch(retTypeInfoIndex, - finallyBlock, - numExceptionsToCatch); - - unsigned nextTypeToCatch; - - for (unsigned i = 1; i <= numExceptionsToCatch; ++i) { - nextTypeToCatch = i - 1; - switchToCatchBlock->addCase(llvm::ConstantInt::get( - llvm::Type::getInt32Ty(context), - i), - catchBlocks[nextTypeToCatch]); - } - - llvm::verifyFunction(*ret); - fpm.run(*ret); - - return(ret); +llvm::Function *createCatchWrappedInvokeFunction(llvm::Module 
&module, + llvm::IRBuilder<> &builder, + llvm::FunctionPassManager &fpm, + llvm::Function &toInvoke, + std::string ourId, + unsigned numExceptionsToCatch, + unsigned exceptionTypesToCatch[]) { + + llvm::LLVMContext &context = module.getContext(); + llvm::Function *toPrint32Int = module.getFunction("print32Int"); + + ArgTypes argTypes; + argTypes.push_back(builder.getInt32Ty()); + + ArgNames argNames; + argNames.push_back("exceptTypeToThrow"); + + llvm::Function *ret = createFunction(module, + builder.getVoidTy(), + argTypes, + argNames, + ourId, + llvm::Function::ExternalLinkage, + false, + false); + + // Block which calls invoke + llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, + "entry", + ret); + // Normal block for invoke + llvm::BasicBlock *normalBlock = llvm::BasicBlock::Create(context, + "normal", + ret); + // Unwind block for invoke + llvm::BasicBlock *exceptionBlock = + llvm::BasicBlock::Create(context, "exception", ret); + + // Block which routes exception to correct catch handler block + llvm::BasicBlock *exceptionRouteBlock = + llvm::BasicBlock::Create(context, "exceptionRoute", ret); + + // Foreign exception handler + llvm::BasicBlock *externalExceptionBlock = + llvm::BasicBlock::Create(context, "externalException", ret); + + // Block which calls _Unwind_Resume + llvm::BasicBlock *unwindResumeBlock = + llvm::BasicBlock::Create(context, "unwindResume", ret); + + // Clean up block which delete exception if needed + llvm::BasicBlock *endBlock = + llvm::BasicBlock::Create(context, "end", ret); + + std::string nextName; + std::vector catchBlocks(numExceptionsToCatch); + llvm::Value *exceptionCaughtFlag = NULL; + llvm::Value *exceptionStorage = NULL; + + // Finally block which will branch to unwindResumeBlock if + // exception is not caught. Initializes/allocates stack locations. 
+ llvm::BasicBlock *finallyBlock = createFinallyBlock(context, + module, + builder, + *ret, + nextName = "finally", + ourId, + *endBlock, + *unwindResumeBlock, + &exceptionCaughtFlag, + &exceptionStorage); + + for (unsigned i = 0; i < numExceptionsToCatch; ++i) { + nextName = ourTypeInfoNames[exceptionTypesToCatch[i]]; + + // One catch block per type info to be caught + catchBlocks[i] = createCatchBlock(context, + module, + builder, + *ret, + nextName, + ourId, + *finallyBlock, + *exceptionCaughtFlag); + } + + // Entry Block + + builder.SetInsertPoint(entryBlock); + + std::vector args; + args.push_back(namedValues["exceptTypeToThrow"]); + builder.CreateInvoke(&toInvoke, + normalBlock, + exceptionBlock, + args.begin(), + args.end()); + + // End Block + + builder.SetInsertPoint(endBlock); + + generateStringPrint(context, + module, + builder, + "Gen: In end block: exiting in " + ourId + ".\n", + USE_GLOBAL_STR_CONSTS); + llvm::Function *deleteOurException = + module.getFunction("deleteOurException"); + + // Note: function handles NULL exceptions + builder.CreateCall(deleteOurException, + builder.CreateLoad(exceptionStorage)); + builder.CreateRetVoid(); + + // Normal Block + + builder.SetInsertPoint(normalBlock); + + generateStringPrint(context, + module, + builder, + "Gen: No exception in " + ourId + "!\n", + USE_GLOBAL_STR_CONSTS); + + // Finally block is always called + builder.CreateBr(finallyBlock); + + // Unwind Resume Block + + builder.SetInsertPoint(unwindResumeBlock); + + llvm::Function *resumeOurException = + module.getFunction("_Unwind_Resume"); + builder.CreateCall(resumeOurException, + builder.CreateLoad(exceptionStorage)); + builder.CreateUnreachable(); + + // Exception Block + + builder.SetInsertPoint(exceptionBlock); + + llvm::Function *ehException = module.getFunction("llvm.eh.exception"); + + // Retrieve thrown exception + llvm::Value *unwindException = builder.CreateCall(ehException); + + // Store exception and flag + 
builder.CreateStore(unwindException, exceptionStorage); + builder.CreateStore(ourExceptionThrownState, exceptionCaughtFlag); + llvm::Function *personality = module.getFunction("ourPersonality"); + llvm::Value *functPtr = + builder.CreatePointerCast(personality, + builder.getInt8Ty()->getPointerTo()); + + args.clear(); + args.push_back(unwindException); + args.push_back(functPtr); + + // Note: Skipping index 0 + for (unsigned i = 0; i < numExceptionsToCatch; ++i) { + // Set up type infos to be caught + args.push_back(module.getGlobalVariable( + ourTypeInfoNames[exceptionTypesToCatch[i]])); + } + + args.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), 0)); + + llvm::Function *ehSelector = module.getFunction("llvm.eh.selector"); + + // Set up this exeption block as the landing pad which will handle + // given type infos. See case Intrinsic::eh_selector in + // SelectionDAGBuilder::visitIntrinsicCall(...) and AddCatchInfo(...) + // implemented in FunctionLoweringInfo.cpp to see how the implementation + // handles this call. This landing pad (this exception block), will be + // called either because it nees to cleanup (call finally) or a type + // info was found which matched the thrown exception. + llvm::Value *retTypeInfoIndex = builder.CreateCall(ehSelector, + args.begin(), + args.end()); + + // Retrieve exception_class member from thrown exception + // (_Unwind_Exception instance). This member tells us whether or not + // the exception is foreign. + llvm::Value *unwindExceptionClass = + builder.CreateLoad(builder.CreateStructGEP( + builder.CreatePointerCast(unwindException, + ourUnwindExceptionType->getPointerTo()), + 0)); + + // Branch to the externalExceptionBlock if the exception is foreign or + // to a catch router if not. Either way the finally block will be run. 
+ builder.CreateCondBr(builder.CreateICmpEQ(unwindExceptionClass, + llvm::ConstantInt::get(builder.getInt64Ty(), + ourBaseExceptionClass)), + exceptionRouteBlock, + externalExceptionBlock); + + // External Exception Block + + builder.SetInsertPoint(externalExceptionBlock); + + generateStringPrint(context, + module, + builder, + "Gen: Foreign exception received.\n", + USE_GLOBAL_STR_CONSTS); + + // Branch to the finally block + builder.CreateBr(finallyBlock); + + // Exception Route Block + + builder.SetInsertPoint(exceptionRouteBlock); + + // Casts exception pointer (_Unwind_Exception instance) to parent + // (OurException instance). + // + // Note: ourBaseFromUnwindOffset is usually negative + llvm::Value *typeInfoThrown = + builder.CreatePointerCast(builder.CreateConstGEP1_64(unwindException, + ourBaseFromUnwindOffset), + ourExceptionType->getPointerTo()); + + // Retrieve thrown exception type info type + // + // Note: Index is not relative to pointer but instead to structure + // unlike a true getelementptr (GEP) instruction + typeInfoThrown = builder.CreateStructGEP(typeInfoThrown, 0); + + llvm::Value *typeInfoThrownType = + builder.CreateStructGEP(typeInfoThrown, 0); + + generateIntegerPrint(context, + module, + builder, + *toPrint32Int, + *(builder.CreateLoad(typeInfoThrownType)), + "Gen: Exception type <%d> received (stack unwound) " + " in " + + ourId + + ".\n", + USE_GLOBAL_STR_CONSTS); + + // Route to matched type info catch block or run cleanup finally block + llvm::SwitchInst *switchToCatchBlock = + builder.CreateSwitch(retTypeInfoIndex, + finallyBlock, + numExceptionsToCatch); + + unsigned nextTypeToCatch; + + for (unsigned i = 1; i <= numExceptionsToCatch; ++i) { + nextTypeToCatch = i - 1; + switchToCatchBlock->addCase(llvm::ConstantInt::get( + llvm::Type::getInt32Ty(context), i), + catchBlocks[nextTypeToCatch]); + } + + llvm::verifyFunction(*ret); + fpm.run(*ret); + + return(ret); } @@ -1378,107 +1367,107 @@ llvm::Function* 
createCatchWrappedInvokeFunction(llvm::Module& module, /// if the above nativeThrowType matches generated function's arg. /// @returns generated function static -llvm::Function* createThrowExceptionFunction(llvm::Module& module, - llvm::IRBuilder<>& builder, - llvm::FunctionPassManager& fpm, - std::string ourId, - int32_t nativeThrowType, - llvm::Function& nativeThrowFunct) { - llvm::LLVMContext& context = module.getContext(); - namedValues.clear(); - ArgTypes unwindArgTypes; - unwindArgTypes.push_back(builder.getInt32Ty()); - ArgNames unwindArgNames; - unwindArgNames.push_back("exceptTypeToThrow"); - - llvm::Function *ret = createFunction(module, - builder.getVoidTy(), - unwindArgTypes, - unwindArgNames, - ourId, - llvm::Function::ExternalLinkage, - false, - false); - - // Throws either one of our exception or a native C++ exception depending - // on a runtime argument value containing a type info type. - llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, - "entry", - ret); - // Throws a foreign exception - llvm::BasicBlock *nativeThrowBlock = - llvm::BasicBlock::Create(context, - "nativeThrow", - ret); - // Throws one of our Exceptions - llvm::BasicBlock *generatedThrowBlock = - llvm::BasicBlock::Create(context, - "generatedThrow", - ret); - // Retrieved runtime type info type to throw - llvm::Value* exceptionType = namedValues["exceptTypeToThrow"]; - - // nativeThrowBlock block - - builder.SetInsertPoint(nativeThrowBlock); - - // Throws foreign exception - builder.CreateCall(&nativeThrowFunct, exceptionType); - builder.CreateUnreachable(); - - // entry block - - builder.SetInsertPoint(entryBlock); - - llvm::Function *toPrint32Int = module.getFunction("print32Int"); - generateIntegerPrint(context, - module, - builder, - *toPrint32Int, - *exceptionType, - "\nGen: About to throw exception type <%d> in " + - ourId + - ".\n", - USE_GLOBAL_STR_CONSTS); - - // Switches on runtime type info type value to determine whether or not - // a foreign exception is 
thrown. Defaults to throwing one of our - // generated exceptions. - llvm::SwitchInst* theSwitch = builder.CreateSwitch(exceptionType, - generatedThrowBlock, - 1); - - theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), - nativeThrowType), - nativeThrowBlock); - - // generatedThrow block - - builder.SetInsertPoint(generatedThrowBlock); - - llvm::Function *createOurException = - module.getFunction("createOurException"); - llvm::Function *raiseOurException = - module.getFunction("_Unwind_RaiseException"); - - // Creates exception to throw with runtime type info type. - llvm::Value* exception = - builder.CreateCall(createOurException, - namedValues["exceptTypeToThrow"]); - - // Throw generated Exception - builder.CreateCall(raiseOurException, exception); - builder.CreateUnreachable(); - - llvm::verifyFunction(*ret); - fpm.run(*ret); - - return(ret); +llvm::Function *createThrowExceptionFunction(llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::FunctionPassManager &fpm, + std::string ourId, + int32_t nativeThrowType, + llvm::Function &nativeThrowFunct) { + llvm::LLVMContext &context = module.getContext(); + namedValues.clear(); + ArgTypes unwindArgTypes; + unwindArgTypes.push_back(builder.getInt32Ty()); + ArgNames unwindArgNames; + unwindArgNames.push_back("exceptTypeToThrow"); + + llvm::Function *ret = createFunction(module, + builder.getVoidTy(), + unwindArgTypes, + unwindArgNames, + ourId, + llvm::Function::ExternalLinkage, + false, + false); + + // Throws either one of our exception or a native C++ exception depending + // on a runtime argument value containing a type info type. 
+ llvm::BasicBlock *entryBlock = llvm::BasicBlock::Create(context, + "entry", + ret); + // Throws a foreign exception + llvm::BasicBlock *nativeThrowBlock = + llvm::BasicBlock::Create(context, + "nativeThrow", + ret); + // Throws one of our Exceptions + llvm::BasicBlock *generatedThrowBlock = + llvm::BasicBlock::Create(context, + "generatedThrow", + ret); + // Retrieved runtime type info type to throw + llvm::Value *exceptionType = namedValues["exceptTypeToThrow"]; + + // nativeThrowBlock block + + builder.SetInsertPoint(nativeThrowBlock); + + // Throws foreign exception + builder.CreateCall(&nativeThrowFunct, exceptionType); + builder.CreateUnreachable(); + + // entry block + + builder.SetInsertPoint(entryBlock); + + llvm::Function *toPrint32Int = module.getFunction("print32Int"); + generateIntegerPrint(context, + module, + builder, + *toPrint32Int, + *exceptionType, + "\nGen: About to throw exception type <%d> in " + + ourId + + ".\n", + USE_GLOBAL_STR_CONSTS); + + // Switches on runtime type info type value to determine whether or not + // a foreign exception is thrown. Defaults to throwing one of our + // generated exceptions. + llvm::SwitchInst *theSwitch = builder.CreateSwitch(exceptionType, + generatedThrowBlock, + 1); + + theSwitch->addCase(llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), + nativeThrowType), + nativeThrowBlock); + + // generatedThrow block + + builder.SetInsertPoint(generatedThrowBlock); + + llvm::Function *createOurException = + module.getFunction("createOurException"); + llvm::Function *raiseOurException = + module.getFunction("_Unwind_RaiseException"); + + // Creates exception to throw with runtime type info type. 
+ llvm::Value *exception = + builder.CreateCall(createOurException, + namedValues["exceptTypeToThrow"]); + + // Throw generated Exception + builder.CreateCall(raiseOurException, exception); + builder.CreateUnreachable(); + + llvm::verifyFunction(*ret); + fpm.run(*ret); + + return(ret); } static void createStandardUtilityFunctions(unsigned numTypeInfos, - llvm::Module& module, - llvm::IRBuilder<>& builder); + llvm::Module &module, + llvm::IRBuilder<> &builder); /// Creates test code by generating and organizing these functions into the /// test case. The test case consists of an outer function setup to invoke @@ -1500,81 +1489,80 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos, /// @param nativeThrowFunctName name of external function which will throw /// a foreign exception /// @returns outermost generated test function. -llvm::Function* createUnwindExceptionTest(llvm::Module& module, - llvm::IRBuilder<>& builder, - llvm::FunctionPassManager& fpm, +llvm::Function *createUnwindExceptionTest(llvm::Module &module, + llvm::IRBuilder<> &builder, + llvm::FunctionPassManager &fpm, std::string nativeThrowFunctName) { - // Number of type infos to generate - unsigned numTypeInfos = 6; - - // Initialze intrisics and external functions to use along with exception - // and type info globals. - createStandardUtilityFunctions(numTypeInfos, - module, - builder); - llvm::Function *nativeThrowFunct = - module.getFunction(nativeThrowFunctName); - - // Create exception throw function using the value ~0 to cause - // foreign exceptions to be thrown. - llvm::Function* throwFunct = - createThrowExceptionFunction(module, - builder, - fpm, - "throwFunct", - ~0, - *nativeThrowFunct); - // Inner function will catch even type infos - unsigned innerExceptionTypesToCatch[] = {6, 2, 4}; - size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) / - sizeof(unsigned); - - // Generate inner function. 
- llvm::Function* innerCatchFunct = - createCatchWrappedInvokeFunction(module, - builder, - fpm, - *throwFunct, - "innerCatchFunct", - numExceptionTypesToCatch, - innerExceptionTypesToCatch); - - // Outer function will catch odd type infos - unsigned outerExceptionTypesToCatch[] = {3, 1, 5}; - numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) / - sizeof(unsigned); - - // Generate outer function - llvm::Function* outerCatchFunct = - createCatchWrappedInvokeFunction(module, - builder, - fpm, - *innerCatchFunct, - "outerCatchFunct", - numExceptionTypesToCatch, - outerExceptionTypesToCatch); - - // Return outer function to run - return(outerCatchFunct); + // Number of type infos to generate + unsigned numTypeInfos = 6; + + // Initialze intrisics and external functions to use along with exception + // and type info globals. + createStandardUtilityFunctions(numTypeInfos, + module, + builder); + llvm::Function *nativeThrowFunct = + module.getFunction(nativeThrowFunctName); + + // Create exception throw function using the value ~0 to cause + // foreign exceptions to be thrown. + llvm::Function *throwFunct = + createThrowExceptionFunction(module, + builder, + fpm, + "throwFunct", + ~0, + *nativeThrowFunct); + // Inner function will catch even type infos + unsigned innerExceptionTypesToCatch[] = {6, 2, 4}; + size_t numExceptionTypesToCatch = sizeof(innerExceptionTypesToCatch) / + sizeof(unsigned); + + // Generate inner function. 
+ llvm::Function *innerCatchFunct = + createCatchWrappedInvokeFunction(module, + builder, + fpm, + *throwFunct, + "innerCatchFunct", + numExceptionTypesToCatch, + innerExceptionTypesToCatch); + + // Outer function will catch odd type infos + unsigned outerExceptionTypesToCatch[] = {3, 1, 5}; + numExceptionTypesToCatch = sizeof(outerExceptionTypesToCatch) / + sizeof(unsigned); + + // Generate outer function + llvm::Function *outerCatchFunct = + createCatchWrappedInvokeFunction(module, + builder, + fpm, + *innerCatchFunct, + "outerCatchFunct", + numExceptionTypesToCatch, + outerExceptionTypesToCatch); + + // Return outer function to run + return(outerCatchFunct); } /// Represents our foreign exceptions class OurCppRunException : public std::runtime_error { public: - OurCppRunException(const std::string reason) : - std::runtime_error(reason) {} - - OurCppRunException (const OurCppRunException& toCopy) : - std::runtime_error(toCopy) {} - - OurCppRunException& operator = (const OurCppRunException& toCopy) { - return(reinterpret_cast( - std::runtime_error::operator = (toCopy) - )); - } - - ~OurCppRunException (void) throw () {} + OurCppRunException(const std::string reason) : + std::runtime_error(reason) {} + + OurCppRunException (const OurCppRunException &toCopy) : + std::runtime_error(toCopy) {} + + OurCppRunException &operator = (const OurCppRunException &toCopy) { + return(reinterpret_cast( + std::runtime_error::operator=(toCopy))); + } + + ~OurCppRunException (void) throw () {} }; @@ -1583,13 +1571,13 @@ class OurCppRunException : public std::runtime_error { /// generated function contract. extern "C" void throwCppException (int32_t ignoreIt) { - throw(OurCppRunException("thrown by throwCppException(...)")); + throw(OurCppRunException("thrown by throwCppException(...)")); } typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow); /// This is a test harness which runs test by executing generated -/// function with a type info type to throw. 
Harness wraps the excecution +/// function with a type info type to throw. Harness wraps the execution /// of generated function in a C++ try catch clause. /// @param engine execution engine to use for executing generated function. /// This demo program expects this to be a JIT instance for demo @@ -1598,45 +1586,44 @@ typedef void (*OurExceptionThrowFunctType) (int32_t typeToThrow); /// @param typeToThrow type info type of generated exception to throw, or /// indicator to cause foreign exception to be thrown. static -void runExceptionThrow(llvm::ExecutionEngine* engine, - llvm::Function* function, +void runExceptionThrow(llvm::ExecutionEngine *engine, + llvm::Function *function, int32_t typeToThrow) { - - // Find test's function pointer - OurExceptionThrowFunctType functPtr = - reinterpret_cast( - reinterpret_cast( - engine->getPointerToFunction(function) - ) - ); - - try { - // Run test - (*functPtr)(typeToThrow); - } - catch (OurCppRunException exc) { - // Catch foreign C++ exception - fprintf(stderr, - "\nrunExceptionThrow(...):In C++ catch OurCppRunException " - "with reason: %s.\n", - exc.what()); - } - catch (...) { - // Catch all exceptions including our generated ones. I'm not sure - // why this latter functionality should work, as it seems that - // our exceptions should be foreign to C++ (the _Unwind_Exception:: - // exception_class should be different from the one used by C++), and - // therefore C++ should ignore the generated exceptions. - - fprintf(stderr, - "\nrunExceptionThrow(...):In C++ catch all.\n"); - } + + // Find test's function pointer + OurExceptionThrowFunctType functPtr = + reinterpret_cast( + reinterpret_cast(engine->getPointerToFunction(function))); + + try { + // Run test + (*functPtr)(typeToThrow); + } + catch (OurCppRunException exc) { + // Catch foreign C++ exception + fprintf(stderr, + "\nrunExceptionThrow(...):In C++ catch OurCppRunException " + "with reason: %s.\n", + exc.what()); + } + catch (...) 
{ + // Catch all exceptions including our generated ones. I'm not sure + // why this latter functionality should work, as it seems that + // our exceptions should be foreign to C++ (the _Unwind_Exception:: + // exception_class should be different from the one used by C++), and + // therefore C++ should ignore the generated exceptions. + + fprintf(stderr, + "\nrunExceptionThrow(...):In C++ catch all.\n"); + } } // // End test functions // +typedef llvm::ArrayRef TypeArray; + /// This initialization routine creates type info globals and /// adds external function declarations to module. /// @param numTypeInfos number of linear type info associated type info types @@ -1644,287 +1631,285 @@ void runExceptionThrow(llvm::ExecutionEngine* engine, /// @param module code for module instance /// @param builder builder instance static void createStandardUtilityFunctions(unsigned numTypeInfos, - llvm::Module& module, - llvm::IRBuilder<>& builder) { - - llvm::LLVMContext& context = module.getContext(); - - // Exception initializations - - // Setup exception catch state - ourExceptionNotThrownState = - llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0), - ourExceptionThrownState = - llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1), - ourExceptionCaughtState = - llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2), - - - // Create our type info type - ourTypeInfoType = llvm::StructType::get(context, - builder.getInt32Ty(), - NULL); - - // Create OurException type - ourExceptionType = llvm::StructType::get(context, - ourTypeInfoType, - NULL); - - // Create portion of _Unwind_Exception type - // - // Note: Declaring only a portion of the _Unwind_Exception struct. - // Does this cause problems? - ourUnwindExceptionType = llvm::StructType::get(context, - builder.getInt64Ty(), - NULL); - struct OurBaseException_t dummyException; - - // Calculate offset of OurException::unwindException member. 
- ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - - ((uintptr_t) &(dummyException.unwindException)); - + llvm::Module &module, + llvm::IRBuilder<> &builder) { + + llvm::LLVMContext &context = module.getContext(); + + // Exception initializations + + // Setup exception catch state + ourExceptionNotThrownState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 0), + ourExceptionThrownState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 1), + ourExceptionCaughtState = + llvm::ConstantInt::get(llvm::Type::getInt8Ty(context), 2), + + + + // Create our type info type + ourTypeInfoType = llvm::StructType::get(context, + TypeArray(builder.getInt32Ty())); + + // Create OurException type + ourExceptionType = llvm::StructType::get(context, + TypeArray(ourTypeInfoType)); + + // Create portion of _Unwind_Exception type + // + // Note: Declaring only a portion of the _Unwind_Exception struct. + // Does this cause problems? + ourUnwindExceptionType = + llvm::StructType::get(context, TypeArray(builder.getInt64Ty())); + struct OurBaseException_t dummyException; + + // Calculate offset of OurException::unwindException member. 
+ ourBaseFromUnwindOffset = ((uintptr_t) &dummyException) - + ((uintptr_t) &(dummyException.unwindException)); + #ifdef DEBUG - fprintf(stderr, - "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset " - "= %lld, sizeof(struct OurBaseException_t) - " - "sizeof(struct _Unwind_Exception) = %lu.\n", - ourBaseFromUnwindOffset, - sizeof(struct OurBaseException_t) - - sizeof(struct _Unwind_Exception)); + fprintf(stderr, + "createStandardUtilityFunctions(...):ourBaseFromUnwindOffset " + "= %lld, sizeof(struct OurBaseException_t) - " + "sizeof(struct _Unwind_Exception) = %lu.\n", + ourBaseFromUnwindOffset, + sizeof(struct OurBaseException_t) - + sizeof(struct _Unwind_Exception)); #endif - - size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char); - - // Create our _Unwind_Exception::exception_class value - ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars); - - // Type infos - - std::string baseStr = "typeInfo", typeInfoName; - std::ostringstream typeInfoNameBuilder; - std::vector structVals; + + size_t numChars = sizeof(ourBaseExcpClassChars) / sizeof(char); + + // Create our _Unwind_Exception::exception_class value + ourBaseExceptionClass = genClass(ourBaseExcpClassChars, numChars); + + // Type infos + + std::string baseStr = "typeInfo", typeInfoName; + std::ostringstream typeInfoNameBuilder; + std::vector structVals; + + llvm::Constant *nextStruct; + llvm::GlobalVariable *nextGlobal = NULL; + + // Generate each type info + // + // Note: First type info is not used. + for (unsigned i = 0; i <= numTypeInfos; ++i) { + structVals.clear(); + structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i)); + nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals); - llvm::Constant *nextStruct; - llvm::GlobalVariable* nextGlobal = NULL; - - // Generate each type info - // - // Note: First type info is not used. 
- for (unsigned i = 0; i <= numTypeInfos; ++i) { - structVals.clear(); - structVals.push_back(llvm::ConstantInt::get(builder.getInt32Ty(), i)); - nextStruct = llvm::ConstantStruct::get(ourTypeInfoType, structVals); - - typeInfoNameBuilder.str(""); - typeInfoNameBuilder << baseStr << i; - typeInfoName = typeInfoNameBuilder.str(); - - // Note: Does not seem to work without allocation - nextGlobal = - new llvm::GlobalVariable(module, - ourTypeInfoType, - true, - llvm::GlobalValue::ExternalLinkage, - nextStruct, - typeInfoName); - - ourTypeInfoNames.push_back(typeInfoName); - ourTypeInfoNamesIndex[i] = typeInfoName; - } - - ArgNames argNames; - ArgTypes argTypes; - llvm::Function* funct = NULL; - - // print32Int - - const llvm::Type* retType = builder.getVoidTy(); - - argTypes.clear(); - argTypes.push_back(builder.getInt32Ty()); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - - argNames.clear(); - - createFunction(module, - retType, - argTypes, - argNames, - "print32Int", - llvm::Function::ExternalLinkage, - true, - false); - - // print64Int - - retType = builder.getVoidTy(); - - argTypes.clear(); - argTypes.push_back(builder.getInt64Ty()); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - - argNames.clear(); - - createFunction(module, - retType, - argTypes, - argNames, - "print64Int", - llvm::Function::ExternalLinkage, - true, - false); - - // printStr - - retType = builder.getVoidTy(); - - argTypes.clear(); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - - argNames.clear(); - - createFunction(module, - retType, - argTypes, - argNames, - "printStr", - llvm::Function::ExternalLinkage, - true, - false); - - // throwCppException - - retType = builder.getVoidTy(); - - argTypes.clear(); - argTypes.push_back(builder.getInt32Ty()); - - argNames.clear(); - - createFunction(module, - retType, - argTypes, - argNames, - "throwCppException", - llvm::Function::ExternalLinkage, - true, - false); - - // deleteOurException - - retType = 
builder.getVoidTy(); - - argTypes.clear(); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - - argNames.clear(); - - createFunction(module, - retType, - argTypes, - argNames, - "deleteOurException", - llvm::Function::ExternalLinkage, - true, - false); - - // createOurException - - retType = builder.getInt8Ty()->getPointerTo(); - - argTypes.clear(); - argTypes.push_back(builder.getInt32Ty()); - - argNames.clear(); - - createFunction(module, - retType, - argTypes, - argNames, - "createOurException", - llvm::Function::ExternalLinkage, - true, - false); - - // _Unwind_RaiseException - - retType = builder.getInt32Ty(); - - argTypes.clear(); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - - argNames.clear(); - - funct = createFunction(module, - retType, - argTypes, - argNames, - "_Unwind_RaiseException", - llvm::Function::ExternalLinkage, - true, - false); - - funct->addFnAttr(llvm::Attribute::NoReturn); - - // _Unwind_Resume - - retType = builder.getInt32Ty(); - - argTypes.clear(); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - - argNames.clear(); - - funct = createFunction(module, - retType, - argTypes, - argNames, - "_Unwind_Resume", - llvm::Function::ExternalLinkage, - true, - false); - - funct->addFnAttr(llvm::Attribute::NoReturn); - - // ourPersonality - - retType = builder.getInt32Ty(); - - argTypes.clear(); - argTypes.push_back(builder.getInt32Ty()); - argTypes.push_back(builder.getInt32Ty()); - argTypes.push_back(builder.getInt64Ty()); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - argTypes.push_back(builder.getInt8Ty()->getPointerTo()); - - argNames.clear(); - - createFunction(module, - retType, - argTypes, - argNames, - "ourPersonality", - llvm::Function::ExternalLinkage, - true, - false); - - // llvm.eh.selector intrinsic - - getDeclaration(&module, llvm::Intrinsic::eh_selector); - - // llvm.eh.exception intrinsic - - getDeclaration(&module, llvm::Intrinsic::eh_exception); - - // llvm.eh.typeid.for intrinsic 
- - getDeclaration(&module, llvm::Intrinsic::eh_typeid_for); + typeInfoNameBuilder.str(""); + typeInfoNameBuilder << baseStr << i; + typeInfoName = typeInfoNameBuilder.str(); + + // Note: Does not seem to work without allocation + nextGlobal = + new llvm::GlobalVariable(module, + ourTypeInfoType, + true, + llvm::GlobalValue::ExternalLinkage, + nextStruct, + typeInfoName); + + ourTypeInfoNames.push_back(typeInfoName); + ourTypeInfoNamesIndex[i] = typeInfoName; + } + + ArgNames argNames; + ArgTypes argTypes; + llvm::Function *funct = NULL; + + // print32Int + + const llvm::Type *retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print32Int", + llvm::Function::ExternalLinkage, + true, + false); + + // print64Int + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt64Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "print64Int", + llvm::Function::ExternalLinkage, + true, + false); + + // printStr + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "printStr", + llvm::Function::ExternalLinkage, + true, + false); + + // throwCppException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "throwCppException", + llvm::Function::ExternalLinkage, + true, + false); + + // deleteOurException + + retType = builder.getVoidTy(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + 
argTypes, + argNames, + "deleteOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // createOurException + + retType = builder.getInt8Ty()->getPointerTo(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "createOurException", + llvm::Function::ExternalLinkage, + true, + false); + + // _Unwind_RaiseException + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + funct = createFunction(module, + retType, + argTypes, + argNames, + "_Unwind_RaiseException", + llvm::Function::ExternalLinkage, + true, + false); + + funct->addFnAttr(llvm::Attribute::NoReturn); + + // _Unwind_Resume + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + funct = createFunction(module, + retType, + argTypes, + argNames, + "_Unwind_Resume", + llvm::Function::ExternalLinkage, + true, + false); + + funct->addFnAttr(llvm::Attribute::NoReturn); + + // ourPersonality + + retType = builder.getInt32Ty(); + + argTypes.clear(); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt32Ty()); + argTypes.push_back(builder.getInt64Ty()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + argTypes.push_back(builder.getInt8Ty()->getPointerTo()); + + argNames.clear(); + + createFunction(module, + retType, + argTypes, + argNames, + "ourPersonality", + llvm::Function::ExternalLinkage, + true, + false); + + // llvm.eh.selector intrinsic + + getDeclaration(&module, llvm::Intrinsic::eh_selector); + + // llvm.eh.exception intrinsic + + getDeclaration(&module, llvm::Intrinsic::eh_exception); + + // llvm.eh.typeid.for intrinsic + + getDeclaration(&module, llvm::Intrinsic::eh_typeid_for); } -//===---------------------------------------------------------------------===// 
+//===----------------------------------------------------------------------===// // Main test driver code. -//===---------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// /// Demo main routine which takes the type info types to throw. A test will /// be run for each given type info type. While type info types with the value @@ -1932,99 +1917,99 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos, /// <= 6 and >= 1 will be caught by test functions; and type info types > 6 /// will result in exceptions which pass through to the test harness. All other /// type info types are not supported and could cause a crash. -int main(int argc, char* argv[]) { - if (argc == 1) { - fprintf(stderr, - "\nUsage: ExceptionDemo " - "[...].\n" - " Each type must have the value of 1 - 6 for " - "generated exceptions to be caught;\n" - " the value -1 for foreign C++ exceptions to be " - "generated and thrown;\n" - " or the values > 6 for exceptions to be ignored.\n" - "\nTry: ExceptionDemo 2 3 7 -1\n" - " for a full test.\n\n"); - return(0); - } - - // If not set, exception handling will not be turned on - llvm::JITExceptionHandling = true; - - llvm::InitializeNativeTarget(); - llvm::LLVMContext& context = llvm::getGlobalContext(); - llvm::IRBuilder<> theBuilder(context); - - // Make the module, which holds all the code. - llvm::Module* module = new llvm::Module("my cool jit", context); - - // Build engine with JIT - llvm::EngineBuilder factory(module); - factory.setEngineKind(llvm::EngineKind::JIT); - factory.setAllocateGVsWithCode(false); - llvm::ExecutionEngine* executionEngine = factory.create(); - - { - llvm::FunctionPassManager fpm(module); - - // Set up the optimizer pipeline. - // Start with registering info about how the - // target lays out data structures. 
- fpm.add(new llvm::TargetData(*executionEngine->getTargetData())); - - // Optimizations turned on -#ifdef ADD_OPT_PASSES - - // Basic AliasAnslysis support for GVN. - fpm.add(llvm::createBasicAliasAnalysisPass()); - - // Promote allocas to registers. - fpm.add(llvm::createPromoteMemoryToRegisterPass()); - - // Do simple "peephole" optimizations and bit-twiddling optzns. - fpm.add(llvm::createInstructionCombiningPass()); - - // Reassociate expressions. - fpm.add(llvm::createReassociatePass()); - - // Eliminate Common SubExpressions. - fpm.add(llvm::createGVNPass()); - - // Simplify the control flow graph (deleting unreachable - // blocks, etc). - fpm.add(llvm::createCFGSimplificationPass()); -#endif // ADD_OPT_PASSES - - fpm.doInitialization(); - - // Generate test code using function throwCppException(...) as - // the function which throws foreign exceptions. - llvm::Function* toRun = - createUnwindExceptionTest(*module, - theBuilder, - fpm, - "throwCppException"); - - fprintf(stderr, "\nBegin module dump:\n\n"); - - module->dump(); - - fprintf(stderr, "\nEnd module dump:\n"); - - fprintf(stderr, "\n\nBegin Test:\n"); - - for (int i = 1; i < argc; ++i) { - // Run test for each argument whose value is the exception - // type to throw. 
- runExceptionThrow(executionEngine, - toRun, - (unsigned) strtoul(argv[i], NULL, 10)); - } - - fprintf(stderr, "\nEnd Test:\n\n"); - } - - delete executionEngine; +int main(int argc, char *argv[]) { + if (argc == 1) { + fprintf(stderr, + "\nUsage: ExceptionDemo " + "[...].\n" + " Each type must have the value of 1 - 6 for " + "generated exceptions to be caught;\n" + " the value -1 for foreign C++ exceptions to be " + "generated and thrown;\n" + " or the values > 6 for exceptions to be ignored.\n" + "\nTry: ExceptionDemo 2 3 7 -1\n" + " for a full test.\n\n"); + return(0); + } - return 0; + // If not set, exception handling will not be turned on + llvm::JITExceptionHandling = true; + + llvm::InitializeNativeTarget(); + llvm::LLVMContext &context = llvm::getGlobalContext(); + llvm::IRBuilder<> theBuilder(context); + + // Make the module, which holds all the code. + llvm::Module *module = new llvm::Module("my cool jit", context); + + // Build engine with JIT + llvm::EngineBuilder factory(module); + factory.setEngineKind(llvm::EngineKind::JIT); + factory.setAllocateGVsWithCode(false); + llvm::ExecutionEngine *executionEngine = factory.create(); + + { + llvm::FunctionPassManager fpm(module); + + // Set up the optimizer pipeline. + // Start with registering info about how the + // target lays out data structures. + fpm.add(new llvm::TargetData(*executionEngine->getTargetData())); + + // Optimizations turned on +#ifdef ADD_OPT_PASSES + + // Basic AliasAnslysis support for GVN. + fpm.add(llvm::createBasicAliasAnalysisPass()); + + // Promote allocas to registers. + fpm.add(llvm::createPromoteMemoryToRegisterPass()); + + // Do simple "peephole" optimizations and bit-twiddling optzns. + fpm.add(llvm::createInstructionCombiningPass()); + + // Reassociate expressions. + fpm.add(llvm::createReassociatePass()); + + // Eliminate Common SubExpressions. + fpm.add(llvm::createGVNPass()); + + // Simplify the control flow graph (deleting unreachable + // blocks, etc). 
+ fpm.add(llvm::createCFGSimplificationPass()); +#endif // ADD_OPT_PASSES + + fpm.doInitialization(); + + // Generate test code using function throwCppException(...) as + // the function which throws foreign exceptions. + llvm::Function *toRun = + createUnwindExceptionTest(*module, + theBuilder, + fpm, + "throwCppException"); + + fprintf(stderr, "\nBegin module dump:\n\n"); + + module->dump(); + + fprintf(stderr, "\nEnd module dump:\n"); + + fprintf(stderr, "\n\nBegin Test:\n"); + + for (int i = 1; i < argc; ++i) { + // Run test for each argument whose value is the exception + // type to throw. + runExceptionThrow(executionEngine, + toRun, + (unsigned) strtoul(argv[i], NULL, 10)); + } + + fprintf(stderr, "\nEnd Test:\n\n"); + } + + delete executionEngine; + + return 0; } diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp index 26b3db66202f..5dcc7ed7f8d4 100644 --- a/examples/Kaleidoscope/Chapter5/toy.cpp +++ b/examples/Kaleidoscope/Chapter5/toy.cpp @@ -550,7 +550,7 @@ Value *IfExprAST::Codegen() { // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); @@ -592,7 +592,7 @@ Value *ForExprAST::Codegen() { Builder.SetInsertPoint(LoopBB); // Start the PHI node with an entry for Start. - PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); Variable->addIncoming(StartVal, PreheaderBB); // Within the loop, the variable is defined equal to the PHI node. 
If it diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp index 838125ae77dc..c5576992c354 100644 --- a/examples/Kaleidoscope/Chapter6/toy.cpp +++ b/examples/Kaleidoscope/Chapter6/toy.cpp @@ -654,7 +654,7 @@ Value *IfExprAST::Codegen() { // Emit merge block. TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); @@ -696,7 +696,7 @@ Value *ForExprAST::Codegen() { Builder.SetInsertPoint(LoopBB); // Start the PHI node with an entry for Start. - PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), VarName.c_str()); + PHINode *Variable = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, VarName.c_str()); Variable->addIncoming(StartVal, PreheaderBB); // Within the loop, the variable is defined equal to the PHI node. If it diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp index e63578f57e6c..6afd11847be7 100644 --- a/examples/Kaleidoscope/Chapter7/toy.cpp +++ b/examples/Kaleidoscope/Chapter7/toy.cpp @@ -750,7 +750,7 @@ Value *IfExprAST::Codegen() { // Emit merge block. 
TheFunction->getBasicBlockList().push_back(MergeBB); Builder.SetInsertPoint(MergeBB); - PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), + PHINode *PN = Builder.CreatePHI(Type::getDoubleTy(getGlobalContext()), 2, "iftmp"); PN->addIncoming(ThenV, ThenBB); diff --git a/include/llvm-c/Disassembler.h b/include/llvm-c/Disassembler.h new file mode 100644 index 000000000000..9f10973404b7 --- /dev/null +++ b/include/llvm-c/Disassembler.h @@ -0,0 +1,149 @@ +/*===-- llvm-c/Disassembler.h - Disassembler Public C Interface ---*- C -*-===*\ +|* *| +|* The LLVM Compiler Infrastructure *| +|* *| +|* This file is distributed under the University of Illinois Open Source *| +|* License. See LICENSE.TXT for details. *| +|* *| +|*===----------------------------------------------------------------------===*| +|* *| +|* This header provides public interface to a disassembler library. *| +|* LLVM provides an implementation of this interface. *| +|* *| +\*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_DISASSEMBLER_H +#define LLVM_C_DISASSEMBLER_H 1 + +#include <stddef.h> +#include "llvm/Support/DataTypes.h" + +/** + * An opaque reference to a disassembler context. + */ +typedef void *LLVMDisasmContextRef; + +/** + * The type for the operand information call back function. This is called to + * get the symbolic information for an operand of an instruction. Typically + * this is from the relocation information, symbol table, etc. That block of + * information is saved when the disassembler context is created and passed to + * the call back in the DisInfo parameter. The instruction containing operand + * is at the PC parameter. For some instruction sets, there can be more than + * one operand with symbolic information. To determine the symbolic operand + * information for each operand, the bytes for the specific operand in the + * instruction are specified by the Offset parameter and its byte width is the + * size parameter.
For instructions sets with fixed widths and one symbolic + * operand per instruction, the Offset parameter will be zero and Size parameter + * will be the instruction width. The information is returned in TagBuf and is + * Triple specific with its specific information defined by the value of + * TagType for that Triple. If symbolic information is returned the function + * returns 1 else it returns 0. + */ +typedef int (*LLVMOpInfoCallback)(void *DisInfo, + uint64_t PC, + uint64_t Offset, + uint64_t Size, + int TagType, + void *TagBuf); + +/** + * The initial support in LLVM MC for the most general form of a relocatable + * expression is "AddSymbol - SubtractSymbol + Offset". For some Darwin targets + * this full form is encoded in the relocation information so that AddSymbol and + * SubtractSymbol can be link edited independent of each other. Many other + * platforms only allow a relocatable expression of the form AddSymbol + Offset + * to be encoded. + * + * The LLVMOpInfoCallback() for the TagType value of 1 uses the struct + * LLVMOpInfo1. The value of the relocatable expression for the operand, + * including any PC adjustment, is passed in to the call back in the Value + * field. The symbolic information about the operand is returned using all + * the fields of the structure with the Offset of the relocatable expression + * returned in the Value field. It is possible that some symbols in the + * relocatable expression were assembly temporary symbols, for example + * "Ldata - LpicBase + constant", and only the Values of the symbols without + * symbol names are present in the relocation information. The VariantKind + * type is one of the Target specific #defines below and is used to print + * operands like "_foo@GOT", ":lower16:_foo", etc. 
+ */ +struct LLVMOpInfoSymbol1 { + uint64_t Present; /* 1 if this symbol is present */ + char *Name; /* symbol name if not NULL */ + uint64_t Value; /* symbol value if name is NULL */ +}; +struct LLVMOpInfo1 { + struct LLVMOpInfoSymbol1 AddSymbol; + struct LLVMOpInfoSymbol1 SubtractSymbol; + uint64_t Value; + uint64_t VariantKind; +}; + +/** + * The operand VariantKinds for symbolic disassembly. + */ +#define LLVMDisassembler_VariantKind_None 0 /* all targets */ + +/** + * The ARM target VariantKinds. + */ +#define LLVMDisassembler_VariantKind_ARM_HI16 1 /* :upper16: */ +#define LLVMDisassembler_VariantKind_ARM_LO16 2 /* :lower16: */ + +/** + * The type for the symbol lookup function. This may be called by the + * disassembler for such things as adding a comment for a PC plus a constant + * offset load instruction to use a symbol name instead of a load address value. + * It is passed the block of information saved when the disassembler context is + * created and a value of a symbol to look up. If no symbol is found NULL is + * to be returned. + */ +typedef const char *(*LLVMSymbolLookupCallback)(void *DisInfo, + uint64_t SymbolValue); + +#ifdef __cplusplus +extern "C" { +#endif /* defined(__cplusplus) */ + +/** + * Create a disassembler for the TripleName. Symbolic disassembly is supported + * by passing a block of information in the DisInfo parameter and specifying the + * TagType and call back functions as described above. These can all be passed + * as NULL. If successful, this returns a disassembler context; if not, it + * returns NULL. + */ +extern LLVMDisasmContextRef +LLVMCreateDisasm(const char *TripleName, + void *DisInfo, + int TagType, + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp); + +/** + * Dispose of a disassembler context. + */ +extern void +LLVMDisasmDispose(LLVMDisasmContextRef DC); + +/** + * Disassemble a single instruction using the disassembler context specified in + * the parameter DC.
The bytes of the instruction are specified in the parameter + * Bytes, which contains at least BytesSize number of bytes. The instruction is + * at the address specified by the PC parameter. If a valid instruction can be + * disassembled its string is returned indirectly in OutString, whose size + * is specified in the parameter OutStringSize. This function returns the + * number of bytes in the instruction or zero if there was no valid instruction. + */ +extern size_t +LLVMDisasmInstruction(LLVMDisasmContextRef DC, + uint8_t *Bytes, + uint64_t BytesSize, + uint64_t PC, + char *OutString, + size_t OutStringSize); + +#ifdef __cplusplus +} +#endif /* defined(__cplusplus) */ + +#endif /* !defined(LLVM_C_DISASSEMBLER_H) */ diff --git a/include/llvm-c/EnhancedDisassembly.h b/include/llvm-c/EnhancedDisassembly.h index 28ac0ed2ab35..0c173c2b1999 100644 --- a/include/llvm-c/EnhancedDisassembly.h +++ b/include/llvm-c/EnhancedDisassembly.h @@ -44,7 +44,7 @@ typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg); @param arg An anonymous argument for client use. @result 0 if the register could be read; -1 otherwise. */ -typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, +typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID, void* arg); /*! @@ -83,7 +83,7 @@ typedef void *EDTokenRef; Encapsulates an operand of an instruction. */ typedef void *EDOperandRef; - + /*! @functiongroup Getting a disassembler */ @@ -91,7 +91,7 @@ typedef void *EDOperandRef; /*! @function EDGetDisassembler Gets the disassembler for a given target. - @param disassembler A pointer whose target will be filled in with the + @param disassembler A pointer whose target will be filled in with the disassembler. @param triple Identifies the target. Example: "x86_64-apple-darwin10" @param syntax The assembly syntax to use when decoding instructions. @@ -104,12 +104,12 @@ int EDGetDisassembler(EDDisassemblerRef *disassembler, /*!
@functiongroup Generic architectural queries */ - + /*! @function EDGetRegisterName Gets the human-readable name for a given register. @param regName A pointer whose target will be pointed at the name of the - register. The name does not need to be deallocated and will be + register. The name does not need to be deallocated and will be @param disassembler The disassembler to query for the name. @param regID The register identifier, as returned by EDRegisterTokenValue. @result 0 on success; -1 otherwise. @@ -117,7 +117,7 @@ int EDGetDisassembler(EDDisassemblerRef *disassembler, int EDGetRegisterName(const char** regName, EDDisassemblerRef disassembler, unsigned regID); - + /*! @function EDRegisterIsStackPointer Determines if a register is one of the platform's stack-pointer registers. @@ -137,16 +137,16 @@ int EDRegisterIsStackPointer(EDDisassemblerRef disassembler, */ int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler, unsigned regID); - + /*! @functiongroup Creating and querying instructions */ - + /*! @function EDCreateInst Gets a set of contiguous instructions from a disassembler. @param insts A pointer to an array that will be filled in with the - instructions. Must have at least count entries. Entries not filled in will + instructions. Must have at least count entries. Entries not filled in will be set to NULL. @param count The maximum number of instructions to fill in. @param disassembler The disassembler to use when decoding the instructions. @@ -197,7 +197,7 @@ int EDGetInstString(const char **buf, @result 0 on success; -1 otherwise. */ int EDInstID(unsigned *instID, EDInstRef inst); - + /*! @function EDInstIsBranch @param inst The instruction to be queried. @@ -217,7 +217,7 @@ int EDInstIsMove(EDInstRef inst); /*! @function EDBranchTargetID @param inst The instruction to be queried. - @result The ID of the branch target operand, suitable for use with + @result The ID of the branch target operand, suitable for use with EDCopyOperand. 
-1 if no such operand exists. */ int EDBranchTargetID(EDInstRef inst); @@ -225,7 +225,7 @@ int EDBranchTargetID(EDInstRef inst); /*! @function EDMoveSourceID @param inst The instruction to be queried. - @result The ID of the move source operand, suitable for use with + @result The ID of the move source operand, suitable for use with EDCopyOperand. -1 if no such operand exists. */ int EDMoveSourceID(EDInstRef inst); @@ -233,7 +233,7 @@ int EDMoveSourceID(EDInstRef inst); /*! @function EDMoveTargetID @param inst The instruction to be queried. - @result The ID of the move source operand, suitable for use with + @result The ID of the move source operand, suitable for use with EDCopyOperand. -1 if no such operand exists. */ int EDMoveTargetID(EDInstRef inst); @@ -241,7 +241,7 @@ int EDMoveTargetID(EDInstRef inst); /*! @functiongroup Creating and querying tokens */ - + /*! @function EDNumTokens @param inst The instruction to be queried. @@ -261,7 +261,7 @@ int EDNumTokens(EDInstRef inst); int EDGetToken(EDTokenRef *token, EDInstRef inst, int index); - + /*! @function EDGetTokenString Gets the disassembled text for a token. @@ -287,7 +287,7 @@ int EDOperandIndexForToken(EDTokenRef token); @result 1 if the token is whitespace; 0 if not; -1 on error. */ int EDTokenIsWhitespace(EDTokenRef token); - + /*! @function EDTokenIsPunctuation @param token The token to be queried. @@ -335,18 +335,18 @@ int EDLiteralTokenAbsoluteValue(uint64_t *value, /*! @function EDRegisterTokenValue - @param registerID A pointer whose target will be filled in with the LLVM + @param registerID A pointer whose target will be filled in with the LLVM register identifier for the token. @param token The token to be queried. @result 0 on success; -1 otherwise. */ int EDRegisterTokenValue(unsigned *registerID, EDTokenRef token); - + /*! @functiongroup Creating and querying operands */ - + /*! @function EDNumOperands @param inst The instruction to be queried. 
@@ -366,7 +366,7 @@ int EDNumOperands(EDInstRef inst); int EDGetOperand(EDOperandRef *operand, EDInstRef inst, int index); - + /*! @function EDOperandIsRegister @param operand The operand to be queried. @@ -391,13 +391,13 @@ int EDOperandIsMemory(EDOperandRef operand); /*! @function EDRegisterOperandValue @param value A pointer whose target will be filled in with the LLVM register ID - of the register named by the operand. + of the register named by the operand. @param operand The operand to be queried. @result 0 on success; -1 otherwise. */ int EDRegisterOperandValue(unsigned *value, EDOperandRef operand); - + /*! @function EDImmediateOperandValue @param value A pointer whose target will be filled in with the value of the @@ -427,7 +427,7 @@ int EDEvaluateOperand(uint64_t *result, EDOperandRef operand, EDRegisterReaderCallback regReader, void *arg); - + #ifdef __BLOCKS__ /*! @@ -458,13 +458,13 @@ typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID); typedef int (^EDTokenVisitor_t)(EDTokenRef token); /*! @functiongroup Block-based interfaces */ - + /*! @function EDBlockCreateInsts Gets a set of contiguous instructions from a disassembler, using a block to read memory. @param insts A pointer to an array that will be filled in with the - instructions. Must have at least count entries. Entries not filled in will + instructions. Must have at least count entries. Entries not filled in will be set to NULL. @param count The maximum number of instructions to fill in. @param disassembler The disassembler to use when decoding the instructions. 
@@ -505,7 +505,7 @@ int EDBlockVisitTokens(EDInstRef inst, EDTokenVisitor_t visitor); #endif - + #ifdef __cplusplus } #endif diff --git a/include/llvm-c/Object.h b/include/llvm-c/Object.h new file mode 100644 index 000000000000..6e72b5946644 --- /dev/null +++ b/include/llvm-c/Object.h @@ -0,0 +1,77 @@ +/*===-- llvm-c/Object.h - Object Lib C Iface --------------------*- C++ -*-===*/ +/* */ +/* The LLVM Compiler Infrastructure */ +/* */ +/* This file is distributed under the University of Illinois Open Source */ +/* License. See LICENSE.TXT for details. */ +/* */ +/*===----------------------------------------------------------------------===*/ +/* */ +/* This header declares the C interface to libLLVMObject.a, which */ +/* implements object file reading and writing. */ +/* */ +/* Many exotic languages can interoperate with C code but have a harder time */ +/* with C++ due to name mangling. So in addition to C, this interface enables */ +/* tools written in such languages. */ +/* */ +/*===----------------------------------------------------------------------===*/ + +#ifndef LLVM_C_OBJECT_H +#define LLVM_C_OBJECT_H + +#include "llvm-c/Core.h" +#include "llvm/Config/llvm-config.h" + +#ifdef __cplusplus +#include "llvm/Object/ObjectFile.h" + +extern "C" { +#endif + + +typedef struct LLVMOpaqueObjectFile *LLVMObjectFileRef; + +typedef struct LLVMOpaqueSectionIterator *LLVMSectionIteratorRef; + +LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf); +void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile); + +LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile); +void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI); +LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile, + LLVMSectionIteratorRef SI); +void LLVMMoveToNextSection(LLVMSectionIteratorRef SI); +const char *LLVMGetSectionName(LLVMSectionIteratorRef SI); +uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI); +const char 
*LLVMGetSectionContents(LLVMSectionIteratorRef SI); + + +#ifdef __cplusplus +} + +namespace llvm { + namespace object { + inline ObjectFile *unwrap(LLVMObjectFileRef OF) { + return reinterpret_cast(OF); + } + + inline LLVMObjectFileRef wrap(const ObjectFile *OF) { + return reinterpret_cast(const_cast(OF)); + } + + inline ObjectFile::section_iterator *unwrap(LLVMSectionIteratorRef SI) { + return reinterpret_cast(SI); + } + + inline LLVMSectionIteratorRef + wrap(const ObjectFile::section_iterator *SI) { + return reinterpret_cast + (const_cast(SI)); + } + } +} + +#endif /* defined(__cplusplus) */ + +#endif + diff --git a/include/llvm-c/Transforms/Scalar.h b/include/llvm-c/Transforms/Scalar.h index 2ddfb38171c2..cf8d71f5d007 100644 --- a/include/llvm-c/Transforms/Scalar.h +++ b/include/llvm-c/Transforms/Scalar.h @@ -52,6 +52,9 @@ void LLVMAddLICMPass(LLVMPassManagerRef PM); /** See llvm::createLoopDeletionPass function. */ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM); +/** See llvm::createLoopIdiomPass function */ +void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM); + /** See llvm::createLoopRotatePass function. */ void LLVMAddLoopRotatePass(LLVMPassManagerRef PM); @@ -76,6 +79,9 @@ void LLVMAddSCCPPass(LLVMPassManagerRef PM); /** See llvm::createScalarReplAggregatesPass function. */ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM); +/** See llvm::createScalarReplAggregatesPass function. */ +void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM); + /** See llvm::createScalarReplAggregatesPass function. */ void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold); @@ -95,6 +101,19 @@ void LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM); /** See llvm::createVerifierPass function. 
*/ void LLVMAddVerifierPass(LLVMPassManagerRef PM); +/** See llvm::createCorrelatedValuePropagationPass function */ +void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM); + +/** See llvm::createEarlyCSEPass function */ +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM); + +/** See llvm::createTypeBasedAliasAnalysisPass function */ +void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM); + +/** See llvm::createBasicAliasAnalysisPass function */ +void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM); + + #ifdef __cplusplus } #endif /* defined(__cplusplus) */ diff --git a/include/llvm-c/lto.h b/include/llvm-c/lto.h index 1c42ce0cec77..7ea7ad01a211 100644 --- a/include/llvm-c/lto.h +++ b/include/llvm-c/lto.h @@ -72,7 +72,7 @@ lto_get_version(void); /** - * Returns the last error string or NULL if last operation was sucessful. + * Returns the last error string or NULL if last operation was successful. */ extern const char* lto_get_error_message(void); @@ -127,7 +127,15 @@ lto_module_create_from_memory(const void* mem, size_t length); * Returns NULL on error (check lto_get_error_message() for details). */ extern lto_module_t -lto_module_create_from_fd(int fd, const char *path, off_t size); +lto_module_create_from_fd(int fd, const char *path, size_t file_size); + +/** + * Loads an object file from disk. The seek point of fd is not preserved. + * Returns NULL on error (check lto_get_error_message() for details). + */ +extern lto_module_t +lto_module_create_from_fd_at_offset(int fd, const char *path, size_t file_size, + size_t map_size, off_t offset); /** @@ -255,7 +263,7 @@ lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path); /** * Generates code for all added modules into one native object file. - * On sucess returns a pointer to a generated mach-o/ELF buffer and + * On success returns a pointer to a generated mach-o/ELF buffer and * length set to the buffer size. 
The buffer is owned by the * lto_code_gen_t and will be freed when lto_codegen_dispose() * is called, or lto_codegen_compile() is called again. @@ -264,6 +272,13 @@ lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path); extern const void* lto_codegen_compile(lto_code_gen_t cg, size_t* length); +/** + * Generates code for all added modules into one native object file. + * The name of the file is written to name. Returns true on error. + */ +extern bool +lto_codegen_compile_to_file(lto_code_gen_t cg, const char** name); + /** * Sets options to help debug codegen bugs. diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h index ca4138b825a6..21b8c86d1d5b 100644 --- a/include/llvm/ADT/APFloat.h +++ b/include/llvm/ADT/APFloat.h @@ -353,6 +353,10 @@ namespace llvm { unsigned FormatPrecision = 0, unsigned FormatMaxPadding = 3) const; + /// getExactInverse - If this value has an exact multiplicative inverse, + /// store it in inv and return true. + bool getExactInverse(APFloat *inv) const; + private: /* Trivial queries. */ diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h index d1fd3e5034bf..2feef076fa7b 100644 --- a/include/llvm/ADT/APInt.h +++ b/include/llvm/ADT/APInt.h @@ -818,6 +818,7 @@ class APInt { APInt usub_ov(const APInt &RHS, bool &Overflow) const; APInt sdiv_ov(const APInt &RHS, bool &Overflow) const; APInt smul_ov(const APInt &RHS, bool &Overflow) const; + APInt umul_ov(const APInt &RHS, bool &Overflow) const; APInt sshl_ov(unsigned Amt, bool &Overflow) const; /// @returns the bit value at bitPosition @@ -1372,7 +1373,7 @@ class APInt { /// Calculate the magic number for unsigned division by a constant. 
struct mu; - mu magicu() const; + mu magicu(unsigned LeadingZeros = 0) const; /// @} /// @name Building-block Operations for APInt and APFloat diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h index d3ea9c0f03b7..97e42cb26602 100644 --- a/include/llvm/ADT/ArrayRef.h +++ b/include/llvm/ADT/ArrayRef.h @@ -22,8 +22,8 @@ namespace llvm { /// /// This class does not own the underlying data, it is expected to be used in /// situations where the data resides in some other buffer, whose lifetime - /// extends past that of the StringRef. For this reason, it is not in general - /// safe to store a ArrayRef. + /// extends past that of the ArrayRef. For this reason, it is not in general + /// safe to store an ArrayRef. /// /// This is intended to be trivially copyable, so it should be passed by /// value. @@ -79,6 +79,8 @@ namespace llvm { /// empty - Check if the array is empty. bool empty() const { return Length == 0; } + const T *data() const { return Data; } + /// size - Get the array size. size_t size() const { return Length; } @@ -94,10 +96,22 @@ namespace llvm { return Data[Length-1]; } + /// slice(n) - Chop off the first N elements of the array. + ArrayRef slice(unsigned N) { + assert(N <= size() && "Invalid specifier"); + return ArrayRef(data()+N, size()-N); + } + + /// slice(n, m) - Chop off the first N elements of the array, and keep M + /// elements in the array. 
+ ArrayRef slice(unsigned N, unsigned M) { + assert(N+M <= size() && "Invalid specifier"); + return ArrayRef(data()+N, M); + } + /// @} /// @name Operator Overloads /// @{ - const T &operator[](size_t Index) const { assert(Index < Length && "Invalid index!"); return Data[Index]; @@ -106,7 +120,6 @@ namespace llvm { /// @} /// @name Expensive Operations /// @{ - std::vector vec() const { return std::vector(Data, Data+Length); } diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h index 61d6ae70e1d9..0f1cfebc3672 100644 --- a/include/llvm/ADT/DenseMap.h +++ b/include/llvm/ADT/DenseMap.h @@ -53,13 +53,13 @@ class DenseMap { CopyFrom(other); } - explicit DenseMap(unsigned NumInitBuckets = 64) { + explicit DenseMap(unsigned NumInitBuckets = 0) { init(NumInitBuckets); } template DenseMap(const InputIt &I, const InputIt &E) { - init(64); + init(NextPowerOf2(std::distance(I, E))); insert(I, E); } @@ -72,7 +72,8 @@ class DenseMap { P->first.~KeyT(); } #ifndef NDEBUG - memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets); + if (NumBuckets) + memset((void*)Buckets, 0x5a, sizeof(BucketT)*NumBuckets); #endif operator delete(Buckets); } @@ -98,7 +99,10 @@ class DenseMap { unsigned size() const { return NumEntries; } /// Grow the densemap so that it has at least Size buckets. 
Does not shrink - void resize(size_t Size) { grow(Size); } + void resize(size_t Size) { + if (Size > NumBuckets) + grow(Size); + } void clear() { if (NumEntries == 0 && NumTombstones == 0) return; @@ -248,23 +252,29 @@ class DenseMap { if (NumBuckets) { #ifndef NDEBUG - memset(Buckets, 0x5a, sizeof(BucketT)*NumBuckets); + memset((void*)Buckets, 0x5a, sizeof(BucketT)*NumBuckets); #endif operator delete(Buckets); } - Buckets = static_cast(operator new(sizeof(BucketT) * - other.NumBuckets)); + + NumBuckets = other.NumBuckets; + + if (NumBuckets == 0) { + Buckets = 0; + return; + } + + Buckets = static_cast(operator new(sizeof(BucketT) * NumBuckets)); if (isPodLike::value && isPodLike::value) - memcpy(Buckets, other.Buckets, other.NumBuckets * sizeof(BucketT)); + memcpy(Buckets, other.Buckets, NumBuckets * sizeof(BucketT)); else - for (size_t i = 0; i < other.NumBuckets; ++i) { + for (size_t i = 0; i < NumBuckets; ++i) { new (&Buckets[i].first) KeyT(other.Buckets[i].first); if (!KeyInfoT::isEqual(Buckets[i].first, getEmptyKey()) && !KeyInfoT::isEqual(Buckets[i].first, getTombstoneKey())) new (&Buckets[i].second) ValueT(other.Buckets[i].second); } - NumBuckets = other.NumBuckets; } BucketT *InsertIntoBucket(const KeyT &Key, const ValueT &Value, @@ -279,11 +289,14 @@ class DenseMap { // table completely filled with tombstones, no lookup would ever succeed, // causing infinite loops in lookup. ++NumEntries; - if (NumEntries*4 >= NumBuckets*3 || - NumBuckets-(NumEntries+NumTombstones) < NumBuckets/8) { + if (NumEntries*4 >= NumBuckets*3) { this->grow(NumBuckets * 2); LookupBucketFor(Key, TheBucket); } + if (NumBuckets-(NumEntries+NumTombstones) < NumBuckets/8) { + this->grow(NumBuckets); + LookupBucketFor(Key, TheBucket); + } // If we are writing over a tombstone, remember this. 
if (!KeyInfoT::isEqual(TheBucket->first, getEmptyKey())) @@ -313,6 +326,11 @@ class DenseMap { unsigned ProbeAmt = 1; BucketT *BucketsPtr = Buckets; + if (NumBuckets == 0) { + FoundBucket = 0; + return false; + } + // FoundTombstone - Keep track of whether we find a tombstone while probing. BucketT *FoundTombstone = 0; const KeyT EmptyKey = getEmptyKey(); @@ -354,6 +372,12 @@ class DenseMap { NumEntries = 0; NumTombstones = 0; NumBuckets = InitBuckets; + + if (InitBuckets == 0) { + Buckets = 0; + return; + } + assert(InitBuckets && (InitBuckets & (InitBuckets-1)) == 0 && "# initial buckets must be a power of two!"); Buckets = static_cast(operator new(sizeof(BucketT)*InitBuckets)); @@ -367,6 +391,9 @@ class DenseMap { unsigned OldNumBuckets = NumBuckets; BucketT *OldBuckets = Buckets; + if (NumBuckets < 64) + NumBuckets = 64; + // Double the number of buckets. while (NumBuckets < AtLeast) NumBuckets <<= 1; @@ -398,7 +425,8 @@ class DenseMap { } #ifndef NDEBUG - memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); + if (OldNumBuckets) + memset((void*)OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); #endif // Free the old table. operator delete(OldBuckets); @@ -431,13 +459,22 @@ class DenseMap { } #ifndef NDEBUG - memset(OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); + memset((void*)OldBuckets, 0x5a, sizeof(BucketT)*OldNumBuckets); #endif // Free the old table. operator delete(OldBuckets); NumEntries = 0; } + +public: + /// Return the approximate size (in bytes) of the actual map. + /// This is just the raw memory used by DenseMap. + /// If entries are pointers to objects, the size of the referenced objects + /// are not included. 
+ size_t getMemorySize() const { + return NumBuckets * sizeof(BucketT); + } }; template > { key ^= (key >> 31); return (unsigned)key; } - static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; } + static bool isEqual(const Pair &LHS, const Pair &RHS) { + return FirstInfo::isEqual(LHS.first, RHS.first) && + SecondInfo::isEqual(LHS.second, RHS.second); + } }; } // end namespace llvm diff --git a/include/llvm/ADT/DepthFirstIterator.h b/include/llvm/ADT/DepthFirstIterator.h index b9e5cbdf8c6b..dd13a2c02053 100644 --- a/include/llvm/ADT/DepthFirstIterator.h +++ b/include/llvm/ADT/DepthFirstIterator.h @@ -143,8 +143,7 @@ class df_iterator : public std::iterator struct FoldingSetTrait; /// for FoldingSetTrait implementations. /// template struct DefaultFoldingSetTrait { - static void Profile(const T& X, FoldingSetNodeID& ID) { + static void Profile(const T &X, FoldingSetNodeID &ID) { X.Profile(ID); } - static void Profile(T& X, FoldingSetNodeID& ID) { + static void Profile(T &X, FoldingSetNodeID &ID) { X.Profile(ID); } @@ -267,7 +267,7 @@ template struct ContextualFoldingSetTrait /// is often much larger than necessary, and the possibility of heap /// allocation means it requires a non-trivial destructor call. class FoldingSetNodeIDRef { - const unsigned* Data; + const unsigned *Data; size_t Size; public: FoldingSetNodeIDRef() : Data(0), Size(0) {} @@ -310,9 +310,10 @@ class FoldingSetNodeID { void AddInteger(unsigned long long I); void AddBoolean(bool B) { AddInteger(B ? 1U : 0U); } void AddString(StringRef String); + void AddNodeID(const FoldingSetNodeID &ID); template - inline void Add(const T& x) { FoldingSetTrait::Profile(x, *this); } + inline void Add(const T &x) { FoldingSetTrait::Profile(x, *this); } /// clear - Clear the accumulated profile, allowing this FoldingSetNodeID /// object to be used to compute a new profile. 
@@ -548,7 +549,7 @@ class FoldingSetIterator : public FoldingSetIteratorImpl { return static_cast(NodePtr); } - inline FoldingSetIterator& operator++() { // Preincrement + inline FoldingSetIterator &operator++() { // Preincrement advance(); return *this; } @@ -596,10 +597,10 @@ class FoldingSetBucketIterator : public FoldingSetBucketIteratorImpl { FoldingSetBucketIterator(void **Bucket, bool) : FoldingSetBucketIteratorImpl(Bucket, true) {} - T& operator*() const { return *static_cast(Ptr); } - T* operator->() const { return static_cast(Ptr); } + T &operator*() const { return *static_cast(Ptr); } + T *operator->() const { return static_cast(Ptr); } - inline FoldingSetBucketIterator& operator++() { // Preincrement + inline FoldingSetBucketIterator &operator++() { // Preincrement advance(); return *this; } @@ -615,36 +616,36 @@ template class FoldingSetNodeWrapper : public FoldingSetNode { T data; public: - explicit FoldingSetNodeWrapper(const T& x) : data(x) {} + explicit FoldingSetNodeWrapper(const T &x) : data(x) {} virtual ~FoldingSetNodeWrapper() {} template - explicit FoldingSetNodeWrapper(const A1& a1) + explicit FoldingSetNodeWrapper(const A1 &a1) : data(a1) {} template - explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2) + explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2) : data(a1,a2) {} template - explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2, const A3& a3) + explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3) : data(a1,a2,a3) {} template - explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2, const A3& a3, - const A4& a4) + explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3, + const A4 &a4) : data(a1,a2,a3,a4) {} template - explicit FoldingSetNodeWrapper(const A1& a1, const A2& a2, const A3& a3, - const A4& a4, const A5& a5) + explicit FoldingSetNodeWrapper(const A1 &a1, const A2 &a2, const A3 &a3, + const A4 &a4, const A5 &a5) : data(a1,a2,a3,a4,a5) {} - void 
Profile(FoldingSetNodeID& ID) { FoldingSetTrait::Profile(data, ID); } + void Profile(FoldingSetNodeID &ID) { FoldingSetTrait::Profile(data, ID); } - T& getValue() { return data; } - const T& getValue() const { return data; } + T &getValue() { return data; } + const T &getValue() const { return data; } operator T&() { return data; } operator const T&() const { return data; } @@ -661,20 +662,22 @@ class FastFoldingSetNode : public FoldingSetNode { protected: explicit FastFoldingSetNode(const FoldingSetNodeID &ID) : FastID(ID) {} public: - void Profile(FoldingSetNodeID& ID) const { ID = FastID; } + void Profile(FoldingSetNodeID &ID) const { + ID.AddNodeID(FastID); + } }; //===----------------------------------------------------------------------===// // Partial specializations of FoldingSetTrait. template struct FoldingSetTrait { - static inline void Profile(const T* X, FoldingSetNodeID& ID) { + static inline void Profile(const T *X, FoldingSetNodeID &ID) { ID.AddPointer(X); } }; template struct FoldingSetTrait { - static inline void Profile(const T* X, FoldingSetNodeID& ID) { + static inline void Profile(const T *X, FoldingSetNodeID &ID) { ID.AddPointer(X); } }; diff --git a/include/llvm/ADT/ImmutableIntervalMap.h b/include/llvm/ADT/ImmutableIntervalMap.h index 0d8fcf343385..fa7ccb975e52 100644 --- a/include/llvm/ADT/ImmutableIntervalMap.h +++ b/include/llvm/ADT/ImmutableIntervalMap.h @@ -10,6 +10,10 @@ // This file defines the ImmutableIntervalMap class. 
// //===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H +#define LLVM_ADT_IMMUTABLE_INTERVAL_MAP_H + #include "llvm/ADT/ImmutableMap.h" namespace llvm { @@ -240,3 +244,5 @@ class ImmutableIntervalMap }; } // end namespace llvm + +#endif diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index 79f24d31c068..f28ebf3b9a5f 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -1328,6 +1328,10 @@ class IntervalMap::const_iterator : /// const_iterator - Create an iterator that isn't pointing anywhere. const_iterator() : map(0) {} + /// setMap - Change the map iterated over. This call must be followed by a + /// call to goToBegin(), goToEnd(), or find() + void setMap(const IntervalMap &m) { map = const_cast(&m); } + /// valid - Return true if the current position is valid, false for end(). bool valid() const { return path.valid(); } diff --git a/include/llvm/ADT/IntrusiveRefCntPtr.h b/include/llvm/ADT/IntrusiveRefCntPtr.h index 37d4ac9d29df..2f6fd2bd5590 100644 --- a/include/llvm/ADT/IntrusiveRefCntPtr.h +++ b/include/llvm/ADT/IntrusiveRefCntPtr.h @@ -42,18 +42,16 @@ namespace llvm { //===----------------------------------------------------------------------===// template class RefCountedBase { - unsigned ref_cnt; + mutable unsigned ref_cnt; - protected: + public: RefCountedBase() : ref_cnt(0) {} - void Retain() { ++ref_cnt; } - void Release() { + void Retain() const { ++ref_cnt; } + void Release() const { assert (ref_cnt > 0 && "Reference count is already zero."); - if (--ref_cnt == 0) delete static_cast(this); + if (--ref_cnt == 0) delete static_cast(this); } - - friend class IntrusiveRefCntPtr; }; //===----------------------------------------------------------------------===// @@ -64,21 +62,21 @@ namespace llvm { /// inherit from RefCountedBaseVPTR can't be allocated on stack - /// attempting to do this will produce a compile error. 
//===----------------------------------------------------------------------===// - template class RefCountedBaseVPTR { - unsigned ref_cnt; + mutable unsigned ref_cnt; protected: RefCountedBaseVPTR() : ref_cnt(0) {} virtual ~RefCountedBaseVPTR() {} - void Retain() { ++ref_cnt; } - void Release() { + void Retain() const { ++ref_cnt; } + void Release() const { assert (ref_cnt > 0 && "Reference count is already zero."); if (--ref_cnt == 0) delete this; } - friend class IntrusiveRefCntPtr; + template + friend class IntrusiveRefCntPtr; }; //===----------------------------------------------------------------------===// @@ -155,6 +153,10 @@ namespace llvm { other.Obj = Obj; Obj = tmp; } + + void resetWithoutRelease() { + Obj = 0; + } private: void retain() { if (Obj) Obj->Retain(); } diff --git a/include/llvm/ADT/PointerUnion.h b/include/llvm/ADT/PointerUnion.h index 61de042b0ff2..13b98cef07ab 100644 --- a/include/llvm/ADT/PointerUnion.h +++ b/include/llvm/ADT/PointerUnion.h @@ -19,16 +19,33 @@ namespace llvm { - /// getPointerUnionTypeNum - If the argument has type PT1* or PT2* return - /// false or true respectively. - template - static inline int getPointerUnionTypeNum(PT1 *P) { return 0; } - template - static inline int getPointerUnionTypeNum(PT2 *P) { return 1; } - template - static inline int getPointerUnionTypeNum(...) { return -1; } - - + template + struct PointerUnionTypeSelectorReturn { + typedef T Return; + }; + + /// \brief Get a type based on whether two types are the same or not. For: + /// @code + /// typedef typename PointerUnionTypeSelector::Return Ret; + /// @endcode + /// Ret will be EQ type if T1 is same as T2 or NE type otherwise. 
+ template + struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; + }; + + template + struct PointerUnionTypeSelector { + typedef typename PointerUnionTypeSelectorReturn::Return Return; + }; + + template + struct PointerUnionTypeSelectorReturn< + PointerUnionTypeSelector > { + typedef typename PointerUnionTypeSelector::Return + Return; + }; + /// Provide PointerLikeTypeTraits for void* that is used by PointerUnion /// for the two template arguments. template @@ -65,6 +82,16 @@ namespace llvm { PointerUnionUIntTraits > ValTy; private: ValTy Val; + + struct IsPT1 { + static const int Num = 0; + }; + struct IsPT2 { + static const int Num = 1; + }; + template + struct UNION_DOESNT_CONTAIN_TYPE { }; + public: PointerUnion() {} @@ -87,8 +114,11 @@ namespace llvm { /// is() return true if the Union currently holds the type matching T. template int is() const { - int TyNo = ::llvm::getPointerUnionTypeNum((T*)0); - assert(TyNo != -1 && "Type query could never succeed on PointerUnion!"); + typedef typename + ::llvm::PointerUnionTypeSelector > >::Return Ty; + int TyNo = Ty::Num; return static_cast(Val.getInt()) == TyNo; } @@ -175,6 +205,34 @@ namespace llvm { typedef PointerUnion ValTy; private: ValTy Val; + + struct IsInnerUnion { + ValTy Val; + IsInnerUnion(ValTy val) : Val(val) { } + template + int is() const { + return Val.template is() && + Val.template get().template is(); + } + template + T get() const { + return Val.template get().template get(); + } + }; + + struct IsPT3 { + ValTy Val; + IsPT3(ValTy val) : Val(val) { } + template + int is() const { + return Val.template is(); + } + template + T get() const { + return Val.template get(); + } + }; + public: PointerUnion3() {} @@ -196,11 +254,12 @@ namespace llvm { /// is() return true if the Union currently holds the type matching T. template int is() const { - // Is it PT1/PT2? 
- if (::llvm::getPointerUnionTypeNum((T*)0) != -1) - return Val.template is() && - Val.template get().template is(); - return Val.template is(); + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename + ::llvm::PointerUnionTypeSelector + >::Return Ty; + return Ty(Val).is(); } /// get() - Return the value of the specified pointer type. If the @@ -208,11 +267,12 @@ namespace llvm { template T get() const { assert(is() && "Invalid accessor called"); - // Is it PT1/PT2? - if (::llvm::getPointerUnionTypeNum((T*)0) != -1) - return Val.template get().template get(); - - return Val.template get(); + // If T is PT1/PT2 choose IsInnerUnion otherwise choose IsPT3. + typedef typename + ::llvm::PointerUnionTypeSelector + >::Return Ty; + return Ty(Val).get(); } /// dyn_cast() - If the current value is of the specified pointer type, @@ -302,12 +362,13 @@ namespace llvm { /// is() return true if the Union currently holds the type matching T. template int is() const { - // Is it PT1/PT2? - if (::llvm::getPointerUnionTypeNum((T*)0) != -1) - return Val.template is() && - Val.template get().template is(); - return Val.template is() && - Val.template get().template is(); + // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. + typedef typename + ::llvm::PointerUnionTypeSelector + >::Return Ty; + return Val.template is() && + Val.template get().template is(); } /// get() - Return the value of the specified pointer type. If the @@ -315,11 +376,12 @@ namespace llvm { template T get() const { assert(is() && "Invalid accessor called"); - // Is it PT1/PT2? - if (::llvm::getPointerUnionTypeNum((T*)0) != -1) - return Val.template get().template get(); - - return Val.template get().template get(); + // If T is PT1/PT2 choose InnerUnion1 otherwise choose InnerUnion2. 
+ typedef typename + ::llvm::PointerUnionTypeSelector + >::Return Ty; + return Val.template get().template get(); } /// dyn_cast() - If the current value is of the specified pointer type, diff --git a/include/llvm/ADT/ScopedHashTable.h b/include/llvm/ADT/ScopedHashTable.h index af3c482043b1..a6803ee0eddf 100644 --- a/include/llvm/ADT/ScopedHashTable.h +++ b/include/llvm/ADT/ScopedHashTable.h @@ -96,6 +96,9 @@ class ScopedHashTableScope { ScopedHashTableScope(ScopedHashTable &HT); ~ScopedHashTableScope(); + ScopedHashTableScope *getParentScope() { return PrevScope; } + const ScopedHashTableScope *getParentScope() const { return PrevScope; } + private: friend class ScopedHashTable; ScopedHashTableVal *getLastValInScope() { @@ -141,9 +144,14 @@ class ScopedHashTableIterator { template class ScopedHashTable { +public: + /// ScopeTy - This is a helpful typedef that allows clients to get easy access + /// to the name of the scope for this hash table. + typedef ScopedHashTableScope ScopeTy; +private: typedef ScopedHashTableVal ValTy; DenseMap TopLevelMap; - ScopedHashTableScope *CurScope; + ScopeTy *CurScope; AllocatorTy Allocator; @@ -157,9 +165,6 @@ class ScopedHashTable { assert(CurScope == 0 && TopLevelMap.empty() && "Scope imbalance!"); } - /// ScopeTy - This is a helpful typedef that allows clients to get easy access - /// to the name of the scope for this hash table. - typedef ScopedHashTableScope ScopeTy; /// Access to the allocator. 
typedef typename ReferenceAdder::result AllocatorRefTy; @@ -180,13 +185,7 @@ class ScopedHashTable { } void insert(const K &Key, const V &Val) { - assert(CurScope && "No scope active!"); - - ScopedHashTableVal *&KeyEntry = TopLevelMap[Key]; - - KeyEntry = ValTy::Create(CurScope->getLastValInScope(), KeyEntry, Key, Val, - Allocator); - CurScope->setLastValInScope(KeyEntry); + insertIntoScope(CurScope, Key, Val); } typedef ScopedHashTableIterator iterator; @@ -199,6 +198,21 @@ class ScopedHashTable { if (I == TopLevelMap.end()) return end(); return iterator(I->second); } + + ScopeTy *getCurScope() { return CurScope; } + const ScopeTy *getCurScope() const { return CurScope; } + + /// insertIntoScope - This inserts the specified key/value at the specified + /// (possibly not the current) scope. While it is ok to insert into a scope + /// that isn't the current one, it isn't ok to insert *underneath* an existing + /// value of the specified key. + void insertIntoScope(ScopeTy *S, const K &Key, const V &Val) { + assert(S && "No scope active!"); + ScopedHashTableVal *&KeyEntry = TopLevelMap[Key]; + KeyEntry = ValTy::Create(S->getLastValInScope(), KeyEntry, Key, Val, + Allocator); + S->setLastValInScope(KeyEntry); + } }; /// ScopedHashTableScope ctor - Install this as the current scope for the hash diff --git a/include/llvm/ADT/SmallPtrSet.h b/include/llvm/ADT/SmallPtrSet.h index ff32ba87a264..9992858d67b0 100644 --- a/include/llvm/ADT/SmallPtrSet.h +++ b/include/llvm/ADT/SmallPtrSet.h @@ -133,7 +133,7 @@ class SmallPtrSetImpl { void shrink_and_clear(); /// Grow - Allocate a larger backing store for the buckets and move it over. - void Grow(); + void Grow(unsigned NewSize); void operator=(const SmallPtrSetImpl &RHS); // DO NOT IMPLEMENT. 
protected: diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h index f137ea21d058..fda99c6edbc3 100644 --- a/include/llvm/ADT/Statistic.h +++ b/include/llvm/ADT/Statistic.h @@ -121,6 +121,9 @@ class Statistic { /// \brief Enable the collection and printing of statistics. void EnableStatistics(); +/// \brief Check if statistics are enabled. +bool AreStatisticsEnabled(); + /// \brief Print statistics to the file returned by CreateInfoOutputFile(). void PrintStatistics(); diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index acbed66ef401..5f5c04187ada 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -20,7 +20,6 @@ #include #include #include -#include namespace llvm { template class SmallVectorImpl; @@ -153,7 +152,7 @@ void SplitString(StringRef Source, SmallVectorImpl &OutFragments, StringRef Delimiters = " \t\n\v\f\r"); -/// HashString - Hash funtion for strings. +/// HashString - Hash function for strings. /// /// This is the Bernstein hash function. // diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h index bad0e6f5136a..934cacc78a8c 100644 --- a/include/llvm/ADT/StringMap.h +++ b/include/llvm/ADT/StringMap.h @@ -17,7 +17,6 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include -#include namespace llvm { template @@ -81,16 +80,6 @@ class StringMapImpl { StringMapImpl(unsigned InitSize, unsigned ItemSize); void RehashTable(); - /// ShouldRehash - Return true if the table should be rehashed after a new - /// element was recently inserted. - bool ShouldRehash() const { - // If the hash table is now more than 3/4 full, or if fewer than 1/8 of - // the buckets are empty (meaning that many are filled with tombstones), - // grow the table. - return NumItems*4 > NumBuckets*3 || - NumBuckets-(NumItems+NumTombstones) < NumBuckets/8; - } - /// LookupBucketFor - Look up the bucket that the specified string should end /// up in. 
If it already exists as a key in the map, the Item pointer for the /// specified bucket will be non-null. Otherwise, it will be null. In either @@ -339,9 +328,9 @@ class StringMap : public StringMapImpl { --NumTombstones; Bucket.Item = KeyValue; ++NumItems; + assert(NumItems + NumTombstones <= NumBuckets); - if (ShouldRehash()) - RehashTable(); + RehashTable(); return true; } @@ -359,6 +348,7 @@ class StringMap : public StringMapImpl { } NumItems = 0; + NumTombstones = 0; } /// GetOrCreateValue - Look up the specified key in the table. If a value @@ -378,13 +368,13 @@ class StringMap : public StringMapImpl { if (Bucket.Item == getTombstoneVal()) --NumTombstones; ++NumItems; + assert(NumItems + NumTombstones <= NumBuckets); // Fill in the bucket for the hash table. The FullHashValue was already // filled in by LookupBucketFor. Bucket.Item = NewItem; - if (ShouldRehash()) - RehashTable(); + RehashTable(); return *NewItem; } diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h index e6dcc23258f2..2659bce61750 100644 --- a/include/llvm/ADT/Triple.h +++ b/include/llvm/ADT/Triple.h @@ -64,7 +64,8 @@ class Triple { x86_64, // X86-64: amd64, x86_64 xcore, // XCore: xcore mblaze, // MBlaze: mblaze - ptx, // PTX: ptx + ptx32, // PTX: ptx (32-bit) + ptx64, // PTX: ptx (64-bit) InvalidArch }; @@ -72,7 +73,8 @@ class Triple { UnknownVendor, Apple, - PC + PC, + SCEI }; enum OSType { UnknownOS, @@ -82,8 +84,10 @@ class Triple { Darwin, DragonFly, FreeBSD, + IOS, Linux, Lv2, // PS3 + MacOSX, MinGW32, // i*86-pc-mingw32, *-w64-mingw32 NetBSD, OpenBSD, @@ -221,21 +225,81 @@ class Triple { /// if the environment component is present). StringRef getOSAndEnvironmentName() const; + /// getOSVersion - Parse the version number from the OS name component of the + /// triple, if present. + /// + /// For example, "fooos1.2.3" would return (1, 2, 3). + /// + /// If an entry is not defined, it will be returned as 0. 
+ void getOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const; - /// getDarwinNumber - Parse the 'darwin number' out of the specific target - /// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is - /// not defined, return 0's. This requires that the triple have an OSType of - /// darwin before it is called. - void getDarwinNumber(unsigned &Maj, unsigned &Min, unsigned &Revision) const; - - /// getDarwinMajorNumber - Return just the major version number, this is + /// getOSMajorVersion - Return just the major version number, this is /// specialized because it is a common query. - unsigned getDarwinMajorNumber() const { - unsigned Maj, Min, Rev; - getDarwinNumber(Maj, Min, Rev); + unsigned getOSMajorVersion() const { + unsigned Maj, Min, Micro; + getDarwinNumber(Maj, Min, Micro); return Maj; } + void getDarwinNumber(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { + return getOSVersion(Major, Minor, Micro); + } + + unsigned getDarwinMajorNumber() const { + return getOSMajorVersion(); + } + + /// isOSVersionLT - Helper function for doing comparisons against version + /// numbers included in the target triple. + bool isOSVersionLT(unsigned Major, unsigned Minor = 0, + unsigned Micro = 0) const { + unsigned LHS[3]; + getOSVersion(LHS[0], LHS[1], LHS[2]); + + if (LHS[0] != Major) + return LHS[0] < Major; + if (LHS[1] != Minor) + return LHS[1] < Minor; + if (LHS[2] != Micro) + return LHS[2] < Micro; + + return false; + } + + /// isMacOSX - Is this a Mac OS X triple. For legacy reasons, we support both + /// "darwin" and "osx" as OS X triples. + bool isMacOSX() const { + return getOS() == Triple::Darwin || getOS() == Triple::MacOSX; + } + + /// isOSDarwin - Is this a "Darwin" OS (OS X or iOS). + bool isOSDarwin() const { + return isMacOSX() || getOS() == Triple::IOS; + } + + /// isOSWindows - Is this a "Windows" OS. 
+ bool isOSWindows() const { + return getOS() == Triple::Win32 || getOS() == Triple::Cygwin || + getOS() == Triple::MinGW32; + } + + /// isMacOSXVersionLT - Comparison function for checking OS X version + /// compatibility, which handles supporting skewed version numbering schemes + /// used by the "darwin" triples. + unsigned isMacOSXVersionLT(unsigned Major, unsigned Minor = 0, + unsigned Micro = 0) const { + assert(isMacOSX() && "Not an OS X triple!"); + + // If this is OS X, expect a sane version number. + if (getOS() == Triple::MacOSX) + return isOSVersionLT(Major, Minor, Micro); + + // Otherwise, compare to the "Darwin" number. + assert(Major == 10 && "Unexpected major version"); + return isOSVersionLT(Minor + 4, Micro, 0); + } + /// @} /// @name Mutators /// @{ diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h index 865fcb3d8aad..bcacfd9df426 100644 --- a/include/llvm/ADT/ilist.h +++ b/include/llvm/ADT/ilist.h @@ -289,7 +289,7 @@ template struct simplify_type > { //===----------------------------------------------------------------------===// // /// iplist - The subset of list functionality that can safely be used on nodes -/// of polymorphic types, i.e. a heterogenous list with a common base class that +/// of polymorphic types, i.e. a heterogeneous list with a common base class that /// holds the next/prev pointers. The only state of the list itself is a single /// pointer to the head of the list. 
/// diff --git a/include/llvm/Analysis/AliasAnalysis.h b/include/llvm/Analysis/AliasAnalysis.h index 71a5982c7d39..8f9708b3d889 100644 --- a/include/llvm/Analysis/AliasAnalysis.h +++ b/include/llvm/Analysis/AliasAnalysis.h @@ -38,7 +38,6 @@ #define LLVM_ANALYSIS_ALIAS_ANALYSIS_H #include "llvm/Support/CallSite.h" -#include namespace llvm { diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h index e844d10dda03..03149c662e83 100644 --- a/include/llvm/Analysis/AliasSetTracker.h +++ b/include/llvm/Analysis/AliasSetTracker.h @@ -259,6 +259,7 @@ class AliasSet : public ilist_node { if (CallSites[i] == CS.getInstruction()) { CallSites[i] = CallSites.back(); CallSites.pop_back(); + --i; --e; // Revisit the moved entry. } } void setVolatile() { Volatile = true; } @@ -283,6 +284,7 @@ class AliasSetTracker { class ASTCallbackVH : public CallbackVH { AliasSetTracker *AST; virtual void deleted(); + virtual void allUsesReplacedWith(Value *); public: ASTCallbackVH(Value *V, AliasSetTracker *AST = 0); ASTCallbackVH &operator=(Value *V); diff --git a/include/llvm/Analysis/CFGPrinter.h b/include/llvm/Analysis/CFGPrinter.h index ac8f59602dab..61614e34dacc 100644 --- a/include/llvm/Analysis/CFGPrinter.h +++ b/include/llvm/Analysis/CFGPrinter.h @@ -15,6 +15,7 @@ #ifndef LLVM_ANALYSIS_CFGPRINTER_H #define LLVM_ANALYSIS_CFGPRINTER_H +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Assembly/Writer.h" diff --git a/include/llvm/Analysis/DIBuilder.h b/include/llvm/Analysis/DIBuilder.h index 417dbc4e802c..5846dbff041a 100644 --- a/include/llvm/Analysis/DIBuilder.h +++ b/include/llvm/Analysis/DIBuilder.h @@ -16,6 +16,7 @@ #define LLVM_ANALYSIS_DIBUILDER_H #include "llvm/Support/DataTypes.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" namespace llvm { @@ -146,6 +147,30 @@ namespace llvm { uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags, DIType Ty); + /// createObjCIVar 
- Create debugging information entry for Objective-C + /// instance variable. + /// @param Name Member name. + /// @param File File where this member is defined. + /// @param LineNo Line number. + /// @param SizeInBits Member size. + /// @param AlignInBits Member alignment. + /// @param OffsetInBits Member offset. + /// @param Flags Flags to encode member attribute, e.g. private + /// @param Ty Parent type. + /// @param PropertyName Name of the Objective C property assoicated with + /// this ivar. + /// @param GetterName Name of the Objective C property getter selector. + /// @param SetterName Name of the Objective C property setter selector. + /// @param PropertyAttributes Objective C property attributes. + DIType createObjCIVar(StringRef Name, DIFile File, + unsigned LineNo, uint64_t SizeInBits, + uint64_t AlignInBits, uint64_t OffsetInBits, + unsigned Flags, DIType Ty, + StringRef PropertyName = StringRef(), + StringRef PropertyGetterName = StringRef(), + StringRef PropertySetterName = StringRef(), + unsigned PropertyAttributes = 0); + /// createClassType - Create debugging information entry for a class. /// @param Scope Scope in which this class is defined. /// @param Name class name. @@ -278,7 +303,7 @@ namespace llvm { DIDescriptor createUnspecifiedParameter(); /// getOrCreateArray - Get a DIArray, create one if required. - DIArray getOrCreateArray(Value *const *Elements, unsigned NumElements); + DIArray getOrCreateArray(ArrayRef Elements); /// getOrCreateSubrange - Create a descriptor for a value range. This /// implicitly uniques the values returned. @@ -326,11 +351,14 @@ namespace llvm { /// @param AlwaysPreserve Boolean. Set to true if debug info for this /// variable should be preserved in optimized build. /// @param Flags Flags, e.g. artificial variable. + /// @param ArgNo If this variable is an arugment then this argument's + /// number. 1 indicates 1st argument. 
DIVariable createLocalVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo, DIType Ty, bool AlwaysPreserve = false, - unsigned Flags = 0); + unsigned Flags = 0, + unsigned ArgNo = 0); /// createComplexVariable - Create a new descriptor for the specified @@ -342,12 +370,13 @@ namespace llvm { /// @param File File where this variable is defined. /// @param LineNo Line number. /// @param Ty Variable Type - /// @param Addr A pointer to a vector of complex address operations. - /// @param NumAddr Num of address operations in the vector. + /// @param Addr An array of complex address operations. + /// @param ArgNo If this variable is an arugment then this argument's + /// number. 1 indicates 1st argument. DIVariable createComplexVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile F, unsigned LineNo, - DIType Ty, Value *const *Addr, - unsigned NumAddr); + DIType Ty, ArrayRef Addr, + unsigned ArgNo = 0); /// createFunction - Create a new descriptor for the specified subprogram. /// See comments in DISubprogram for descriptions of these fields. @@ -363,6 +392,7 @@ namespace llvm { /// This flags are used to emit dwarf attributes. /// @param isOptimized True if optimization is ON. /// @param Fn llvm::Function pointer. + /// @param TParam Function template parameters. DISubprogram createFunction(DIDescriptor Scope, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, @@ -370,7 +400,9 @@ namespace llvm { bool isDefinition, unsigned Flags = 0, bool isOptimized = false, - Function *Fn = 0); + Function *Fn = 0, + MDNode *TParam = 0, + MDNode *Decl = 0); /// createMethod - Create a new descriptor for the specified C++ method. /// See comments in DISubprogram for descriptions of these fields. @@ -382,7 +414,7 @@ namespace llvm { /// @param Ty Function type. /// @param isLocalToUnit True if this function is not externally visible.. /// @param isDefinition True if this is a function definition. 
- /// @param Virtuality Attributes describing virutallness. e.g. pure + /// @param Virtuality Attributes describing virtualness. e.g. pure /// virtual function. /// @param VTableIndex Index no of this method in virtual table. /// @param VTableHolder Type that holds vtable. @@ -390,6 +422,7 @@ namespace llvm { /// This flags are used to emit dwarf attributes. /// @param isOptimized True if optimization is ON. /// @param Fn llvm::Function pointer. + /// @param TParam Function template parameters. DISubprogram createMethod(DIDescriptor Scope, StringRef Name, StringRef LinkageName, DIFile File, unsigned LineNo, @@ -399,7 +432,8 @@ namespace llvm { MDNode *VTableHolder = 0, unsigned Flags = 0, bool isOptimized = false, - Function *Fn = 0); + Function *Fn = 0, + MDNode *TParam = 0); /// createNameSpace - This creates new descriptor for a namespace /// with the specified parent scope. diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h index aa69088b425b..c6cc8f7665ed 100644 --- a/include/llvm/Analysis/DebugInfo.h +++ b/include/llvm/Analysis/DebugInfo.h @@ -332,6 +332,32 @@ namespace llvm { /// return base type size. 
uint64_t getOriginalTypeSize() const; + StringRef getObjCPropertyName() const { return getStringField(10); } + StringRef getObjCPropertyGetterName() const { + return getStringField(11); + } + StringRef getObjCPropertySetterName() const { + return getStringField(12); + } + bool isReadOnlyObjCProperty() { + return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readonly) != 0; + } + bool isReadWriteObjCProperty() { + return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_readwrite) != 0; + } + bool isAssignObjCProperty() { + return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_assign) != 0; + } + bool isRetainObjCProperty() { + return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_retain) != 0; + } + bool isCopyObjCProperty() { + return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_copy) != 0; + } + bool isNonAtomicObjCProperty() { + return (getUnsignedField(13) & dwarf::DW_APPLE_PROPERTY_nonatomic) != 0; + } + /// Verify - Verify that a derived type descriptor is well formed. bool Verify() const; @@ -511,6 +537,10 @@ namespace llvm { bool describes(const Function *F); Function *getFunction() const { return getFunctionField(16); } + DIArray getTemplateParams() const { return getFieldAs(17); } + DISubprogram getFunctionDeclaration() const { + return getFieldAs(18); + } }; /// DIGlobalVariable - This is a wrapper for a global variable. @@ -564,7 +594,13 @@ namespace llvm { DIFile F = getFieldAs(3); return F.getCompileUnit(); } - unsigned getLineNumber() const { return getUnsignedField(4); } + unsigned getLineNumber() const { + return (getUnsignedField(4) << 8) >> 8; + } + unsigned getArgNumber() const { + unsigned L = getUnsignedField(4); + return L >> 24; + } DIType getType() const { return getFieldAs(5); } /// isArtificial - Return true if this variable is marked as "artificial". 
@@ -586,7 +622,9 @@ namespace llvm { unsigned getNumAddrElements() const; uint64_t getAddrElement(unsigned Idx) const { - return getUInt64Field(Idx+6); + if (getVersion() <= llvm::LLVMDebugVersion8) + return getUInt64Field(Idx+6); + return getUInt64Field(Idx+7); } /// isBlockByrefVariable - Return true if the variable was declared as @@ -660,214 +698,6 @@ namespace llvm { bool Verify() const; }; - /// DIFactory - This object assists with the construction of the various - /// descriptors. - class DIFactory { - Module &M; - LLVMContext& VMContext; - - Function *DeclareFn; // llvm.dbg.declare - Function *ValueFn; // llvm.dbg.value - - DIFactory(const DIFactory &); // DO NOT IMPLEMENT - void operator=(const DIFactory&); // DO NOT IMPLEMENT - public: - enum ComplexAddrKind { OpPlus=1, OpDeref }; - - explicit DIFactory(Module &m); - - /// GetOrCreateArray - Create an descriptor for an array of descriptors. - /// This implicitly uniques the arrays created. - DIArray GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys); - - /// GetOrCreateSubrange - Create a descriptor for a value range. This - /// implicitly uniques the values returned. - DISubrange GetOrCreateSubrange(int64_t Lo, int64_t Hi); - - /// CreateUnspecifiedParameter - Create unspeicified type descriptor - /// for a subroutine type. - DIDescriptor CreateUnspecifiedParameter(); - - /// CreateCompileUnit - Create a new descriptor for the specified compile - /// unit. - DICompileUnit CreateCompileUnit(unsigned LangID, - StringRef Filename, - StringRef Directory, - StringRef Producer, - bool isMain = false, - bool isOptimized = false, - StringRef Flags = "", - unsigned RunTimeVer = 0); - - /// CreateFile - Create a new descriptor for the specified file. - DIFile CreateFile(StringRef Filename, StringRef Directory, - DICompileUnit CU); - - /// CreateEnumerator - Create a single enumerator value. 
- DIEnumerator CreateEnumerator(StringRef Name, uint64_t Val); - - /// CreateBasicType - Create a basic type like int, float, etc. - DIBasicType CreateBasicType(DIDescriptor Context, StringRef Name, - DIFile F, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - unsigned Encoding); - - /// CreateBasicType - Create a basic type like int, float, etc. - DIBasicType CreateBasicTypeEx(DIDescriptor Context, StringRef Name, - DIFile F, unsigned LineNumber, - Constant *SizeInBits, Constant *AlignInBits, - Constant *OffsetInBits, unsigned Flags, - unsigned Encoding); - - /// CreateDerivedType - Create a derived type like const qualified type, - /// pointer, typedef, etc. - DIDerivedType CreateDerivedType(unsigned Tag, DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType DerivedFrom); - - /// CreateDerivedType - Create a derived type like const qualified type, - /// pointer, typedef, etc. - DIDerivedType CreateDerivedTypeEx(unsigned Tag, DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - Constant *SizeInBits, - Constant *AlignInBits, - Constant *OffsetInBits, unsigned Flags, - DIType DerivedFrom); - - /// CreateCompositeType - Create a composite type like array, struct, etc. - DICompositeType CreateCompositeType(unsigned Tag, DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType DerivedFrom, - DIArray Elements, - unsigned RunTimeLang = 0, - MDNode *ContainingType = 0); - - /// CreateTemporaryType - Create a temporary forward-declared type. - DIType CreateTemporaryType(); - DIType CreateTemporaryType(DIFile F); - - /// CreateArtificialType - Create a new DIType with "artificial" flag set. 
- DIType CreateArtificialType(DIType Ty); - - /// CreateCompositeType - Create a composite type like array, struct, etc. - DICompositeType CreateCompositeTypeEx(unsigned Tag, DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - Constant *SizeInBits, - Constant *AlignInBits, - Constant *OffsetInBits, - unsigned Flags, - DIType DerivedFrom, - DIArray Elements, - unsigned RunTimeLang = 0, - MDNode *ContainingType = 0); - - /// CreateSubprogram - Create a new descriptor for the specified subprogram. - /// See comments in DISubprogram for descriptions of these fields. - DISubprogram CreateSubprogram(DIDescriptor Context, StringRef Name, - StringRef DisplayName, - StringRef LinkageName, - DIFile F, unsigned LineNo, - DIType Ty, bool isLocalToUnit, - bool isDefinition, - unsigned VK = 0, - unsigned VIndex = 0, - DIType ContainingType = DIType(), - unsigned Flags = 0, - bool isOptimized = false, - Function *Fn = 0); - - /// CreateSubprogramDefinition - Create new subprogram descriptor for the - /// given declaration. - DISubprogram CreateSubprogramDefinition(DISubprogram &SPDeclaration); - - /// CreateGlobalVariable - Create a new descriptor for the specified global. - DIGlobalVariable - CreateGlobalVariable(DIDescriptor Context, StringRef Name, - StringRef DisplayName, - StringRef LinkageName, - DIFile F, - unsigned LineNo, DIType Ty, bool isLocalToUnit, - bool isDefinition, llvm::GlobalVariable *GV); - - /// CreateGlobalVariable - Create a new descriptor for the specified constant. - DIGlobalVariable - CreateGlobalVariable(DIDescriptor Context, StringRef Name, - StringRef DisplayName, - StringRef LinkageName, - DIFile F, - unsigned LineNo, DIType Ty, bool isLocalToUnit, - bool isDefinition, llvm::Constant *C); - - /// CreateVariable - Create a new descriptor for the specified variable. 
- DIVariable CreateVariable(unsigned Tag, DIDescriptor Context, - StringRef Name, - DIFile F, unsigned LineNo, - DIType Ty, bool AlwaysPreserve = false, - unsigned Flags = 0); - - /// CreateComplexVariable - Create a new descriptor for the specified - /// variable which has a complex address expression for its address. - DIVariable CreateComplexVariable(unsigned Tag, DIDescriptor Context, - StringRef Name, DIFile F, unsigned LineNo, - DIType Ty, Value *const *Addr, - unsigned NumAddr); - - /// CreateLexicalBlock - This creates a descriptor for a lexical block - /// with the specified parent context. - DILexicalBlock CreateLexicalBlock(DIDescriptor Context, DIFile F, - unsigned Line = 0, unsigned Col = 0); - - /// CreateNameSpace - This creates new descriptor for a namespace - /// with the specified parent context. - DINameSpace CreateNameSpace(DIDescriptor Context, StringRef Name, - DIFile F, unsigned LineNo); - - /// CreateLocation - Creates a debug info location. - DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo, - DIScope S, DILocation OrigLoc); - - /// CreateLocation - Creates a debug info location. - DILocation CreateLocation(unsigned LineNo, unsigned ColumnNo, - DIScope S, MDNode *OrigLoc = 0); - - /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. - Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D, - BasicBlock *InsertAtEnd); - - /// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. - Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D, - Instruction *InsertBefore); - - /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. - Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset, - DIVariable D, BasicBlock *InsertAtEnd); - - /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. 
- Instruction *InsertDbgValueIntrinsic(llvm::Value *V, uint64_t Offset, - DIVariable D, Instruction *InsertBefore); - - // RecordType - Record DIType in a module such that it is not lost even if - // it is not referenced through debug info anchors. - void RecordType(DIType T); - - private: - Constant *GetTagConstant(unsigned TAG); - }; - /// getDISubprogram - Find subprogram that is enclosing this scope. DISubprogram getDISubprogram(const MDNode *Scope); diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h index 578e6aba8338..e56d24d583df 100644 --- a/include/llvm/Analysis/IVUsers.h +++ b/include/llvm/Analysis/IVUsers.h @@ -28,6 +28,7 @@ class IVUsers; class ScalarEvolution; class SCEV; class IVUsers; +class TargetData; /// IVStrideUse - Keep track of one use of a strided induction variable. /// The Expr member keeps track of the expression, User is the actual user @@ -122,6 +123,7 @@ class IVUsers : public LoopPass { LoopInfo *LI; DominatorTree *DT; ScalarEvolution *SE; + TargetData *TD; SmallPtrSet Processed; /// IVUses - A list of all tracked IV uses of induction variable expressions diff --git a/include/llvm/Analysis/InlineCost.h b/include/llvm/Analysis/InlineCost.h index b08bf57ace96..a0cce515e9e2 100644 --- a/include/llvm/Analysis/InlineCost.h +++ b/include/llvm/Analysis/InlineCost.h @@ -43,7 +43,7 @@ namespace llvm { /// InlineCost - Represent the cost of inlining a function. This /// supports special values for functions which should "always" or /// "never" be inlined. Otherwise, the cost represents a unitless - /// amount; smaller values increase the likelyhood of the function + /// amount; smaller values increase the likelihood of the function /// being inlined. 
class InlineCost { enum Kind { diff --git a/include/llvm/Analysis/InstructionSimplify.h b/include/llvm/Analysis/InstructionSimplify.h index dff1ba2f7beb..bc6e55f5490a 100644 --- a/include/llvm/Analysis/InstructionSimplify.h +++ b/include/llvm/Analysis/InstructionSimplify.h @@ -55,6 +55,21 @@ namespace llvm { Value *SimplifyFDivInst(Value *LHS, Value *RHS, const TargetData *TD = 0, const DominatorTree *DT = 0); + /// SimplifySRemInst - Given operands for an SRem, see if we can + /// fold the result. If not, this returns null. + Value *SimplifySRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const DominatorTree *DT = 0); + + /// SimplifyURemInst - Given operands for a URem, see if we can + /// fold the result. If not, this returns null. + Value *SimplifyURemInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const DominatorTree *DT = 0); + + /// SimplifyFRemInst - Given operands for an FRem, see if we can + /// fold the result. If not, this returns null. + Value *SimplifyFRemInst(Value *LHS, Value *RHS, const TargetData *TD = 0, + const DominatorTree *DT = 0); + /// SimplifyShlInst - Given operands for a Shl, see if we can /// fold the result. If not, this returns null. Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, diff --git a/include/llvm/Analysis/Lint.h b/include/llvm/Analysis/Lint.h index eb65d2236441..7c88b137ec3b 100644 --- a/include/llvm/Analysis/Lint.h +++ b/include/llvm/Analysis/Lint.h @@ -20,8 +20,6 @@ #ifndef LLVM_ANALYSIS_LINT_H #define LLVM_ANALYSIS_LINT_H -#include - namespace llvm { class FunctionPass; diff --git a/include/llvm/Analysis/LiveValues.h b/include/llvm/Analysis/LiveValues.h deleted file mode 100644 index b92cb7833a7e..000000000000 --- a/include/llvm/Analysis/LiveValues.h +++ /dev/null @@ -1,99 +0,0 @@ -//===- LiveValues.h - Liveness information for LLVM IR Values. 
------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the LLVM IR Value liveness -// analysis pass. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_ANALYSIS_LIVEVALUES_H -#define LLVM_ANALYSIS_LIVEVALUES_H - -#include "llvm/Pass.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" - -namespace llvm { - -class DominatorTree; -class LoopInfo; -class Value; - -/// LiveValues - Analysis that provides liveness information for -/// LLVM IR Values. -/// -class LiveValues : public FunctionPass { - DominatorTree *DT; - LoopInfo *LI; - - /// Memo - A bunch of state to be associated with a value. - /// - struct Memo { - /// Used - The set of blocks which contain a use of the value. - /// - SmallPtrSet Used; - - /// LiveThrough - A conservative approximation of the set of blocks in - /// which the value is live-through, meaning blocks properly dominated - /// by the definition, and from which blocks containing uses of the - /// value are reachable. - /// - SmallPtrSet LiveThrough; - - /// Killed - A conservative approximation of the set of blocks in which - /// the value is used and not live-out. - /// - SmallPtrSet Killed; - }; - - /// Memos - Remembers the Memo for each Value. This is populated on - /// demand. - /// - DenseMap Memos; - - /// getMemo - Retrieve an existing Memo for the given value if one - /// is available, otherwise compute a new one. - /// - Memo &getMemo(const Value *V); - - /// compute - Compute a new Memo for the given value. 
- /// - Memo &compute(const Value *V); - -public: - static char ID; - LiveValues(); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - virtual bool runOnFunction(Function &F); - virtual void releaseMemory(); - - /// isUsedInBlock - Test if the given value is used in the given block. - /// - bool isUsedInBlock(const Value *V, const BasicBlock *BB); - - /// isLiveThroughBlock - Test if the given value is known to be - /// live-through the given block, meaning that the block is properly - /// dominated by the value's definition, and there exists a block - /// reachable from it that contains a use. This uses a conservative - /// approximation that errs on the side of returning false. - /// - bool isLiveThroughBlock(const Value *V, const BasicBlock *BB); - - /// isKilledInBlock - Test if the given value is known to be killed in - /// the given block, meaning that the block contains a use of the value, - /// and no blocks reachable from the block contain a use. This uses a - /// conservative approximation that errs on the side of returning false. - /// - bool isKilledInBlock(const Value *V, const BasicBlock *BB); -}; - -} // end namespace llvm - -#endif diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h index 4d5dd1987f28..b56fe08e23d8 100644 --- a/include/llvm/Analysis/MemoryDependenceAnalysis.h +++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h @@ -48,6 +48,11 @@ namespace llvm { /// this occurs when we see a may-aliased store to the memory location we /// care about. /// + /// There are several cases that may be interesting here: + /// 1. Loads are clobbered by may-alias stores. + /// 2. Loads are considered clobbered by partially-aliased loads. The + /// client may choose to analyze deeper into these cases. + /// /// A dependence query on the first instruction of the entry block will /// return a clobber(self) result. 
Clobber, @@ -350,6 +355,20 @@ namespace llvm { BasicBlock::iterator ScanIt, BasicBlock *BB); + + /// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that + /// looks at a memory location for a load (specified by MemLocBase, Offs, + /// and Size) and compares it against a load. If the specified load could + /// be safely widened to a larger integer load that is 1) still efficient, + /// 2) safe for the target, and 3) would provide the specified memory + /// location value, then this function returns the size in bytes of the + /// load width to use. If not, this returns zero. + static unsigned getLoadLoadClobberFullWidthSize(const Value *MemLocBase, + int64_t MemLocOffs, + unsigned MemLocSize, + const LoadInst *LI, + const TargetData &TD); + private: MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall, BasicBlock::iterator ScanIt, diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h index 5b0c5b1e6bec..0eff75fe2f8c 100644 --- a/include/llvm/Analysis/Passes.h +++ b/include/llvm/Analysis/Passes.h @@ -157,12 +157,6 @@ namespace llvm { // ModulePass *createSteensgaardPass(); - //===--------------------------------------------------------------------===// - // - // createLiveValuesPass - This creates an instance of the LiveValues pass. 
- // - FunctionPass *createLiveValuesPass(); - //===--------------------------------------------------------------------===// // /// createLazyValueInfoPass - This creates an instance of the LazyValueInfo diff --git a/include/llvm/Analysis/PathProfileInfo.h b/include/llvm/Analysis/PathProfileInfo.h index 263763f7a8db..cef6d2d2a6c8 100644 --- a/include/llvm/Analysis/PathProfileInfo.h +++ b/include/llvm/Analysis/PathProfileInfo.h @@ -16,7 +16,6 @@ #include "llvm/BasicBlock.h" #include "llvm/Analysis/PathNumbering.h" -#include namespace llvm { diff --git a/include/llvm/Analysis/PostDominators.h b/include/llvm/Analysis/PostDominators.h index 2cd6ae346eeb..0eddb9105e60 100644 --- a/include/llvm/Analysis/PostDominators.h +++ b/include/llvm/Analysis/PostDominators.h @@ -14,7 +14,7 @@ #ifndef LLVM_ANALYSIS_POST_DOMINATORS_H #define LLVM_ANALYSIS_POST_DOMINATORS_H -#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/Dominators.h" namespace llvm { @@ -101,37 +101,6 @@ template <> struct GraphTraits } }; -/// PostDominanceFrontier Class - Concrete subclass of DominanceFrontier that is -/// used to compute the a post-dominance frontier. 
-/// -struct PostDominanceFrontier : public DominanceFrontierBase { - static char ID; - PostDominanceFrontier() - : DominanceFrontierBase(ID, true) { - initializePostDominanceFrontierPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnFunction(Function &) { - Frontiers.clear(); - PostDominatorTree &DT = getAnalysis(); - Roots = DT.getRoots(); - if (const DomTreeNode *Root = DT.getRootNode()) - calculate(DT, Root); - return false; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); - } - -private: - const DomSetType &calculate(const PostDominatorTree &DT, - const DomTreeNode *Node); -}; - -FunctionPass* createPostDomFrontier(); - } // End llvm namespace #endif diff --git a/include/llvm/Analysis/RegionInfo.h b/include/llvm/Analysis/RegionInfo.h index a36ca110d8c0..9d8954595d61 100644 --- a/include/llvm/Analysis/RegionInfo.h +++ b/include/llvm/Analysis/RegionInfo.h @@ -28,9 +28,10 @@ #define LLVM_ANALYSIS_REGION_INFO_H #include "llvm/ADT/PointerIntPair.h" -#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/DominanceFrontier.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Support/Allocator.h" +#include namespace llvm { @@ -145,7 +146,7 @@ inline Region* RegionNode::getNodeAs() const { /// two connections to the remaining graph. It can be used to analyze or /// optimize parts of the control flow graph. /// -/// A simple Region is connected to the remaing graph by just two +/// A simple Region is connected to the remaining graph by just two /// edges. One edge entering the Region and another one leaving the Region. /// /// An extended Region (or just Region) is a subgraph that can be @@ -335,12 +336,16 @@ class Region : public RegionNode { return RI; } + /// PrintStyle - Print region in difference ways. + enum PrintStyle { PrintNone, PrintBB, PrintRN }; + /// @brief Print the region. /// /// @param OS The output stream the Region is printed to. 
/// @param printTree Print also the tree of subregions. /// @param level The indentation level used for printing. - void print(raw_ostream& OS, bool printTree = true, unsigned level = 0) const; + void print(raw_ostream& OS, bool printTree = true, unsigned level = 0, + enum PrintStyle Style = PrintNone) const; /// @brief Print the region to stderr. void dump() const; @@ -438,7 +443,7 @@ class Region : public RegionNode { /// @brief Move all direct child nodes of this Region to another Region. /// - /// @param To The Region the child nodes will be transfered to. + /// @param To The Region the child nodes will be transferred to. void transferChildrenTo(Region *To); /// @brief Verify if the region is a correct region. diff --git a/include/llvm/Analysis/RegionIterator.h b/include/llvm/Analysis/RegionIterator.h index ced5b528cbb1..7adc71ca82ac 100644 --- a/include/llvm/Analysis/RegionIterator.h +++ b/include/llvm/Analysis/RegionIterator.h @@ -20,7 +20,7 @@ namespace llvm { //===----------------------------------------------------------------------===// -/// @brief Hierachical RegionNode successor iterator. +/// @brief Hierarchical RegionNode successor iterator. /// /// This iterator iterates over all successors of a RegionNode. /// diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h index aedc06aa6cf3..5403e09c480c 100644 --- a/include/llvm/Analysis/RegionPass.h +++ b/include/llvm/Analysis/RegionPass.h @@ -54,7 +54,7 @@ class RegionPass : public Pass { /// @brief Get a pass to print the LLVM IR in the region. /// /// @param O The ouput stream to print the Region. - /// @param Banner The banner to seperate different printed passes. + /// @param Banner The banner to separate different printed passes. /// /// @return The pass to print the LLVM IR in the region. 
Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index d1938061bef6..a62f6a80d1a7 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -24,6 +24,7 @@ #include "llvm/Pass.h" #include "llvm/Instructions.h" #include "llvm/Function.h" +#include "llvm/Operator.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/Allocator.h" @@ -72,6 +73,29 @@ namespace llvm { void operator=(const SCEV &); // DO NOT IMPLEMENT public: + /// NoWrapFlags are bitfield indices into SubclassData. + /// + /// Add and Mul expressions may have no-unsigned-wrap or + /// no-signed-wrap properties, which are derived from the IR + /// operator. NSW is a misnomer that we use to mean no signed overflow or + /// underflow. + /// + /// AddRec expression may have a no-self-wraparound property if the + /// result can never reach the start value. This property is independent of + /// the actual start value and step direction. Self-wraparound is defined + /// purely in terms of the recurrence's loop, step size, and + /// bitwidth. Formally, a recurrence with no self-wraparound satisfies: + /// abs(step) * max-iteration(loop) <= unsigned-max(bitwidth). + /// + /// Note that NUW and NSW are also valid properties of a recurrence, and + /// either implies NW. For convenience, NW will be set for a recurrence + /// whenever either NUW or NSW are set. + enum NoWrapFlags { FlagAnyWrap = 0, // No guarantee. + FlagNW = (1 << 0), // No self-wrap. + FlagNUW = (1 << 1), // No unsigned wrap. + FlagNSW = (1 << 2), // No signed wrap. + NoWrapMask = (1 << 3) -1 }; + explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) : FastID(ID), SCEVType(SCEVTy), SubclassData(0) {} @@ -159,6 +183,20 @@ namespace llvm { ProperlyDominatesBlock ///< The SCEV properly dominates the block. 
}; + /// Convenient NoWrapFlags manipulation that hides enum casts and is + /// visible in the ScalarEvolution name space. + static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags, int Mask) { + return (SCEV::NoWrapFlags)(Flags & Mask); + } + static SCEV::NoWrapFlags setFlags(SCEV::NoWrapFlags Flags, + SCEV::NoWrapFlags OnFlags) { + return (SCEV::NoWrapFlags)(Flags | OnFlags); + } + static SCEV::NoWrapFlags clearFlags(SCEV::NoWrapFlags Flags, + SCEV::NoWrapFlags OffFlags) { + return (SCEV::NoWrapFlags)(Flags & ~OffFlags); + } + private: /// SCEVCallbackVH - A CallbackVH to arrange for ScalarEvolution to be /// notified whenever a Value is deleted. @@ -465,44 +503,41 @@ namespace llvm { const SCEV *getSignExtendExpr(const SCEV *Op, const Type *Ty); const SCEV *getAnyExtendExpr(const SCEV *Op, const Type *Ty); const SCEV *getAddExpr(SmallVectorImpl &Ops, - bool HasNUW = false, bool HasNSW = false); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS, - bool HasNUW = false, bool HasNSW = false) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { SmallVector Ops; Ops.push_back(LHS); Ops.push_back(RHS); - return getAddExpr(Ops, HasNUW, HasNSW); + return getAddExpr(Ops, Flags); } - const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, - const SCEV *Op2, - bool HasNUW = false, bool HasNSW = false) { + const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2, + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) { SmallVector Ops; Ops.push_back(Op0); Ops.push_back(Op1); Ops.push_back(Op2); - return getAddExpr(Ops, HasNUW, HasNSW); + return getAddExpr(Ops, Flags); } const SCEV *getMulExpr(SmallVectorImpl &Ops, - bool HasNUW = false, bool HasNSW = false); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS, - bool HasNUW = false, bool HasNSW = false) { + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) + { SmallVector Ops; Ops.push_back(LHS); Ops.push_back(RHS); 
- return getMulExpr(Ops, HasNUW, HasNSW); + return getMulExpr(Ops, Flags); } const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step, - const Loop *L, - bool HasNUW = false, bool HasNSW = false); + const Loop *L, SCEV::NoWrapFlags Flags); const SCEV *getAddRecExpr(SmallVectorImpl &Operands, - const Loop *L, - bool HasNUW = false, bool HasNSW = false); + const Loop *L, SCEV::NoWrapFlags Flags); const SCEV *getAddRecExpr(const SmallVectorImpl &Operands, - const Loop *L, - bool HasNUW = false, bool HasNSW = false) { + const Loop *L, SCEV::NoWrapFlags Flags) { SmallVector NewOp(Operands.begin(), Operands.end()); - return getAddRecExpr(NewOp, L, HasNUW, HasNSW); + return getAddRecExpr(NewOp, L, Flags); } const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); const SCEV *getSMaxExpr(SmallVectorImpl &Operands); @@ -537,11 +572,9 @@ namespace llvm { /// const SCEV *getNotSCEV(const SCEV *V); - /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1, - /// and thus the HasNUW and HasNSW bits apply to the resultant add, not - /// whether the sub would have overflowed. + /// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - bool HasNUW = false, bool HasNSW = false); + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap); /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion /// of the input value to the specified type. If the type must be @@ -586,6 +619,12 @@ namespace llvm { const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS); + /// getPointerBase - Transitively follow the chain of pointer-type operands + /// until reaching a SCEV that does not have a single pointer operand. This + /// returns a SCEVUnknown pointer for well-formed pointer-type expressions, + /// but corner cases do exist. 
+ const SCEV *getPointerBase(const SCEV *V); + /// getSCEVAtScope - Return a SCEV expression for the specified value /// at the specified scope in the program. The L value specifies a loop /// nest to evaluate the expression at, where null is the top-level or a diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h index db432c8173dd..856d92c97c08 100644 --- a/include/llvm/Analysis/ScalarEvolutionExpressions.h +++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h @@ -160,13 +160,8 @@ namespace llvm { const Type *getType() const { return getOperand(0)->getType(); } - bool hasNoUnsignedWrap() const { return SubclassData & (1 << 0); } - void setHasNoUnsignedWrap(bool B) { - SubclassData = (SubclassData & ~(1 << 0)) | (B << 0); - } - bool hasNoSignedWrap() const { return SubclassData & (1 << 1); } - void setHasNoSignedWrap(bool B) { - SubclassData = (SubclassData & ~(1 << 1)) | (B << 1); + NoWrapFlags getNoWrapFlags(NoWrapFlags Mask = NoWrapMask) const { + return (NoWrapFlags)(SubclassData & Mask); } /// Methods for support type inquiry through isa, cast, and dyn_cast: @@ -199,6 +194,11 @@ namespace llvm { S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr; } + + /// Set flags for a non-recurrence without clearing previously set flags. + void setNoWrapFlags(NoWrapFlags Flags) { + SubclassData |= Flags; + } }; @@ -305,11 +305,12 @@ namespace llvm { /// getStepRecurrence - This method constructs and returns the recurrence /// indicating how much this expression steps by. If this is a polynomial /// of degree N, it returns a chrec of degree N-1. + /// We cannot determine whether the step recurrence has self-wraparound. 
const SCEV *getStepRecurrence(ScalarEvolution &SE) const { if (isAffine()) return getOperand(1); return SE.getAddRecExpr(SmallVector(op_begin()+1, op_end()), - getLoop()); + getLoop(), FlagAnyWrap); } /// isAffine - Return true if this is an affine AddRec (i.e., it represents @@ -327,6 +328,15 @@ namespace llvm { return getNumOperands() == 3; } + /// Set flags for a recurrence without clearing any previously set flags. + /// For AddRec, either NUW or NSW implies NW. Keep track of this fact here + /// to make it easier to propagate flags. + void setNoWrapFlags(NoWrapFlags Flags) { + if (Flags & (FlagNUW | FlagNSW)) + Flags = ScalarEvolution::setFlags(Flags, FlagNW); + SubclassData |= Flags; + } + /// evaluateAtIteration - Return the value of this chain of recurrences at /// the specified iteration number. const SCEV *evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const; @@ -364,8 +374,7 @@ namespace llvm { const SCEV *const *O, size_t N) : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) { // Max never overflows. - setHasNoUnsignedWrap(true); - setHasNoSignedWrap(true); + setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); } public: @@ -387,8 +396,7 @@ namespace llvm { const SCEV *const *O, size_t N) : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) { // Max never overflows. - setHasNoUnsignedWrap(true); - setHasNoSignedWrap(true); + setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); } public: diff --git a/include/llvm/Bitcode/Archive.h b/include/llvm/Bitcode/Archive.h index c3c07d8588a3..f89a86cb0f77 100644 --- a/include/llvm/Bitcode/Archive.h +++ b/include/llvm/Bitcode/Archive.h @@ -25,7 +25,6 @@ namespace llvm { class MemoryBuffer; - class raw_ostream; // Forward declare classes class Module; // From VMCore @@ -436,7 +435,7 @@ class Archive { /// to determine just enough information to create an ArchiveMember object /// which is then inserted into the Archive object's ilist at the location /// given by \p where. 
- /// @returns true if an error occured, false otherwise + /// @returns true if an error occurred, false otherwise /// @brief Add a file to the archive. bool addFileBefore( const sys::Path& filename, ///< The file to be added @@ -483,7 +482,7 @@ class Archive { bool loadSymbolTable(std::string* ErrMessage); /// @brief Write the symbol table to an ofstream. - void writeSymbolTable(raw_ostream& ARFile); + void writeSymbolTable(std::ofstream& ARFile); /// Writes one ArchiveMember to an ofstream. If an error occurs, returns /// false, otherwise true. If an error occurs and error is non-null then @@ -492,7 +491,7 @@ class Archive { /// @returns true Writing member failed, \p error set to error message bool writeMember( const ArchiveMember& member, ///< The member to be written - raw_ostream& ARFile, ///< The file to write member onto + std::ofstream& ARFile, ///< The file to write member onto bool CreateSymbolTable, ///< Should symbol table be created? bool TruncateNames, ///< Should names be truncated to 11 chars? bool ShouldCompress, ///< Should the member be compressed? diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index a071febb102f..58395ba9b4db 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -183,6 +183,10 @@ namespace llvm { /// function. void EmitFunctionBody(); + void emitPrologLabel(const MachineInstr &MI); + + bool needsCFIMoves(); + /// EmitConstantPool - Print to the current output stream assembly /// representations of the constants in the constant pool MCP. This is /// used to print out constants which have been "spilled to memory" by @@ -377,10 +381,17 @@ namespace llvm { /// operands. virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + /// getDwarfRegOpSize - get size required to emit given machine location + /// using dwarf encoding. 
+ virtual unsigned getDwarfRegOpSize(const MachineLocation &MLoc) const; + /// getISAEncoding - Get the value for DW_AT_APPLE_isa. Zero if no isa /// encoding specified. virtual unsigned getISAEncoding() { return 0; } + /// EmitDwarfRegOp - Emit dwarf register operation. + virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const; + //===------------------------------------------------------------------===// // Dwarf Lowering Routines //===------------------------------------------------------------------===// @@ -389,6 +400,7 @@ namespace llvm { /// frame. void EmitFrameMoves(const std::vector &Moves, MCSymbol *BaseLabel, bool isEH) const; + void EmitCFIFrameMove(const MachineMove &Move) const; void EmitCFIFrameMoves(const std::vector &Moves) const; //===------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h index 853ebf99a87b..60edcc584559 100644 --- a/include/llvm/CodeGen/CalcSpillWeights.h +++ b/include/llvm/CodeGen/CalcSpillWeights.h @@ -11,7 +11,7 @@ #ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H #define LLVM_CODEGEN_CALCSPILLWEIGHTS_H -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/ADT/DenseMap.h" namespace llvm { @@ -29,28 +29,25 @@ namespace llvm { /// @param Size Size of live interval as returnexd by getSize() /// static inline float normalizeSpillWeight(float UseDefFreq, unsigned Size) { - // The magic constant 200 corresponds to approx. 25 instructions since - // SlotIndexes allocate 8 slots per instruction. - // - // The constant is added to avoid depending too much on accidental SlotIndex - // gaps for small intervals. The effect is that small intervals have a spill - // weight that is mostly proportional to the number of uses, while large - // intervals get a spill weight that is closer to a use density. 
- // - return UseDefFreq / (Size + 200); + // The constant 25 instructions is added to avoid depending too much on + // accidental SlotIndex gaps for small intervals. The effect is that small + // intervals have a spill weight that is mostly proportional to the number + // of uses, while large intervals get a spill weight that is closer to a use + // density. + return UseDefFreq / (Size + 25*SlotIndex::InstrDist); } /// VirtRegAuxInfo - Calculate auxiliary information for a virtual /// register such as its spill weight and allocation hint. class VirtRegAuxInfo { - MachineFunction &mf_; - LiveIntervals &lis_; - const MachineLoopInfo &loops_; - DenseMap hint_; + MachineFunction &MF; + LiveIntervals &LIS; + const MachineLoopInfo &Loops; + DenseMap Hint; public: VirtRegAuxInfo(MachineFunction &mf, LiveIntervals &lis, const MachineLoopInfo &loops) : - mf_(mf), lis_(lis), loops_(loops) {} + MF(mf), LIS(lis), Loops(loops) {} /// CalculateRegClass - recompute the register class for reg from its uses. /// Since the register class can affect the allocation hint, this function diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 2a9bbdfb7ceb..9018ea36e7b5 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -141,6 +141,8 @@ typedef bool CCCustomFn(unsigned &ValNo, MVT &ValVT, MVT &LocVT, CCValAssign::LocInfo &LocInfo, ISD::ArgFlagsTy &ArgFlags, CCState &State); +typedef enum { Invalid, Prologue, Call } ParmContext; + /// CCState - This class holds information needed while lowering arguments and /// return values. It captures which registers are already assigned and which /// stack slots are used. It provides accessors to allocate these values. 
@@ -154,6 +156,9 @@ class CCState { unsigned StackOffset; SmallVector UsedRegs; + unsigned FirstByValReg; + bool FirstByValRegValid; + ParmContext CallOrPrologue; public: CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &TM, SmallVector &locs, LLVMContext &C); @@ -288,6 +293,16 @@ class CCState { MVT LocVT, CCValAssign::LocInfo LocInfo, int MinSize, int MinAlign, ISD::ArgFlagsTy ArgFlags); + // First GPR that carries part of a byval aggregate that's split + // between registers and memory. + unsigned getFirstByValReg() { return FirstByValRegValid ? FirstByValReg : 0; } + void setFirstByValReg(unsigned r) { FirstByValReg = r; FirstByValRegValid = true; } + void clearFirstByValReg() { FirstByValReg = 0; FirstByValRegValid = false; } + bool isFirstByValRegValid() { return FirstByValRegValid; } + + ParmContext getCallOrPrologue() { return CallOrPrologue; } + void setCallOrPrologue(ParmContext pc) { CallOrPrologue = pc; } + private: /// MarkAllocated - Mark a register and all of its aliases as allocated. void MarkAllocated(unsigned Reg); diff --git a/include/llvm/CodeGen/EdgeBundles.h b/include/llvm/CodeGen/EdgeBundles.h index 2c5215a7927a..8aab3c64f170 100644 --- a/include/llvm/CodeGen/EdgeBundles.h +++ b/include/llvm/CodeGen/EdgeBundles.h @@ -16,6 +16,7 @@ #ifndef LLVM_CODEGEN_EDGEBUNDLES_H #define LLVM_CODEGEN_EDGEBUNDLES_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/IntEqClasses.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -29,6 +30,9 @@ class EdgeBundles : public MachineFunctionPass { /// 2*BB->getNumber()+1 -> Outgoing bundle. IntEqClasses EC; + /// Blocks - Map each bundle to a list of basic block numbers. + SmallVector, 4> Blocks; + public: static char ID; EdgeBundles() : MachineFunctionPass(ID) {} @@ -40,6 +44,9 @@ class EdgeBundles : public MachineFunctionPass { /// getNumBundles - Return the total number of bundles in the CFG. 
unsigned getNumBundles() const { return EC.getNumClasses(); } + /// getBlocks - Return an array of blocks that are connected to Bundle. + ArrayRef getBlocks(unsigned Bundle) { return Blocks[Bundle]; } + /// getMachineFunction - Return the last machine function computed. const MachineFunction *getMachineFunction() const { return MF; } diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h index fbb12005444f..10c4c33dde51 100644 --- a/include/llvm/CodeGen/FastISel.h +++ b/include/llvm/CodeGen/FastISel.h @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// // // This file defines the FastISel class. -// +// //===----------------------------------------------------------------------===// - + #ifndef LLVM_CODEGEN_FASTISEL_H #define LLVM_CODEGEN_FASTISEL_H @@ -108,7 +108,7 @@ class FastISel { const LoadInst * /*LI*/) { return false; } - + /// recomputeInsertPt - Reset InsertPt to prepare for inserting instructions /// into the current block. void recomputeInsertPt(); @@ -203,16 +203,7 @@ class FastISel { unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType); - - /// FastEmit_rf_ - This method is a wrapper of FastEmit_rf. It first tries - /// to emit an instruction with an immediate operand using FastEmit_rf. - /// If that fails, it materializes the immediate into a register and try - /// FastEmit_rr instead. - unsigned FastEmit_rf_(MVT VT, - unsigned Opcode, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm, MVT ImmType); - + /// FastEmit_i - This method is called by target-independent code /// to request that an instruction with the given type, opcode, and /// immediate operand be emitted. @@ -250,14 +241,22 @@ class FastISel { unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); - /// FastEmitInst_ri - Emit a MachineInstr with two register operands - /// and a result register in the given register class. 
+ /// FastEmitInst_ri - Emit a MachineInstr with a register operand, + /// an immediate, and a result register in the given register class. /// unsigned FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, uint64_t Imm); + /// FastEmitInst_rii - Emit a MachineInstr with one register operand + /// and two immediate operands. + /// + unsigned FastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm1, uint64_t Imm2); + /// FastEmitInst_rf - Emit a MachineInstr with two register operands /// and a result register in the given register class. /// @@ -274,13 +273,18 @@ class FastISel { unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); - + /// FastEmitInst_i - Emit a MachineInstr with a single immediate /// operand, and a result register in the given register class. unsigned FastEmitInst_i(unsigned MachineInstrOpcode, const TargetRegisterClass *RC, uint64_t Imm); + /// FastEmitInst_ii - Emit a MachineInstr with two immediate operands. + unsigned FastEmitInst_ii(unsigned MachineInstrOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + /// FastEmitInst_extractsubreg - Emit a MachineInstr for an extract_subreg /// from a specified index of a superregister to a specified type. unsigned FastEmitInst_extractsubreg(MVT RetVT, @@ -300,8 +304,8 @@ class FastISel { unsigned UpdateValueMap(const Value* I, unsigned Reg); unsigned createResultReg(const TargetRegisterClass *RC); - - /// TargetMaterializeConstant - Emit a constant in a register using + + /// TargetMaterializeConstant - Emit a constant in a register using /// target-specific logic, such as constant pool loads. 
virtual unsigned TargetMaterializeConstant(const Constant* C) { return 0; @@ -313,6 +317,10 @@ class FastISel { return 0; } + virtual unsigned TargetMaterializeFloatZero(const ConstantFP* CF) { + return 0; + } + private: bool SelectBinaryOp(const User *I, unsigned ISDOpcode); @@ -323,7 +331,7 @@ class FastISel { bool SelectCall(const User *I); bool SelectBitCast(const User *I); - + bool SelectCast(const User *I, unsigned Opcode); /// HandlePHINodesInSuccessorBlocks - Handle PHI nodes in successor blocks. diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h index b41f30d8251d..4421cc02d1cd 100644 --- a/include/llvm/CodeGen/FunctionLoweringInfo.h +++ b/include/llvm/CodeGen/FunctionLoweringInfo.h @@ -187,7 +187,12 @@ class FunctionLoweringInfo { /// InvalidatePHILiveOutRegInfo - Invalidates a PHI's LiveOutInfo, to be /// called when a block is visited before all of its predecessors. void InvalidatePHILiveOutRegInfo(const PHINode *PN) { - unsigned Reg = ValueMap[PN]; + // PHIs with no uses have no ValueMap entry. + DenseMap::const_iterator It = ValueMap.find(PN); + if (It == ValueMap.end()) + return; + + unsigned Reg = It->second; LiveOutRegInfo.grow(Reg); LiveOutRegInfo[Reg].IsValid = false; } @@ -209,8 +214,9 @@ class FunctionLoweringInfo { void AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB); -/// CopyCatchInfo - Copy catch information from DestBB to SrcBB. -void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, +/// CopyCatchInfo - Copy catch information from SuccBB (or one of its +/// successors) to LPad. 
+void CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); } // end namespace llvm diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h index 3da11c4a0e0f..f0de9361daeb 100644 --- a/include/llvm/CodeGen/ISDOpcodes.h +++ b/include/llvm/CodeGen/ISDOpcodes.h @@ -219,7 +219,7 @@ namespace ISD { // RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition. // These nodes take two operands: the normal LHS and RHS to the add. They // produce two results: the normal result of the add, and a boolean that - // indicates if an overflow occured (*not* a flag, because it may be stored + // indicates if an overflow occurred (*not* a flag, because it may be stored // to memory, etc.). If the type of the boolean is not i1 then the high // bits conform to getBooleanContents. // These nodes are generated from the llvm.[su]add.with.overflow intrinsics. diff --git a/include/llvm/CodeGen/JITCodeEmitter.h b/include/llvm/CodeGen/JITCodeEmitter.h index fea852305158..88e22d6a24ce 100644 --- a/include/llvm/CodeGen/JITCodeEmitter.h +++ b/include/llvm/CodeGen/JITCodeEmitter.h @@ -23,8 +23,6 @@ #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/ADT/DenseMap.h" -using namespace std; - namespace llvm { class MachineBasicBlock; @@ -38,7 +36,7 @@ class GlobalValue; class Function; /// JITCodeEmitter - This class defines two sorts of methods: those for -/// emitting the actual bytes of machine code, and those for emitting auxillary +/// emitting the actual bytes of machine code, and those for emitting auxiliary /// structures, such as jump tables, relocations, etc. 
/// /// Emission of machine code is complicated by the fact that we don't (in diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h index 88131fbc40ff..c5285cec1e2d 100644 --- a/include/llvm/CodeGen/LiveInterval.h +++ b/include/llvm/CodeGen/LiveInterval.h @@ -286,6 +286,11 @@ namespace llvm { return valnos[ValNo]; } + /// containsValue - Returns true if VNI belongs to this interval. + bool containsValue(const VNInfo *VNI) const { + return VNI && VNI->id < getNumValNums() && VNI == getValNumInfo(VNI->id); + } + /// getNextValue - Create a new value number and return it. MIIdx specifies /// the instruction that defines the value number. VNInfo *getNextValue(SlotIndex def, MachineInstr *CopyMI, @@ -447,6 +452,11 @@ namespace llvm { addRangeFrom(LR, ranges.begin()); } + /// extendInBlock - If this interval is live before UseIdx in the basic + /// block that starts at StartIdx, extend it to be live at UseIdx and return + /// the value. If there is no live range before UseIdx, return NULL. + VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx); + /// join - Join two live intervals (this, and other) together. This applies /// mappings to the value numbers in the LHS/RHS intervals as specified. If /// the intervals are not joinable, this aborts. @@ -543,8 +553,8 @@ namespace llvm { /// } class ConnectedVNInfoEqClasses { - LiveIntervals &lis_; - IntEqClasses eqClass_; + LiveIntervals &LIS; + IntEqClasses EqClass; // Note that values a and b are connected. void Connect(unsigned a, unsigned b); @@ -552,7 +562,7 @@ namespace llvm { unsigned Renumber(); public: - explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : lis_(lis) {} + explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {} /// Classify - Classify the values in LI into connected components. /// Return the number of connected components. @@ -560,12 +570,13 @@ namespace llvm { /// getEqClass - Classify creates equivalence classes numbered 0..N. 
Return /// the equivalence class assigned the VNI. - unsigned getEqClass(const VNInfo *VNI) const { return eqClass_[VNI->id]; } + unsigned getEqClass(const VNInfo *VNI) const { return EqClass[VNI->id]; } /// Distribute - Distribute values in LIV[0] into a separate LiveInterval /// for each connected component. LIV must have a LiveInterval for each /// connected component. The LiveIntervals in Liv[1..] must be empty. - void Distribute(LiveInterval *LIV[]); + /// Instructions using LIV[0] are rewritten. + void Distribute(LiveInterval *LIV[], MachineRegisterInfo &MRI); }; diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index b09f8d111066..8ca58b82c8bb 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -159,7 +159,11 @@ namespace llvm { /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. /// Dead PHIDef values are marked as unused. - void shrinkToUses(LiveInterval *li); + /// New dead machine instructions are added to the dead vector. + /// Return true if the interval may have been separated into multiple + /// connected components. + bool shrinkToUses(LiveInterval *li, + SmallVectorImpl *dead = 0); // Interval removal @@ -272,7 +276,7 @@ namespace llvm { /// (if any is created) by reference. This is temporary. std::vector addIntervalsForSpills(const LiveInterval& i, - const SmallVectorImpl &SpillIs, + const SmallVectorImpl *SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap& vrm); /// spillPhysRegAroundRegDefsUses - Spill the specified physical register @@ -285,7 +289,7 @@ namespace llvm { /// val# of the specified interval is re-materializable. Also returns true /// by reference if all of the defs are load instructions. 
bool isReMaterializable(const LiveInterval &li, - const SmallVectorImpl &SpillIs, + const SmallVectorImpl *SpillIs, bool &isLoad); /// isReMaterializable - Returns true if the definition MI of the specified @@ -372,7 +376,7 @@ namespace llvm { /// by reference if the def is a load. bool isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, MachineInstr *MI, - const SmallVectorImpl &SpillIs, + const SmallVectorImpl *SpillIs, bool &isLoad); /// tryFoldMemoryOperand - Attempts to fold either a spill / restore from diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index 1785451c7ec5..ad121572fca0 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/ADT/GraphTraits.h" +#include namespace llvm { @@ -304,10 +305,18 @@ class MachineBasicBlock : public ilist_node { /// it returns end() iterator getFirstTerminator(); + const_iterator getFirstTerminator() const { + return const_cast(this)->getFirstTerminator(); + } + /// getLastNonDebugInstr - returns an iterator to the last non-debug /// instruction in the basic block, or end() iterator getLastNonDebugInstr(); + const_iterator getLastNonDebugInstr() const { + return const_cast(this)->getLastNonDebugInstr(); + } + /// SplitCriticalEdge - Split the critical edge from this block to the /// given successor block, and return the newly created block, or null /// if splitting is not possible. @@ -411,6 +420,14 @@ raw_ostream& operator<<(raw_ostream &OS, const MachineBasicBlock &MBB); void WriteAsOperand(raw_ostream &, const MachineBasicBlock*, bool t); +// This is useful when building IndexedMaps keyed on basic block pointers. 
+struct MBB2NumberFunctor : + public std::unary_function { + unsigned operator()(const MachineBasicBlock *MBB) const { + return MBB->getNumber(); + } +}; + //===--------------------------------------------------------------------===// // GraphTraits specializations for machine basic block graphs (machine-CFGs) //===--------------------------------------------------------------------===// diff --git a/include/llvm/CodeGen/MachineCodeEmitter.h b/include/llvm/CodeGen/MachineCodeEmitter.h index 8fc80adf7fb8..428aada7ba13 100644 --- a/include/llvm/CodeGen/MachineCodeEmitter.h +++ b/include/llvm/CodeGen/MachineCodeEmitter.h @@ -34,7 +34,7 @@ class Function; class MCSymbol; /// MachineCodeEmitter - This class defines two sorts of methods: those for -/// emitting the actual bytes of machine code, and those for emitting auxillary +/// emitting the actual bytes of machine code, and those for emitting auxiliary /// structures, such as jump tables, relocations, etc. /// /// Emission of machine code is complicated by the fact that we don't (in @@ -54,7 +54,7 @@ class MachineCodeEmitter { /// allocated for this code buffer. uint8_t *BufferBegin, *BufferEnd; /// CurBufferPtr - Pointer to the next byte of memory to fill when emitting - /// code. This is guranteed to be in the range [BufferBegin,BufferEnd]. If + /// code. This is guaranteed to be in the range [BufferBegin,BufferEnd]. If /// this pointer is at BufferEnd, it will never move due to code emission, and /// all code emission requests will be ignored (this is the buffer overflow /// condition). diff --git a/include/llvm/CodeGen/MachineConstantPool.h b/include/llvm/CodeGen/MachineConstantPool.h index 5727321a0da4..beb16a2824d7 100644 --- a/include/llvm/CodeGen/MachineConstantPool.h +++ b/include/llvm/CodeGen/MachineConstantPool.h @@ -80,7 +80,7 @@ class MachineConstantPoolEntry { } Val; /// The required alignment for this entry. The top bit is set when Val is - /// a MachineConstantPoolValue. 
+ /// a target specific MachineConstantPoolValue. unsigned Alignment; MachineConstantPoolEntry(const Constant *V, unsigned A) @@ -93,6 +93,9 @@ class MachineConstantPoolEntry { Alignment |= 1U << (sizeof(unsigned)*CHAR_BIT-1); } + /// isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry + /// is indeed a target specific constantpool entry, not a wrapper over a + /// Constant. bool isMachineConstantPoolEntry() const { return (int)Alignment < 0; } diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h index 22a82a9d6e75..4ea6aa3396a9 100644 --- a/include/llvm/CodeGen/MachineFrameInfo.h +++ b/include/llvm/CodeGen/MachineFrameInfo.h @@ -15,7 +15,6 @@ #define LLVM_CODEGEN_MACHINEFRAMEINFO_H #include "llvm/ADT/SmallVector.h" -//#include "llvm/ADT/IndexedMap.h" #include "llvm/Support/DataTypes.h" #include #include diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 82c5332ccd9f..2724689786e5 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -50,13 +50,22 @@ class MachineInstr : public ilist_node { enum CommentFlag { ReloadReuse = 0x1 }; - + + enum MIFlag { + NoFlags = 0, + FrameSetup = 1 << 0 // Instruction is used as a part of + // function frame setup code. + }; private: const TargetInstrDesc *TID; // Instruction descriptor. - unsigned short NumImplicitOps; // Number of implicit operands (which + uint16_t NumImplicitOps; // Number of implicit operands (which // are determined at construction time). - unsigned short AsmPrinterFlags; // Various bits of information used by + uint8_t Flags; // Various bits of additional + // information about machine + // instruction. + + uint8_t AsmPrinterFlags; // Various bits of information used by // the AsmPrinter to emit helpful // comments. This is *not* semantic // information. 
Do not use this for @@ -105,13 +114,13 @@ class MachineInstr : public ilist_node { /// MachineInstr ctor - This constructor create a MachineInstr and add the /// implicit operands. It reserves space for number of operands specified by /// TargetInstrDesc. An explicit DebugLoc is supplied. - explicit MachineInstr(const TargetInstrDesc &TID, const DebugLoc dl, + explicit MachineInstr(const TargetInstrDesc &TID, const DebugLoc dl, bool NoImp = false); /// MachineInstr ctor - Work exactly the same as the ctor above, except that /// the MachineInstr is created and added to the end of the specified basic /// block. - MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, + MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &TID); ~MachineInstr(); @@ -125,12 +134,12 @@ class MachineInstr : public ilist_node { /// getAsmPrinterFlags - Return the asm printer flags bitvector. /// - unsigned short getAsmPrinterFlags() const { return AsmPrinterFlags; } + uint8_t getAsmPrinterFlags() const { return AsmPrinterFlags; } /// clearAsmPrinterFlags - clear the AsmPrinter bitvector /// void clearAsmPrinterFlags() { AsmPrinterFlags = 0; } - + /// getAsmPrinterFlag - Return whether an AsmPrinter flag is set. /// bool getAsmPrinterFlag(CommentFlag Flag) const { @@ -140,9 +149,28 @@ class MachineInstr : public ilist_node { /// setAsmPrinterFlag - Set a flag for the AsmPrinter. /// void setAsmPrinterFlag(CommentFlag Flag) { - AsmPrinterFlags |= (unsigned short)Flag; + AsmPrinterFlags |= (uint8_t)Flag; } - + + /// getFlags - Return the MI flags bitvector. + uint8_t getFlags() const { + return Flags; + } + + /// getFlag - Return whether an MI flag is set. + bool getFlag(MIFlag Flag) const { + return Flags & Flag; + } + + /// setFlag - Set a MI flag. 
+ void setFlag(MIFlag Flag) { + Flags |= (uint8_t)Flag; + } + + void setFlags(unsigned flags) { + Flags = flags; + } + /// clearAsmPrinterFlag - clear specific AsmPrinter flags /// void clearAsmPrinterFlag(CommentFlag Flag) { @@ -152,7 +180,7 @@ class MachineInstr : public ilist_node { /// getDebugLoc - Returns the debug location id of this MachineInstr. /// DebugLoc getDebugLoc() const { return debugLoc; } - + /// getDesc - Returns the target instruction descriptor of this /// MachineInstr. const TargetInstrDesc &getDesc() const { return *TID; } @@ -213,7 +241,7 @@ class MachineInstr : public ilist_node { /// removeFromParent - This method unlinks 'this' from the containing basic /// block, and returns it, but does not delete it. MachineInstr *removeFromParent(); - + /// eraseFromParent - This method unlinks 'this' from the containing basic /// block and deletes it. void eraseFromParent(); @@ -225,14 +253,14 @@ class MachineInstr : public ilist_node { getOpcode() == TargetOpcode::EH_LABEL || getOpcode() == TargetOpcode::GC_LABEL; } - + bool isPrologLabel() const { return getOpcode() == TargetOpcode::PROLOG_LABEL; } bool isEHLabel() const { return getOpcode() == TargetOpcode::EH_LABEL; } bool isGCLabel() const { return getOpcode() == TargetOpcode::GC_LABEL; } bool isDebugValue() const { return getOpcode() == TargetOpcode::DBG_VALUE; } - + bool isPHI() const { return getOpcode() == TargetOpcode::PHI; } bool isKill() const { return getOpcode() == TargetOpcode::KILL; } bool isImplicitDef() const { return getOpcode()==TargetOpcode::IMPLICIT_DEF; } @@ -329,7 +357,7 @@ class MachineInstr : public ilist_node { int Idx = findRegisterUseOperandIdx(Reg, isKill, TRI); return (Idx == -1) ? NULL : &getOperand(Idx); } - + /// findRegisterDefOperandIdx() - Returns the operand index that is a def of /// the specified register or -1 if it is not found. If isDead is true, defs /// that are not dead are skipped. 
If Overlap is true, then it also looks for @@ -351,7 +379,7 @@ class MachineInstr : public ilist_node { /// operand list that is used to represent the predicate. It returns -1 if /// none is found. int findFirstPredOperandIdx() const; - + /// isRegTiedToUseOperand - Given the index of a register def operand, /// check if the register def is tied to a source operand, due to either /// two-address elimination or inline assembly constraints. Returns the @@ -399,8 +427,8 @@ class MachineInstr : public ilist_node { void addRegisterDefined(unsigned IncomingReg, const TargetRegisterInfo *RegInfo = 0); - /// setPhysRegsDeadExcept - Mark every physreg used by this instruction as dead - /// except those in the UsedRegs list. + /// setPhysRegsDeadExcept - Mark every physreg used by this instruction as + /// dead except those in the UsedRegs list. void setPhysRegsDeadExcept(const SmallVectorImpl &UsedRegs, const TargetRegisterInfo &TRI); @@ -462,9 +490,9 @@ class MachineInstr : public ilist_node { /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list - /// (before the first implicit operand). + /// (before the first implicit operand). void addOperand(const MachineOperand &Op); - + /// setDesc - Replace the instruction descriptor (thus opcode) of /// the current instruction with a new one. /// @@ -501,12 +529,12 @@ class MachineInstr : public ilist_node { /// addImplicitDefUseOperands - Add all implicit def and use operands to /// this instruction. void addImplicitDefUseOperands(); - + /// RemoveRegOperandsFromUseLists - Unlink all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. 
void RemoveRegOperandsFromUseLists(); - + /// AddRegOperandsToUseLists - Add all of the register operands in /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index 1eb97353088f..967e0197bb7d 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -48,6 +48,7 @@ class MachineInstrBuilder { /// Allow automatic conversion to the machine instruction we are working on. /// operator MachineInstr*() const { return MI; } + MachineInstr *operator->() const { return MI; } operator MachineBasicBlock::iterator() const { return MI; } /// addReg - Add a new virtual register operand... @@ -145,6 +146,16 @@ class MachineInstrBuilder { return *this; } + const MachineInstrBuilder &setMIFlags(unsigned Flags) const { + MI->setFlags(Flags); + return *this; + } + + const MachineInstrBuilder &setMIFlag(MachineInstr::MIFlag Flag) const { + MI->setFlag(Flag); + return *this; + } + // Add a displacement from an existing MachineOperand with an added offset. 
const MachineInstrBuilder &addDisp(const MachineOperand &Disp, int64_t off) const { diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h index b2224cb051dc..5240729f52d0 100644 --- a/include/llvm/CodeGen/PBQP/Graph.h +++ b/include/llvm/CodeGen/PBQP/Graph.h @@ -18,7 +18,6 @@ #include "Math.h" #include -#include #include namespace PBQP { diff --git a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h index 47a287ccf2f6..e96c4cb1e0c1 100644 --- a/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h +++ b/include/llvm/CodeGen/PBQP/Heuristics/Briggs.h @@ -21,7 +21,6 @@ #include "../HeuristicSolver.h" #include "../HeuristicBase.h" -#include #include namespace PBQP { diff --git a/include/llvm/CodeGen/ProcessImplicitDefs.h b/include/llvm/CodeGen/ProcessImplicitDefs.h index e2ab899f183f..6ab57f03aee7 100644 --- a/include/llvm/CodeGen/ProcessImplicitDefs.h +++ b/include/llvm/CodeGen/ProcessImplicitDefs.h @@ -18,14 +18,20 @@ namespace llvm { class MachineInstr; class TargetInstrInfo; + class TargetRegisterInfo; + class MachineRegisterInfo; + class LiveVariables; /// Process IMPLICIT_DEF instructions and make sure there is one implicit_def /// for each use. Add isUndef marker to implicit_def defs and their uses. class ProcessImplicitDefs : public MachineFunctionPass { - private: + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + LiveVariables *LV; bool CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, - unsigned OpIdx, const TargetInstrInfo *tii_, + unsigned OpIdx, SmallSet &ImpDefRegs); public: diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h index 246831c034d4..26b6773c0530 100644 --- a/include/llvm/CodeGen/RegisterScavenging.h +++ b/include/llvm/CodeGen/RegisterScavenging.h @@ -100,7 +100,7 @@ class RegScavenger { /// getRegsAvailable - Return all available registers in the register class /// in Mask. 
- void getRegsAvailable(const TargetRegisterClass *RC, BitVector &Mask); + BitVector getRegsAvailable(const TargetRegisterClass *RC); /// FindUnusedReg - Find a unused register of the specified register class. /// Return 0 if none is found. diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h index a51e82a6404a..576be821774d 100644 --- a/include/llvm/CodeGen/RuntimeLibcalls.h +++ b/include/llvm/CodeGen/RuntimeLibcalls.h @@ -66,6 +66,16 @@ namespace RTLIB { UREM_I32, UREM_I64, UREM_I128, + SDIVREM_I8, + SDIVREM_I16, + SDIVREM_I32, + SDIVREM_I64, + SDIVREM_I128, + UDIVREM_I8, + UDIVREM_I16, + UDIVREM_I32, + UDIVREM_I64, + UDIVREM_I128, NEG_I32, NEG_I64, diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 3864ffd50a19..2eb3db319ddf 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -250,7 +250,9 @@ namespace llvm { unsigned NumSuccsLeft; // # of succs not scheduled. unsigned short NumRegDefsLeft; // # of reg defs with no scheduled use. unsigned short Latency; // Node latency. + bool isVRegCycle : 1; // May use and def the same vreg. bool isCall : 1; // Is a function call. + bool isCallOp : 1; // Is a function call operand. bool isTwoAddress : 1; // Is a two-address instruction. bool isCommutable : 1; // Is a commutable instruction. bool hasPhysRegDefs : 1; // Has physreg defs that are being used. @@ -259,6 +261,7 @@ namespace llvm { bool isAvailable : 1; // True once available. bool isScheduled : 1; // True once scheduled. bool isScheduleHigh : 1; // True if preferable to schedule high. + bool isScheduleLow : 1; // True if preferable to schedule low. bool isCloned : 1; // True if this node has been cloned. Sched::Preference SchedulingPref; // Scheduling preference. 
@@ -278,10 +281,10 @@ namespace llvm { : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isCall(false), isTwoAddress(false), isCommutable(false), - hasPhysRegDefs(false), hasPhysRegClobbers(false), + isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), + isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isCloned(false), + isScheduleHigh(false), isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), CopyDstRC(NULL), CopySrcRC(NULL) {} @@ -292,10 +295,10 @@ namespace llvm { : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isCall(false), isTwoAddress(false), isCommutable(false), - hasPhysRegDefs(false), hasPhysRegClobbers(false), + isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), + isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), isAvailable(false), isScheduled(false), - isScheduleHigh(false), isCloned(false), + isScheduleHigh(false), isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), CopyDstRC(NULL), CopySrcRC(NULL) {} @@ -305,10 +308,10 @@ namespace llvm { : Node(0), Instr(0), OrigNode(0), NodeNum(~0u), NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0), - isCall(false), isTwoAddress(false), isCommutable(false), - hasPhysRegDefs(false), hasPhysRegClobbers(false), + isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false), + isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false), isPending(false), 
isAvailable(false), isScheduled(false), - isScheduleHigh(false), isCloned(false), + isScheduleHigh(false), isScheduleLow(false), isCloned(false), SchedulingPref(Sched::None), isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0), CopyDstRC(NULL), CopySrcRC(NULL) {} @@ -356,7 +359,7 @@ namespace llvm { void removePred(const SDep &D); /// getDepth - Return the depth of this node, which is the length of the - /// maximum path up to any node with has no predecessors. + /// maximum path up to any node which has no predecessors. unsigned getDepth() const { if (!isDepthCurrent) const_cast(this)->ComputeDepth(); @@ -364,7 +367,7 @@ namespace llvm { } /// getHeight - Return the height of this node, which is the length of the - /// maximum path down to any node with has no successors. + /// maximum path down to any node which has no successors. unsigned getHeight() const { if (!isHeightCurrent) const_cast(this)->ComputeHeight(); @@ -690,11 +693,11 @@ namespace llvm { /// will create a cycle. bool WillCreateCycle(SUnit *SU, SUnit *TargetSU); - /// AddPred - Updates the topological ordering to accomodate an edge + /// AddPred - Updates the topological ordering to accommodate an edge /// to be added from SUnit X to SUnit Y. void AddPred(SUnit *Y, SUnit *X); - /// RemovePred - Updates the topological ordering to accomodate an + /// RemovePred - Updates the topological ordering to accommodate an /// an edge to be removed from the specified node N from the predecessors /// of the current node M. 
void RemovePred(SUnit *M, SUnit *N); diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h index 8850006df84c..118df28abbb4 100644 --- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h +++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h @@ -21,7 +21,6 @@ #include #include -#include namespace llvm { diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index c9de95bebd54..92fd0c9e1cc1 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -438,12 +438,12 @@ class SelectionDAG { SDValue getConvertRndSat(EVT VT, DebugLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code); - + /// getVectorShuffle - Return an ISD::VECTOR_SHUFFLE node. The number of /// elements in VT, which must be a vector type, must match the number of /// mask elements NumElts. A integer mask element equal to -1 is treated as /// undefined. - SDValue getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, + SDValue getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, SDValue N2, const int *MaskElts); /// getSExtOrTrunc - Convert Op, which must be of integer type, to the @@ -671,10 +671,10 @@ class SelectionDAG { /// getMDNode - Return an MDNodeSDNode which holds an MDNode. SDValue getMDNode(const MDNode *MD); - + /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. - SDValue getShiftAmountOperand(SDValue Op); + SDValue getShiftAmountOperand(EVT LHSTy, SDValue Op); /// UpdateNodeOperands - *Mutate* the specified node in-place to have the /// specified operands. If the resultant node already exists in the DAG, @@ -829,7 +829,7 @@ class SelectionDAG { /// These functions only replace all existing uses. It's possible that as /// these replacements are being performed, CSE may cause the From node /// to be given new uses. 
These new uses of From are left in place, and - /// not automatically transfered to To. + /// not automatically transferred to To. /// void ReplaceAllUsesWith(SDValue From, SDValue Op, DAGUpdateListener *UpdateListener = 0); @@ -901,7 +901,7 @@ class SelectionDAG { SmallVector &GetDbgValues(const SDNode* SD) { return DbgInfo->getSDDbgValues(SD); } - + /// TransferDbgValues - Transfer SDDbgValues. void TransferDbgValues(SDValue From, SDValue To); @@ -911,11 +911,11 @@ class SelectionDAG { SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); } SDDbgInfo::DbgIterator DbgEnd() { return DbgInfo->DbgEnd(); } - SDDbgInfo::DbgIterator ByvalParmDbgBegin() { - return DbgInfo->ByvalParmDbgBegin(); + SDDbgInfo::DbgIterator ByvalParmDbgBegin() { + return DbgInfo->ByvalParmDbgBegin(); } - SDDbgInfo::DbgIterator ByvalParmDbgEnd() { - return DbgInfo->ByvalParmDbgEnd(); + SDDbgInfo::DbgIterator ByvalParmDbgEnd() { + return DbgInfo->ByvalParmDbgEnd(); } void dump() const; @@ -972,7 +972,7 @@ class SelectionDAG { /// semantics as an ADD. This handles the equivalence: /// X|Cst == X+Cst iff X&Cst = 0. bool isBaseWithConstantOffset(SDValue Op) const; - + /// isKnownNeverNan - Test whether the given SDValue is known to never be NaN. bool isKnownNeverNaN(SDValue Op) const; @@ -997,8 +997,8 @@ class SelectionDAG { /// vector op and fill the end of the resulting vector with UNDEFS. SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0); - /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a - /// location that is 'Dist' units away from the location that the 'Base' load + /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a + /// location that is 'Dist' units away from the location that the 'Base' load /// is loading from. 
bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const; @@ -1032,7 +1032,7 @@ class SelectionDAG { std::vector ValueTypeNodes; std::map ExtendedValueTypeNodes; StringMap ExternalSymbols; - + std::map,SDNode*> TargetExternalSymbols; }; diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index 62358e7639ee..ecf394701053 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -127,6 +127,7 @@ class SelectionDAGISel : public MachineFunctionPass { OPC_EmitInteger, OPC_EmitRegister, + OPC_EmitRegister2, OPC_EmitConvertToTarget, OPC_EmitMergeInputChains, OPC_EmitMergeInputChains1_0, @@ -257,7 +258,7 @@ class SelectionDAGISel : public MachineFunctionPass { } virtual SDValue RunSDNodeXForm(SDValue V, unsigned XFormNo) { - assert(0 && "Tblgen shoudl generate this!"); + assert(0 && "Tblgen should generate this!"); return SDValue(); } @@ -279,7 +280,8 @@ class SelectionDAGISel : public MachineFunctionPass { void PrepareEHLandingPad(); void SelectAllBasicBlocks(const Function &Fn); - bool TryToFoldFastISelLoad(const LoadInst *LI, FastISel *FastIS); + bool TryToFoldFastISelLoad(const LoadInst *LI, const Instruction *FoldInst, + FastISel *FastIS); void FinishBasicBlock(); void SelectBasicBlock(BasicBlock::const_iterator Begin, diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 64546394ce91..9d265f14516d 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -838,7 +838,7 @@ class TernarySDNode : public SDNode { /// HandleSDNode - This class is used to form a handle around another node that -/// is persistant and is updated across invocations of replaceAllUsesWith on its +/// is persistent and is updated across invocations of replaceAllUsesWith on its /// operand. This node should be directly created by end-users and not added to /// the AllNodes list. 
class HandleSDNode : public SDNode { diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index 1da1e91be14a..33ce675e5cc4 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -34,77 +34,35 @@ namespace llvm { /// SlotIndex & SlotIndexes classes for the public interface to this /// information. class IndexListEntry { - static const unsigned EMPTY_KEY_INDEX = ~0U & ~3U, - TOMBSTONE_KEY_INDEX = ~0U & ~7U; - IndexListEntry *next, *prev; MachineInstr *mi; unsigned index; - protected: - - typedef enum { EMPTY_KEY, TOMBSTONE_KEY } ReservedEntryType; - - // This constructor is only to be used by getEmptyKeyEntry - // & getTombstoneKeyEntry. It sets index to the given - // value and mi to zero. - IndexListEntry(ReservedEntryType r) : mi(0) { - switch(r) { - case EMPTY_KEY: index = EMPTY_KEY_INDEX; break; - case TOMBSTONE_KEY: index = TOMBSTONE_KEY_INDEX; break; - default: assert(false && "Invalid value for constructor."); - } - next = this; - prev = this; - } - public: - IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) { - assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX && - "Attempt to create invalid index. 
" - "Available indexes may have been exhausted?."); - } - - bool isValid() const { - return (index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX); - } + IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {} MachineInstr* getInstr() const { return mi; } void setInstr(MachineInstr *mi) { - assert(isValid() && "Attempt to modify reserved index."); this->mi = mi; } unsigned getIndex() const { return index; } void setIndex(unsigned index) { - assert(index != EMPTY_KEY_INDEX && index != TOMBSTONE_KEY_INDEX && - "Attempt to set index to invalid value."); - assert(isValid() && "Attempt to reset reserved index value."); this->index = index; } IndexListEntry* getNext() { return next; } const IndexListEntry* getNext() const { return next; } void setNext(IndexListEntry *next) { - assert(isValid() && "Attempt to modify reserved index."); this->next = next; } IndexListEntry* getPrev() { return prev; } const IndexListEntry* getPrev() const { return prev; } void setPrev(IndexListEntry *prev) { - assert(isValid() && "Attempt to modify reserved index."); this->prev = prev; } - - // This function returns the index list entry that is to be used for empty - // SlotIndex keys. - static IndexListEntry* getEmptyKeyEntry(); - - // This function returns the index list entry that is to be used for - // tombstone SlotIndex keys. - static IndexListEntry* getTombstoneKeyEntry(); }; // Specialize PointerLikeTypeTraits for IndexListEntry. 
@@ -130,11 +88,10 @@ namespace llvm { PointerIntPair lie; SlotIndex(IndexListEntry *entry, unsigned slot) - : lie(entry, slot) { - assert(entry != 0 && "Attempt to construct index with 0 pointer."); - } + : lie(entry, slot) {} IndexListEntry& entry() const { + assert(isValid() && "Attempt to compare reserved index."); return *lie.getPointer(); } @@ -148,22 +105,27 @@ namespace llvm { } static inline unsigned getHashValue(const SlotIndex &v) { - IndexListEntry *ptrVal = &v.entry(); - return (unsigned((intptr_t)ptrVal) >> 4) ^ - (unsigned((intptr_t)ptrVal) >> 9); + void *ptrVal = v.lie.getOpaqueValue(); + return (unsigned((intptr_t)ptrVal)) ^ (unsigned((intptr_t)ptrVal) >> 9); } public: + enum { + /// The default distance between instructions as returned by distance(). + /// This may vary as instructions are inserted and removed. + InstrDist = 4*NUM + }; + static inline SlotIndex getEmptyKey() { - return SlotIndex(IndexListEntry::getEmptyKeyEntry(), 0); + return SlotIndex(0, 1); } static inline SlotIndex getTombstoneKey() { - return SlotIndex(IndexListEntry::getTombstoneKeyEntry(), 0); + return SlotIndex(0, 2); } /// Construct an invalid index. - SlotIndex() : lie(IndexListEntry::getEmptyKeyEntry(), 0) {} + SlotIndex() : lie(0, 0) {} // Construct a new slot index from the given one, and set the slot. SlotIndex(const SlotIndex &li, Slot s) @@ -175,8 +137,7 @@ namespace llvm { /// Returns true if this is a valid index. Invalid indicies do /// not point into an index table, and cannot be compared. bool isValid() const { - IndexListEntry *entry = lie.getPointer(); - return ((entry!= 0) && (entry->isValid())); + return lie.getPointer(); } /// Print this index to the given raw_ostream. @@ -187,11 +148,11 @@ namespace llvm { /// Compare two SlotIndex objects for equality. bool operator==(SlotIndex other) const { - return getIndex() == other.getIndex(); + return lie == other.lie; } /// Compare two SlotIndex objects for inequality. 
bool operator!=(SlotIndex other) const { - return getIndex() != other.getIndex(); + return lie != other.lie; } /// Compare two SlotIndex objects. Return true if the first index @@ -217,6 +178,11 @@ namespace llvm { return getIndex() >= other.getIndex(); } + /// isSameInstr - Return true if A and B refer to the same instruction. + static bool isSameInstr(SlotIndex A, SlotIndex B) { + return A.lie.getPointer() == B.lie.getPointer(); + } + /// Return the distance from this index to the given one. int distance(SlotIndex other) const { return other.getIndex() - getIndex(); @@ -376,15 +342,12 @@ namespace llvm { typedef DenseMap Mi2IndexMap; Mi2IndexMap mi2iMap; - /// MBB2IdxMap - The indexes of the first and last instructions in the - /// specified basic block. - typedef DenseMap > MBB2IdxMap; - MBB2IdxMap mbb2IdxMap; + /// MBBRanges - Map MBB number to (start, stop) indexes. + SmallVector, 8> MBBRanges; /// Idx2MBBMap - Sorted list of pairs of index of first instruction /// and MBB id. - std::vector idx2MBBMap; + SmallVector idx2MBBMap; // IndexListEntry allocator. BumpPtrAllocator ileAllocator; @@ -466,6 +429,9 @@ namespace llvm { insert(getTail(), val); } + /// Renumber locally after inserting newEntry. + void renumberIndexes(IndexListEntry *newEntry); + public: static char ID; @@ -530,7 +496,7 @@ namespace llvm { /// Returns the instruction for the given index, or null if the given /// index has no instruction associated with it. MachineInstr* getInstructionFromIndex(SlotIndex index) const { - return index.entry().getInstr(); + return index.isValid() ? index.entry().getInstr() : 0; } /// Returns the next non-null index. @@ -545,12 +511,55 @@ namespace llvm { return nextNonNull; } + /// getIndexBefore - Returns the index of the last indexed instruction + /// before MI, or the start index of its basic block. + /// MI is not required to have an index.
+ SlotIndex getIndexBefore(const MachineInstr *MI) const { + const MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "MI must be inserted inna basic block"); + MachineBasicBlock::const_iterator I = MI, B = MBB->begin(); + for (;;) { + if (I == B) + return getMBBStartIdx(MBB); + --I; + Mi2IndexMap::const_iterator MapItr = mi2iMap.find(I); + if (MapItr != mi2iMap.end()) + return MapItr->second; + } + } + + /// getIndexAfter - Returns the index of the first indexed instruction + /// after MI, or the end index of its basic block. + /// MI is not required to have an index. + SlotIndex getIndexAfter(const MachineInstr *MI) const { + const MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "MI must be inserted inna basic block"); + MachineBasicBlock::const_iterator I = MI, E = MBB->end(); + for (;;) { + ++I; + if (I == E) + return getMBBEndIdx(MBB); + Mi2IndexMap::const_iterator MapItr = mi2iMap.find(I); + if (MapItr != mi2iMap.end()) + return MapItr->second; + } + } + + /// Return the (start,end) range of the given basic block number. + const std::pair & + getMBBRange(unsigned Num) const { + return MBBRanges[Num]; + } + /// Return the (start,end) range of the given basic block. const std::pair & - getMBBRange(const MachineBasicBlock *mbb) const { - MBB2IdxMap::const_iterator itr = mbb2IdxMap.find(mbb); - assert(itr != mbb2IdxMap.end() && "MBB not found in maps."); - return itr->second; + getMBBRange(const MachineBasicBlock *MBB) const { + return getMBBRange(MBB->getNumber()); + } + + /// Returns the first index in the given basic block number. + SlotIndex getMBBStartIdx(unsigned Num) const { + return getMBBRange(Num).first; } /// Returns the first index in the given basic block. @@ -558,6 +567,11 @@ namespace llvm { return getMBBRange(mbb).first; } + /// Returns the last index in the given basic block number. + SlotIndex getMBBEndIdx(unsigned Num) const { + return getMBBRange(Num).second; + } + /// Returns the last index in the given basic block. 
SlotIndex getMBBEndIdx(const MachineBasicBlock *mbb) const { return getMBBRange(mbb).second; @@ -565,10 +579,12 @@ namespace llvm { /// Returns the basic block which the given index falls in. MachineBasicBlock* getMBBFromIndex(SlotIndex index) const { - std::vector::const_iterator I = + if (MachineInstr *MI = getInstructionFromIndex(index)) + return MI->getParent(); + SmallVectorImpl::const_iterator I = std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), index); // Take the pair containing the index - std::vector::const_iterator J = + SmallVectorImpl::const_iterator J = ((I != idx2MBBMap.end() && I->first > index) || (I == idx2MBBMap.end() && idx2MBBMap.size()>0)) ? (I-1): I; @@ -580,7 +596,7 @@ namespace llvm { bool findLiveInMBBs(SlotIndex start, SlotIndex end, SmallVectorImpl &mbbs) const { - std::vector::const_iterator itr = + SmallVectorImpl::const_iterator itr = std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start); bool resVal = false; @@ -600,7 +616,7 @@ namespace llvm { assert(start < end && "Backwards ranges not allowed."); - std::vector::const_iterator itr = + SmallVectorImpl::const_iterator itr = std::lower_bound(idx2MBBMap.begin(), idx2MBBMap.end(), start); if (itr == idx2MBBMap.end()) { @@ -622,95 +638,47 @@ namespace llvm { /// Insert the given machine instruction into the mapping. Returns the /// assigned index. - SlotIndex insertMachineInstrInMaps(MachineInstr *mi, - bool *deferredRenumber = 0) { + /// If Late is set and there are null indexes between mi's neighboring + /// instructions, create the new index after the null indexes instead of + /// before them. + SlotIndex insertMachineInstrInMaps(MachineInstr *mi, bool Late = false) { assert(mi2iMap.find(mi) == mi2iMap.end() && "Instr already indexed."); // Numbering DBG_VALUE instructions could cause code generation to be // affected by debug information. 
assert(!mi->isDebugValue() && "Cannot number DBG_VALUE instructions."); - MachineBasicBlock *mbb = mi->getParent(); + assert(mi->getParent() != 0 && "Instr must be added to function."); - assert(mbb != 0 && "Instr must be added to function."); - - MBB2IdxMap::iterator mbbRangeItr = mbb2IdxMap.find(mbb); - - assert(mbbRangeItr != mbb2IdxMap.end() && - "Instruction's parent MBB has not been added to SlotIndexes."); - - MachineBasicBlock::iterator miItr(mi); - bool needRenumber = false; - IndexListEntry *newEntry; - // Get previous index, considering that not all instructions are indexed. - IndexListEntry *prevEntry; - for (;;) { - // If mi is at the mbb beginning, get the prev index from the mbb. - if (miItr == mbb->begin()) { - prevEntry = &mbbRangeItr->second.first.entry(); - break; - } - // Otherwise rewind until we find a mapped instruction. - Mi2IndexMap::const_iterator itr = mi2iMap.find(--miItr); - if (itr != mi2iMap.end()) { - prevEntry = &itr->second.entry(); - break; - } + // Get the entries where mi should be inserted. + IndexListEntry *prevEntry, *nextEntry; + if (Late) { + // Insert mi's index immediately before the following instruction. + nextEntry = &getIndexAfter(mi).entry(); + prevEntry = nextEntry->getPrev(); + } else { + // Insert mi's index immediately after the preceding instruction. + prevEntry = &getIndexBefore(mi).entry(); + nextEntry = prevEntry->getNext(); } - // Get next entry from previous entry. - IndexListEntry *nextEntry = prevEntry->getNext(); - // Get a number for the new instr, or 0 if there's no room currently. // In the latter case we'll force a renumber later. - unsigned dist = nextEntry->getIndex() - prevEntry->getIndex(); - unsigned newNumber = dist > SlotIndex::NUM ?
- prevEntry->getIndex() + ((dist >> 1) & ~3U) : 0; - - if (newNumber == 0) { - needRenumber = true; - } + unsigned dist = ((nextEntry->getIndex() - prevEntry->getIndex())/2) & ~3u; + unsigned newNumber = prevEntry->getIndex() + dist; // Insert a new list entry for mi. - newEntry = createEntry(mi, newNumber); + IndexListEntry *newEntry = createEntry(mi, newNumber); insert(nextEntry, newEntry); - + + // Renumber locally if we need to. + if (dist == 0) + renumberIndexes(newEntry); + SlotIndex newIndex(newEntry, SlotIndex::LOAD); mi2iMap.insert(std::make_pair(mi, newIndex)); - - if (miItr == mbb->end()) { - // If this is the last instr in the MBB then we need to fix up the bb - // range: - mbbRangeItr->second.second = SlotIndex(newEntry, SlotIndex::STORE); - } - - // Renumber if we need to. - if (needRenumber) { - if (deferredRenumber == 0) - renumberIndexes(); - else - *deferredRenumber = true; - } - return newIndex; } - /// Add all instructions in the vector to the index list. This method will - /// defer renumbering until all instrs have been added, and should be - /// preferred when adding multiple instrs. - void insertMachineInstrsInMaps(SmallVectorImpl &mis) { - bool renumber = false; - - for (SmallVectorImpl::iterator - miItr = mis.begin(), miEnd = mis.end(); - miItr != miEnd; ++miItr) { - insertMachineInstrInMaps(*miItr, &renumber); - } - - if (renumber) - renumberIndexes(); - } - - /// Remove the given machine instruction from the mapping. 
void removeMachineInstrFromMaps(MachineInstr *mi) { // remove index -> MachineInstr and @@ -760,21 +728,14 @@ namespace llvm { SlotIndex startIdx(startEntry, SlotIndex::LOAD); SlotIndex endIdx(nextEntry, SlotIndex::LOAD); - mbb2IdxMap.insert( - std::make_pair(mbb, std::make_pair(startIdx, endIdx))); + assert(unsigned(mbb->getNumber()) == MBBRanges.size() && + "Blocks must be added in order"); + MBBRanges.push_back(std::make_pair(startIdx, endIdx)); idx2MBBMap.push_back(IdxMBBPair(startIdx, mbb)); - if (MachineFunction::iterator(mbb) != mbb->getParent()->begin()) { - // Have to update the end index of the previous block. - MachineBasicBlock *priorMBB = - llvm::prior(MachineFunction::iterator(mbb)); - mbb2IdxMap[priorMBB].second = startIdx; - } - renumberIndexes(); std::sort(idx2MBBMap.begin(), idx2MBBMap.end(), Idx2MBBCompare()); - } }; diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index fba3e48c475e..829f580df33d 100644 --- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -59,6 +59,10 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { virtual const MCSection *getEHFrameSection() const; + virtual void emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const; + const MCSection *getDataRelSection() const { return DataRelSection; } /// getSectionForConstant - Given a constant with the SectionKind, return a @@ -81,6 +85,11 @@ class TargetLoweringObjectFileELF : public TargetLoweringObjectFile { getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const; + + // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality. 
+ virtual MCSymbol * + getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const; }; @@ -94,7 +103,7 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile { /// const MCSection *TLSBSSSection; // Defaults to ".tbss". - /// TLSTLVSection - Section for thread local structure infomation. + /// TLSTLVSection - Section for thread local structure information. /// Contains the source code name of the variable, visibility and a pointer /// to the initial value (.tdata or .tbss). const MCSection *TLSTLVSection; // Defaults to ".tlv". @@ -172,9 +181,14 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile { MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const; + // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality. + virtual MCSymbol * + getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const; + virtual unsigned getPersonalityEncoding() const; virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding() const; + virtual unsigned getFDEEncoding(bool CFI) const; virtual unsigned getTTypeEncoding() const; }; diff --git a/include/llvm/CompilerDriver/CompilationGraph.h b/include/llvm/CompilerDriver/CompilationGraph.h index e1eea325e348..951aff6f938d 100644 --- a/include/llvm/CompilerDriver/CompilationGraph.h +++ b/include/llvm/CompilerDriver/CompilationGraph.h @@ -40,7 +40,7 @@ namespace llvmc { }; /// Edge - Represents an edge of the compilation graph. 
- class Edge : public llvm::RefCountedBaseVPTR { + class Edge : public llvm::RefCountedBaseVPTR { public: Edge(const std::string& T) : ToolName_(T) {} virtual ~Edge() {} diff --git a/include/llvm/CompilerDriver/Tool.h b/include/llvm/CompilerDriver/Tool.h index d0926ba98312..18a2b767923e 100644 --- a/include/llvm/CompilerDriver/Tool.h +++ b/include/llvm/CompilerDriver/Tool.h @@ -33,7 +33,7 @@ namespace llvmc { typedef llvm::StringSet<> InputLanguagesSet; /// Tool - Represents a single tool. - class Tool : public llvm::RefCountedBaseVPTR { + class Tool : public llvm::RefCountedBaseVPTR { public: virtual ~Tool() {} diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake index bf69375ff562..755daa6dc37a 100644 --- a/include/llvm/Config/config.h.cmake +++ b/include/llvm/Config/config.h.cmake @@ -196,6 +196,9 @@ /* Define to 1 if you have the `udis86' library (-ludis86). */ #undef HAVE_LIBUDIS86 +/* Type of 1st arg on ELM Callback */ +#cmakedefine WIN32_ELMCB_PCSTR ${WIN32_ELMCB_PCSTR} + /* Define to 1 if you have the header file. 
*/ #cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H} @@ -453,7 +456,7 @@ #cmakedefine HAVE_WRITEV ${HAVE_WRITEV} /* Define if the xdot.py program is available */ -#undef HAVE_XDOT_PY +#cmakedefine HAVE_XDOT_PY ${HAVE_XDOT_PY} /* Have host's _alloca */ #cmakedefine HAVE__ALLOCA ${HAVE__ALLOCA} @@ -585,7 +588,7 @@ #cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}" /* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */ -#undef LLVM_PATH_XDOT_PY +#cmakedefine LLVM_PATH_XDOT_PY "${LLVM_PATH_XDOT_PY}" /* Installation prefix directory */ #cmakedefine LLVM_PREFIX "${LLVM_PREFIX}" diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in index 14c44b4b1e20..10a8935f5611 100644 --- a/include/llvm/Config/config.h.in +++ b/include/llvm/Config/config.h.in @@ -567,6 +567,9 @@ /* LLVM architecture name for the native architecture, if available */ #undef LLVM_NATIVE_ARCH +/* LLVM name for the native AsmParser init function, if available */ +#undef LLVM_NATIVE_ASMPARSER + /* LLVM name for the native AsmPrinter init function, if available */ #undef LLVM_NATIVE_ASMPRINTER @@ -672,6 +675,9 @@ /* Define if use udis86 library */ #undef USE_UDIS86 +/* Type of 1st arg on ELM Callback */ +#undef WIN32_ELMCB_PCSTR + /* Define to empty if `const' does not conform to ANSI C. 
*/ #undef const diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake index a679b956b373..9a9cb3b739ea 100644 --- a/include/llvm/Config/llvm-config.h.cmake +++ b/include/llvm/Config/llvm-config.h.cmake @@ -61,6 +61,9 @@ /* LLVM name for the native AsmPrinter init function, if available */ #cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter +/* LLVM name for the native AsmParser init function, if available */ +#cmakedefine LLVM_NATIVE_ASMPARSER LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser + /* Define if this is Unixish platform */ #cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX} @@ -91,6 +94,9 @@ /* Define to path to twopi program if found or 'echo twopi' otherwise */ #cmakedefine LLVM_PATH_TWOPI "${LLVM_PATH_TWOPI}" +/* Define to path to xdot.py program if found or 'echo xdot.py' otherwise */ +#cmakedefine LLVM_PATH_XDOT_PY "${LLVM_PATH_XDOT_PY}" + /* Installation prefix directory */ #cmakedefine LLVM_PREFIX "${LLVM_PREFIX}" diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in index e7a04ee91bb9..4766a7a2b245 100644 --- a/include/llvm/Config/llvm-config.h.in +++ b/include/llvm/Config/llvm-config.h.in @@ -61,6 +61,9 @@ /* LLVM name for the native AsmPrinter init function, if available */ #undef LLVM_NATIVE_ASMPRINTER +/* LLVM name for the native AsmParser init function, if available */ +#undef LLVM_NATIVE_ASMPARSER + /* Define if this is Unixish platform */ #undef LLVM_ON_UNIX diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h index 38045fc0c1d6..5f32ce0ac5e2 100644 --- a/include/llvm/Constant.h +++ b/include/llvm/Constant.h @@ -47,10 +47,6 @@ class Constant : public User { : User(ty, vty, Ops, NumOps) {} void destroyConstantImpl(); - - void setOperand(unsigned i, Value *V) { - User::setOperand(i, V); - } public: /// isNullValue - Return true if this is the value that would be returned by /// getNullValue.
@@ -90,15 +86,6 @@ class Constant : public User { /// FIXME: This really should not be in VMCore. PossibleRelocationsTy getRelocationInfo() const; - // Specialize get/setOperand for Users as their operands are always - // constants or BasicBlocks as well. - User *getOperand(unsigned i) { - return static_cast(User::getOperand(i)); - } - const User *getOperand(unsigned i) const { - return static_cast(User::getOperand(i)); - } - /// getVectorElements - This method, which is only valid on constant of vector /// type, returns the elements of the vector in the specified smallvector. /// This handles breaking down a vector undef into undef elements, etc. For diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h index c4768f842345..eabc3a50aa0a 100644 --- a/include/llvm/Constants.h +++ b/include/llvm/Constants.h @@ -57,6 +57,8 @@ class ConstantInt : public Constant { public: static ConstantInt *getTrue(LLVMContext &Context); static ConstantInt *getFalse(LLVMContext &Context); + static Constant *getTrue(const Type *Ty); + static Constant *getFalse(const Type *Ty); /// If Ty is a vector type, return a Constant with a splat of the given /// value. Otherwise return a ConstantInt for the given value. @@ -425,6 +427,8 @@ class ConstantStruct : public Constant { const std::vector &V, bool Packed); static Constant *get(LLVMContext &Context, Constant *const *Vals, unsigned NumVals, bool Packed); + static Constant *get(LLVMContext &Context, bool Packed, + Constant * Val, ...) END_WITH_NULL; /// Transparently provide more efficient getOperand methods. DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant); @@ -599,6 +603,7 @@ struct OperandTraits : DEFINE_TRANSPARENT_CASTED_OPERAND_ACCESSORS(BlockAddress, Value) + //===----------------------------------------------------------------------===// /// ConstantExpr - a constant value that is initialized with an expression using /// other constant values. 
@@ -836,7 +841,7 @@ class ConstantExpr : public Constant { static Constant *getICmp(unsigned short pred, Constant *LHS, Constant *RHS); static Constant *getFCmp(unsigned short pred, Constant *LHS, Constant *RHS); - /// Getelementptr form. std::vector is only accepted for convenience: + /// Getelementptr form. Value* is only accepted for convenience; /// all elements must be Constant's. /// static Constant *getGetElementPtr(Constant *C, @@ -880,7 +885,7 @@ class ConstantExpr : public Constant { /// getIndices - Assert that this is an insertvalue or exactvalue /// expression and return the list of indices. - const SmallVector &getIndices() const; + ArrayRef getIndices() const; /// getOpcodeName - Return a string representation for an opcode. const char *getOpcodeName() const; @@ -892,10 +897,7 @@ class ConstantExpr : public Constant { /// getWithOperands - This returns the current constant expression with the /// operands replaced with the specified values. The specified operands must /// match count and type with the existing ones. - Constant *getWithOperands(const std::vector &Ops) const { - return getWithOperands(&Ops[0], (unsigned)Ops.size()); - } - Constant *getWithOperands(Constant *const *Ops, unsigned NumOps) const; + Constant *getWithOperands(ArrayRef Ops) const; virtual void destroyConstant(); virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U); diff --git a/include/llvm/DebugInfoProbe.h b/include/llvm/DebugInfoProbe.h new file mode 100644 index 000000000000..78d00dfeeddf --- /dev/null +++ b/include/llvm/DebugInfoProbe.h @@ -0,0 +1,67 @@ +//===-- DebugInfoProbe.h - DebugInfo Probe ----------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines a probe, DebugInfoProbe, that can be used by pass +// manager to analyze how optimizer is treating debugging information. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H +#define LLVM_TRANSFORMS_UTILS_DEBUGINFOPROBE_H + +#include "llvm/ADT/StringMap.h" + +namespace llvm { + class Function; + class Pass; + class DebugInfoProbeImpl; + + /// DebugInfoProbe - This class provides an interface to monitor + /// how an optimization pass is preserving debugging information. + class DebugInfoProbe { + public: + DebugInfoProbe(); + ~DebugInfoProbe(); + + /// initialize - Collect information before running an optimization pass. + void initialize(StringRef PName, Function &F); + + /// finalize - Collect information after running an optimization pass. This + /// must be used after initialization. + void finalize(Function &F); + + /// report - Report findings. This should be invoked after finalize. + void report(); + + private: + DebugInfoProbeImpl *pImpl; + }; + + /// DebugInfoProbeInfo - This class provides an interface that a pass manager + /// can use to manage debug info probes. + class DebugInfoProbeInfo { + StringMap Probes; + public: + DebugInfoProbeInfo() {} + + /// ~DebugInfoProbeInfo - Report data collected by all probes before deleting + /// them. + ~DebugInfoProbeInfo(); + + /// initialize - Collect information before running an optimization pass. + void initialize(Pass *P, Function &F); + + /// finalize - Collect information after running an optimization pass. This + /// must be used after initialization. 
+ void finalize(Pass *P, Function &F); + }; + +} // End llvm namespace + +#endif diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h index 56d1e3e237d6..f1cb33039f81 100644 --- a/include/llvm/DerivedTypes.h +++ b/include/llvm/DerivedTypes.h @@ -19,6 +19,7 @@ #define LLVM_DERIVED_TYPES_H #include "llvm/Type.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/Support/DataTypes.h" namespace llvm { @@ -147,7 +148,7 @@ class FunctionType : public DerivedType { FunctionType(const FunctionType &); // Do not implement const FunctionType &operator=(const FunctionType &); // Do not implement - FunctionType(const Type *Result, const std::vector &Params, + FunctionType(const Type *Result, ArrayRef Params, bool IsVarArgs); public: @@ -156,7 +157,7 @@ class FunctionType : public DerivedType { /// static FunctionType *get( const Type *Result, ///< The result type - const std::vector &Params, ///< The types of the parameters + ArrayRef Params, ///< The types of the parameters bool isVarArg ///< Whether this is a variable argument length function ); @@ -166,7 +167,7 @@ class FunctionType : public DerivedType { const Type *Result, ///< The result type bool isVarArg ///< Whether this is a variable argument length function ) { - return get(Result, std::vector(), isVarArg); + return get(Result, ArrayRef(), isVarArg); } /// isValidReturnType - Return true if the specified type is valid as a return @@ -237,20 +238,19 @@ class StructType : public CompositeType { friend class TypeMap; StructType(const StructType &); // Do not implement const StructType &operator=(const StructType &); // Do not implement - StructType(LLVMContext &C, - const std::vector &Types, bool isPacked); + StructType(LLVMContext &C, ArrayRef Types, bool isPacked); public: /// StructType::get - This static method is the primary way to create a /// StructType. 
/// static StructType *get(LLVMContext &Context, - const std::vector &Params, + ArrayRef Params, bool isPacked=false); /// StructType::get - Create an empty structure type. /// static StructType *get(LLVMContext &Context, bool isPacked=false) { - return get(Context, std::vector(), isPacked); + return get(Context, llvm::ArrayRef(), isPacked); } /// StructType::get - This static method is a convenience method for diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h index 71698fa00874..a01ad3ae7755 100644 --- a/include/llvm/ExecutionEngine/ExecutionEngine.h +++ b/include/llvm/ExecutionEngine/ExecutionEngine.h @@ -21,6 +21,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/ValueMap.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/Mutex.h" #include "llvm/Target/TargetMachine.h" @@ -117,11 +118,11 @@ class ExecutionEngine { /// The list of Modules that we are JIT'ing from. We use a SmallVector to /// optimize for the case where there is only one module. SmallVector Modules; - + void setTargetData(const TargetData *td) { TD = td; } - + /// getMemoryforGV - Allocate memory for a global variable. virtual char *getMemoryForGV(const GlobalVariable *GV); @@ -155,13 +156,15 @@ class ExecutionEngine { /// pointer is invoked to create it. If this returns null, the JIT will /// abort. void *(*LazyFunctionCreator)(const std::string &); - + /// ExceptionTableRegister - If Exception Handling is set, the JIT will /// register dwarf tables with this function. typedef void (*EERegisterFn)(void*); EERegisterFn ExceptionTableRegister; EERegisterFn ExceptionTableDeregister; - std::vector AllExceptionTables; + /// This maps functions to their exception tables frames. 
+ DenseMap AllExceptionTables; + public: /// lock - This lock protects the ExecutionEngine, JIT, JITResolver and @@ -182,7 +185,7 @@ class ExecutionEngine { /// \param GVsWithCode - Allocating globals with code breaks /// freeMachineCodeForFunction and is probably unsafe and bad for performance. /// However, we have clients who depend on this behavior, so we must support - /// it. Eventually, when we're willing to break some backwards compatability, + /// it. Eventually, when we're willing to break some backwards compatibility, /// this flag should be flipped to false, so that by default /// freeMachineCodeForFunction works. static ExecutionEngine *create(Module *M, @@ -213,7 +216,7 @@ class ExecutionEngine { virtual void addModule(Module *M) { Modules.push_back(M); } - + //===--------------------------------------------------------------------===// const TargetData *getTargetData() const { return TD; } @@ -226,7 +229,7 @@ class ExecutionEngine { /// defines FnName. This is very slow operation and shouldn't be used for /// general code. Function *FindFunctionNamed(const char *FnName); - + /// runFunction - Execute the specified function with the specified arguments, /// and return the result. virtual GenericValue runFunction(Function *F, @@ -243,8 +246,8 @@ class ExecutionEngine { /// /// \param isDtors - Run the destructors instead of constructors. void runStaticConstructorsDestructors(Module *module, bool isDtors); - - + + /// runFunctionAsMain - This is a helper function which wraps runFunction to /// handle the common task of starting up main with the specified argc, argv, /// and envp parameters. @@ -259,21 +262,21 @@ class ExecutionEngine { /// existing data in memory. Mappings are automatically removed when their /// GlobalValue is destroyed. void addGlobalMapping(const GlobalValue *GV, void *Addr); - + /// clearAllGlobalMappings - Clear all global mappings and start over again, /// for use in dynamic compilation scenarios to move globals. 
void clearAllGlobalMappings(); - + /// clearGlobalMappingsFromModule - Clear all global mappings that came from a /// particular module, because it has been removed from the JIT. void clearGlobalMappingsFromModule(Module *M); - + /// updateGlobalMapping - Replace an existing mapping for GV with a new /// address. This updates both maps as required. If "Addr" is null, the /// entry for the global is removed from the mappings. This returns the old /// value of the pointer, or null if it was not in the map. void *updateGlobalMapping(const GlobalValue *GV, void *Addr); - + /// getPointerToGlobalIfAvailable - This returns the address of the specified /// global value if it is has already been codegen'd, otherwise it returns /// null. @@ -294,7 +297,7 @@ class ExecutionEngine { /// different ways. Return the representation for a blockaddress of the /// specified block. virtual void *getPointerToBasicBlock(BasicBlock *BB) = 0; - + /// getPointerToFunctionOrStub - If the specified function has been /// code-gen'd, return a pointer to the function. If not, compile it, or use /// a stub to implement lazy compilation if available. See @@ -398,7 +401,7 @@ class ExecutionEngine { void InstallLazyFunctionCreator(void* (*P)(const std::string &)) { LazyFunctionCreator = P; } - + /// InstallExceptionTableRegister - The JIT will use the given function /// to register the exception tables it generates. void InstallExceptionTableRegister(EERegisterFn F) { @@ -407,13 +410,26 @@ class ExecutionEngine { void InstallExceptionTableDeregister(EERegisterFn F) { ExceptionTableDeregister = F; } - + /// RegisterTable - Registers the given pointer as an exception table. It /// uses the ExceptionTableRegister function. 
- void RegisterTable(void* res) { + void RegisterTable(const Function *fn, void* res) { if (ExceptionTableRegister) { ExceptionTableRegister(res); - AllExceptionTables.push_back(res); + AllExceptionTables[fn] = res; + } + } + + /// DeregisterTable - Deregisters the exception frame previously registered + /// for the given function. + void DeregisterTable(const Function *Fn) { + if (ExceptionTableDeregister) { + DenseMap::iterator frame = + AllExceptionTables.find(Fn); + if(frame != AllExceptionTables.end()) { + ExceptionTableDeregister(frame->second); + AllExceptionTables.erase(frame); + } } } @@ -429,7 +445,7 @@ class ExecutionEngine { void EmitGlobalVariable(const GlobalVariable *GV); GenericValue getConstantValue(const Constant *C); - void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, + void LoadValueFromMemory(GenericValue &Result, GenericValue *Ptr, const Type *Ty); }; @@ -540,8 +556,9 @@ class EngineBuilder { /// setUseMCJIT - Set whether the MC-JIT implementation should be used /// (experimental). - void setUseMCJIT(bool Value) { + EngineBuilder &setUseMCJIT(bool Value) { UseMCJIT = Value; + return *this; } /// setMAttrs - Set cpu-specific attributes. diff --git a/include/llvm/ExecutionEngine/JITMemoryManager.h b/include/llvm/ExecutionEngine/JITMemoryManager.h index 384141801667..a63f0da773a2 100644 --- a/include/llvm/ExecutionEngine/JITMemoryManager.h +++ b/include/llvm/ExecutionEngine/JITMemoryManager.h @@ -29,11 +29,11 @@ class JITMemoryManager { public: JITMemoryManager() : HasGOT(false) {} virtual ~JITMemoryManager(); - + /// CreateDefaultMemManager - This is used to create the default /// JIT Memory Manager if the client does not provide one to the JIT. static JITMemoryManager *CreateDefaultMemManager(); - + /// setMemoryWritable - When code generation is in progress, /// the code pages may need permissions changed. virtual void setMemoryWritable() = 0; @@ -55,16 +55,16 @@ class JITMemoryManager { /// method is invoked to allocate it. 
This method is required to set HasGOT /// to true. virtual void AllocateGOT() = 0; - + /// isManagingGOT - Return true if the AllocateGOT method is called. bool isManagingGOT() const { return HasGOT; } - + /// getGOTBase - If this is managing a Global Offset Table, this method should /// return a pointer to its base. virtual uint8_t *getGOTBase() const = 0; - + //===--------------------------------------------------------------------===// // Main Allocation Functions //===--------------------------------------------------------------------===// @@ -91,11 +91,11 @@ class JITMemoryManager { /// startFunctionBody. virtual uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, unsigned Alignment) = 0; - + /// endFunctionBody - This method is called when the JIT is done codegen'ing /// the specified function. At this point we know the size of the JIT /// compiled function. This passes in FunctionStart (which was returned by - /// the startFunctionBody method) and FunctionEnd which is a pointer to the + /// the startFunctionBody method) and FunctionEnd which is a pointer to the /// actual end of the function. This method should mark the space allocated /// and remember where it is in case the client wants to deallocate it. virtual void endFunctionBody(const Function *F, uint8_t *FunctionStart, @@ -113,12 +113,12 @@ class JITMemoryManager { /// been deallocated yet. This is never called when the JIT is currently /// emitting a function. virtual void deallocateFunctionBody(void *Body) = 0; - + /// startExceptionTable - When we finished JITing the function, if exception /// handling is set, we emit the exception table. virtual uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) = 0; - + /// endExceptionTable - This method is called when the JIT is done emitting /// the exception table. 
virtual void endExceptionTable(const Function *F, uint8_t *TableStart, diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h new file mode 100644 index 000000000000..3dc65e33d4e8 --- /dev/null +++ b/include/llvm/ExecutionEngine/RuntimeDyld.h @@ -0,0 +1,75 @@ +//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface for the runtime dynamic linker facilities of the MC-JIT. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_RUNTIME_DYLD_H +#define LLVM_RUNTIME_DYLD_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Memory.h" + +namespace llvm { + +class RuntimeDyldImpl; +class MemoryBuffer; + +// RuntimeDyld clients often want to handle the memory management of +// what gets placed where. For JIT clients, this is an abstraction layer +// over the JITMemoryManager, which references objects by their source +// representations in LLVM IR. +// FIXME: As the RuntimeDyld fills out, additional routines will be needed +// for the varying types of objects to be allocated. +class RTDyldMemoryManager { + RTDyldMemoryManager(const RTDyldMemoryManager&); // DO NOT IMPLEMENT + void operator=(const RTDyldMemoryManager&); // DO NOT IMPLEMENT +public: + RTDyldMemoryManager() {} + virtual ~RTDyldMemoryManager(); + + // Allocate Size bytes, or more, for the named function. Return + // a pointer to the allocated memory and update Size to reflect how much + // memory was actually allocated. + virtual uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) = 0; + + // Mark the end of the function, including how much of the allocated + // memory was actually used. 
+ virtual void endFunctionBody(const char *Name, uint8_t *FunctionStart, + uint8_t *FunctionEnd) = 0; +}; + +class RuntimeDyld { + RuntimeDyld(const RuntimeDyld &); // DO NOT IMPLEMENT + void operator=(const RuntimeDyld &); // DO NOT IMPLEMENT + + // RuntimeDyldImpl is the actual class. RuntimeDyld is just the public + // interface. + RuntimeDyldImpl *Dyld; +public: + RuntimeDyld(RTDyldMemoryManager*); + ~RuntimeDyld(); + + bool loadObject(MemoryBuffer *InputBuffer); + // Get the address of our local copy of the symbol. This may or may not + // be the address used for relocation (clients can copy the data around + // and resolve relocations based on where they put it). + void *getSymbolAddress(StringRef Name); + // Resolve the relocations for all symbols we currently know about. + void resolveRelocations(); + // Change the address associated with a symbol when resolving relocations. + // Any relocations already associated with the symbol will be re-resolved. + void reassignSymbolAddress(StringRef Name, uint8_t *Addr); + StringRef getErrorString(); +}; + +} // end namespace llvm + +#endif diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h index 1769c665d062..442e0c0e1b20 100644 --- a/include/llvm/GlobalVariable.h +++ b/include/llvm/GlobalVariable.h @@ -12,7 +12,7 @@ // // Global variables are constant pointers that refer to hunks of space that are // allocated by either the VM, or by the linker in a static compiler. A global -// variable may have an intial value, which is copied into the executables .data +// variable may have an initial value, which is copied into the executables .data // area. Global Constants are required to have initializers. 
// //===----------------------------------------------------------------------===// diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 02dbfbd26d58..cca0194a60eb 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -94,12 +94,12 @@ void initializeDominatorTreePass(PassRegistry&); void initializeEdgeBundlesPass(PassRegistry&); void initializeEdgeProfilerPass(PassRegistry&); void initializePathProfilerPass(PassRegistry&); +void initializeGCOVProfilerPass(PassRegistry&); void initializeEarlyCSEPass(PassRegistry&); void initializeExpandISelPseudosPass(PassRegistry&); void initializeFindUsedTypesPass(PassRegistry&); void initializeFunctionAttrsPass(PassRegistry&); void initializeGCModuleInfoPass(PassRegistry&); -void initializeGEPSplitterPass(PassRegistry&); void initializeGVNPass(PassRegistry&); void initializeGlobalDCEPass(PassRegistry&); void initializeGlobalOptPass(PassRegistry&); @@ -123,7 +123,6 @@ void initializeLintPass(PassRegistry&); void initializeLiveDebugVariablesPass(PassRegistry&); void initializeLiveIntervalsPass(PassRegistry&); void initializeLiveStacksPass(PassRegistry&); -void initializeLiveValuesPass(PassRegistry&); void initializeLiveVariablesPass(PassRegistry&); void initializeLoaderPassPass(PassRegistry&); void initializePathProfileLoaderPassPass(PassRegistry&); @@ -170,7 +169,6 @@ void initializePostDomOnlyPrinterPass(PassRegistry&); void initializePostDomOnlyViewerPass(PassRegistry&); void initializePostDomPrinterPass(PassRegistry&); void initializePostDomViewerPass(PassRegistry&); -void initializePostDominanceFrontierPass(PassRegistry&); void initializePostDominatorTreePass(PassRegistry&); void initializePreAllocSplittingPass(PassRegistry&); void initializePreVerifierPass(PassRegistry&); @@ -196,14 +194,12 @@ void initializeRegionViewerPass(PassRegistry&); void initializeRegisterCoalescerAnalysisGroup(PassRegistry&); void initializeRenderMachineFunctionPass(PassRegistry&); void 
initializeSCCPPass(PassRegistry&); -void initializeSRETPromotionPass(PassRegistry&); void initializeSROA_DTPass(PassRegistry&); void initializeSROA_SSAUpPass(PassRegistry&); void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&); void initializeScalarEvolutionPass(PassRegistry&); void initializeSimpleInlinerPass(PassRegistry&); void initializeSimpleRegisterCoalescingPass(PassRegistry&); -void initializeSimplifyHalfPowrLibCallsPass(PassRegistry&); void initializeSimplifyLibCallsPass(PassRegistry&); void initializeSingleLoopExtractorPass(PassRegistry&); void initializeSinkingPass(PassRegistry&); diff --git a/include/llvm/InstrTypes.h b/include/llvm/InstrTypes.h index a166956e1a64..cc9ec3ac76e1 100644 --- a/include/llvm/InstrTypes.h +++ b/include/llvm/InstrTypes.h @@ -18,7 +18,6 @@ #include "llvm/Instruction.h" #include "llvm/OperandTraits.h" -#include "llvm/Operator.h" #include "llvm/DerivedTypes.h" #include "llvm/ADT/Twine.h" diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index 17ff763c52bf..54dfe3957fff 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -584,7 +584,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value) /// @brief Represent an integer comparison operator. class ICmpInst: public CmpInst { protected: - /// @brief Clone an indentical ICmpInst + /// @brief Clone an identical ICmpInst virtual ICmpInst *clone_impl() const; public: /// @brief Constructor with insert-before-instruction semantics. @@ -735,7 +735,7 @@ class ICmpInst: public CmpInst { /// @brief Represents a floating point comparison operator. class FCmpInst: public CmpInst { protected: - /// @brief Clone an indentical FCmpInst + /// @brief Clone an identical FCmpInst virtual FCmpInst *clone_impl() const; public: /// @brief Constructor with insert-before-instruction semantics. 
@@ -1811,39 +1811,37 @@ class PHINode : public Instruction { void *operator new(size_t s) { return User::operator new(s, 0); } - explicit PHINode(const Type *Ty, const Twine &NameStr = "", - Instruction *InsertBefore = 0) + explicit PHINode(const Type *Ty, unsigned NumReservedValues, + const Twine &NameStr = "", Instruction *InsertBefore = 0) : Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore), - ReservedSpace(0) { + ReservedSpace(NumReservedValues * 2) { setName(NameStr); + OperandList = allocHungoffUses(ReservedSpace); } - PHINode(const Type *Ty, const Twine &NameStr, BasicBlock *InsertAtEnd) + PHINode(const Type *Ty, unsigned NumReservedValues, const Twine &NameStr, + BasicBlock *InsertAtEnd) : Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd), - ReservedSpace(0) { + ReservedSpace(NumReservedValues * 2) { setName(NameStr); + OperandList = allocHungoffUses(ReservedSpace); } protected: virtual PHINode *clone_impl() const; public: - static PHINode *Create(const Type *Ty, const Twine &NameStr = "", + /// Constructors - NumReservedValues is a hint for the number of incoming + /// edges that this phi node will have (use 0 if you really have no idea). + static PHINode *Create(const Type *Ty, unsigned NumReservedValues, + const Twine &NameStr = "", Instruction *InsertBefore = 0) { - return new PHINode(Ty, NameStr, InsertBefore); + return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore); } - static PHINode *Create(const Type *Ty, const Twine &NameStr, - BasicBlock *InsertAtEnd) { - return new PHINode(Ty, NameStr, InsertAtEnd); + static PHINode *Create(const Type *Ty, unsigned NumReservedValues, + const Twine &NameStr, BasicBlock *InsertAtEnd) { + return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd); } ~PHINode(); - /// reserveOperandSpace - This method can be used to avoid repeated - /// reallocation of PHI operand lists by reserving space for the correct - /// number of operands before adding them. 
Unlike normal vector reserves, - /// this method can also be used to trim the operand space. - void reserveOperandSpace(unsigned NumValues) { - resizeOperands(NumValues*2); - } - /// Provide fast operand accessors DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value); @@ -1912,7 +1910,7 @@ class PHINode : public Instruction { "All operands to PHI node must be the same type as the PHI node!"); unsigned OpNo = NumOperands; if (OpNo+2 > ReservedSpace) - resizeOperands(0); // Get more space! + growOperands(); // Get more space! // Initialize some new operands. NumOperands = OpNo+2; OperandList[OpNo] = V; @@ -1962,7 +1960,7 @@ class PHINode : public Instruction { return isa(V) && classof(cast(V)); } private: - void resizeOperands(unsigned NumOperands); + void growOperands(); }; template <> @@ -2154,7 +2152,7 @@ class SwitchInst : public TerminatorInst { // Operand[2n+1] = BasicBlock to go to on match SwitchInst(const SwitchInst &SI); void init(Value *Value, BasicBlock *Default, unsigned NumReserved); - void resizeOperands(unsigned No); + void growOperands(); // allocate space for exactly zero operands void *operator new(size_t s) { return User::operator new(s, 0); @@ -2306,7 +2304,7 @@ class IndirectBrInst : public TerminatorInst { // Operand[2n+1] = BasicBlock to go to on match IndirectBrInst(const IndirectBrInst &IBI); void init(Value *Address, unsigned NumDests); - void resizeOperands(unsigned No); + void growOperands(); // allocate space for exactly zero operands void *operator new(size_t s) { return User::operator new(s, 0); diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td index 0c9be78b0d10..a63cd6ab600e 100644 --- a/include/llvm/Intrinsics.td +++ b/include/llvm/Intrinsics.td @@ -30,7 +30,7 @@ class IntrinsicProperty; def IntrNoMem : IntrinsicProperty; // IntrReadArgMem - This intrinsic reads only from memory that one of its -// arguments points to, but may read an unspecified amount. 
+// pointer-typed arguments points to, but may read an unspecified amount. def IntrReadArgMem : IntrinsicProperty; // IntrReadMem - This intrinsic reads from unspecified memory, so it cannot be @@ -307,7 +307,7 @@ let Properties = [IntrNoMem] in { def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>; def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>; } -def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_ptr_ty]>; +def int_eh_sjlj_dispatch_setup : Intrinsic<[], []>; def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>; @@ -490,3 +490,4 @@ include "llvm/IntrinsicsARM.td" include "llvm/IntrinsicsCellSPU.td" include "llvm/IntrinsicsAlpha.td" include "llvm/IntrinsicsXCore.td" +include "llvm/IntrinsicsPTX.td" diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td index 546538a57abd..03e9261e60cb 100644 --- a/include/llvm/IntrinsicsARM.td +++ b/include/llvm/IntrinsicsARM.td @@ -1,10 +1,10 @@ //===- IntrinsicsARM.td - Defines ARM intrinsics -----------*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // // This file defines all of the ARM-specific intrinsics. @@ -129,8 +129,12 @@ let Properties = [IntrNoMem, Commutative] in { def int_arm_neon_vmulp : Neon_2Arg_Intrinsic; def int_arm_neon_vqdmulh : Neon_2Arg_Intrinsic; def int_arm_neon_vqrdmulh : Neon_2Arg_Intrinsic; + def int_arm_neon_vmulls : Neon_2Arg_Long_Intrinsic; + def int_arm_neon_vmullu : Neon_2Arg_Long_Intrinsic; def int_arm_neon_vmullp : Neon_2Arg_Long_Intrinsic; def int_arm_neon_vqdmull : Neon_2Arg_Long_Intrinsic; + + // Vector Multiply and Accumulate/Subtract. 
def int_arm_neon_vqdmlal : Neon_3Arg_Long_Intrinsic; def int_arm_neon_vqdmlsl : Neon_3Arg_Long_Intrinsic; diff --git a/include/llvm/IntrinsicsPTX.td b/include/llvm/IntrinsicsPTX.td new file mode 100644 index 000000000000..28379c918dea --- /dev/null +++ b/include/llvm/IntrinsicsPTX.td @@ -0,0 +1,92 @@ +//===- IntrinsicsPTX.td - Defines PTX intrinsics -----------*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the PTX-specific intrinsics. +// +//===----------------------------------------------------------------------===// + +let TargetPrefix = "ptx" in { + multiclass PTXReadSpecialRegisterIntrinsic_v4i32 { +// FIXME: Do we need the 128-bit integer type version? +// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>; + +// FIXME: Enable this once v4i32 support is enabled in back-end. 
+// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>; + + def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + } + + class PTXReadSpecialRegisterIntrinsic_r32 + : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>, + GCCBuiltin; + + class PTXReadSpecialRegisterIntrinsic_r64 + : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>, + GCCBuiltin; +} + +defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_tid">; +defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_ntid">; + +def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_laneid">; +def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_warpid">; +def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_nwarpid">; + +defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_ctaid">; +defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32 + <"__builtin_ptx_read_nctaid">; + +def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_smid">; +def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_nsmid">; +def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_gridid">; + +def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_eq">; +def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_le">; +def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_lt">; +def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_ge">; +def int_ptx_read_lanemask_gt : 
PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_lanemask_gt">; + +def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_clock">; +def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64 + <"__builtin_ptx_read_clock64">; + +def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm0">; +def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm1">; +def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm2">; +def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32 + <"__builtin_ptx_read_pm3">; + +let TargetPrefix = "ptx" in + def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>, + GCCBuiltin<"__builtin_ptx_bar_sync">; diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td index 49462200f093..b44101a11c07 100644 --- a/include/llvm/IntrinsicsX86.td +++ b/include/llvm/IntrinsicsX86.td @@ -17,6 +17,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_int : Intrinsic<[], [llvm_i8_ty]>; } +//===----------------------------------------------------------------------===// +// 3DNow! 
+ +let TargetPrefix = "x86" in { + def int_x86_3dnow_pavgusb : GCCBuiltin<"__builtin_ia32_pavgusb">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pf2id : GCCBuiltin<"__builtin_ia32_pf2id">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_3dnow_pfacc : GCCBuiltin<"__builtin_ia32_pfacc">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfadd : GCCBuiltin<"__builtin_ia32_pfadd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfcmpeq : GCCBuiltin<"__builtin_ia32_pfcmpeq">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfcmpge : GCCBuiltin<"__builtin_ia32_pfcmpge">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfcmpgt : GCCBuiltin<"__builtin_ia32_pfcmpgt">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfmax : GCCBuiltin<"__builtin_ia32_pfmax">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfmin : GCCBuiltin<"__builtin_ia32_pfmin">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfmul : GCCBuiltin<"__builtin_ia32_pfmul">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfrcp : GCCBuiltin<"__builtin_ia32_pfrcp">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_3dnow_pfrcpit1 : GCCBuiltin<"__builtin_ia32_pfrcpit1">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfrcpit2 : GCCBuiltin<"__builtin_ia32_pfrcpit2">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfrsqrt : GCCBuiltin<"__builtin_ia32_pfrsqrt">, + Intrinsic<[llvm_x86mmx_ty], 
[llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_3dnow_pfrsqit1 : GCCBuiltin<"__builtin_ia32_pfrsqit1">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfsub : GCCBuiltin<"__builtin_ia32_pfsub">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pfsubr : GCCBuiltin<"__builtin_ia32_pfsubr">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnow_pi2fd : GCCBuiltin<"__builtin_ia32_pi2fd">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_3dnow_pmulhrw : GCCBuiltin<"__builtin_ia32_pmulhrw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; +} + +//===----------------------------------------------------------------------===// +// 3DNow! extensions + +let TargetPrefix = "x86" in { + def int_x86_3dnowa_pf2iw : GCCBuiltin<"__builtin_ia32_pf2iw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_3dnowa_pfnacc : GCCBuiltin<"__builtin_ia32_pfnacc">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnowa_pfpnacc : GCCBuiltin<"__builtin_ia32_pfpnacc">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], + [IntrNoMem]>; + def int_x86_3dnowa_pi2fw : GCCBuiltin<"__builtin_ia32_pi2fw">, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; + def int_x86_3dnowa_pswapd : + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; +} + //===----------------------------------------------------------------------===// // SSE1 @@ -138,12 +215,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". llvm_x86mmx_ty], [IntrNoMem]>; } -// SIMD load ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
- def int_x86_sse_loadu_ps : GCCBuiltin<"__builtin_ia32_loadups">, - Intrinsic<[llvm_v4f32_ty], [llvm_ptr_ty], [IntrReadMem]>; -} - // SIMD store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse_storeu_ps : GCCBuiltin<"__builtin_ia32_storeups">, @@ -452,14 +523,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v2f64_ty], [llvm_x86mmx_ty], [IntrNoMem]>; } -// SIMD load ops -let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". - def int_x86_sse2_loadu_pd : GCCBuiltin<"__builtin_ia32_loadupd">, - Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem]>; - def int_x86_sse2_loadu_dq : GCCBuiltin<"__builtin_ia32_loaddqu">, - Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty], [IntrReadMem]>; -} - // SIMD store ops let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_sse2_storeu_pd : GCCBuiltin<"__builtin_ia32_storeupd">, @@ -921,68 +984,68 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". // String/text processing ops. let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
def int_x86_sse42_pcmpistrm128 : GCCBuiltin<"__builtin_ia32_pcmpistrm128">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpistri128 : GCCBuiltin<"__builtin_ia32_pcmpistri128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpistria128 : GCCBuiltin<"__builtin_ia32_pcmpistria128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpistric128 : GCCBuiltin<"__builtin_ia32_pcmpistric128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpistrio128 : GCCBuiltin<"__builtin_ia32_pcmpistrio128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpistris128 : GCCBuiltin<"__builtin_ia32_pcmpistris128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpistriz128 : GCCBuiltin<"__builtin_ia32_pcmpistriz128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpestrm128 : GCCBuiltin<"__builtin_ia32_pcmpestrm128">, - Intrinsic<[llvm_v16i8_ty], - [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i8_ty], - [IntrNoMem]>; + 
Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpestri128 : GCCBuiltin<"__builtin_ia32_pcmpestri128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpestria128 : GCCBuiltin<"__builtin_ia32_pcmpestria128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpestric128 : GCCBuiltin<"__builtin_ia32_pcmpestric128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpestrio128 : GCCBuiltin<"__builtin_ia32_pcmpestrio128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpestris128 : GCCBuiltin<"__builtin_ia32_pcmpestris128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; def int_x86_sse42_pcmpestriz128 : GCCBuiltin<"__builtin_ia32_pcmpestriz128">, - Intrinsic<[llvm_i32_ty], - [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, - llvm_i8_ty], - [IntrNoMem]>; + Intrinsic<[llvm_i32_ty], + [llvm_v16i8_ty, llvm_i32_ty, llvm_v16i8_ty, llvm_i32_ty, + llvm_i8_ty], + [IntrNoMem]>; } 
//===----------------------------------------------------------------------===// @@ -1571,14 +1634,14 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[], [llvm_ptrx86mmx_ty, llvm_x86mmx_ty], []>; def int_x86_mmx_palignr_b : GCCBuiltin<"__builtin_ia32_palignr">, - Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_mmx_pextr_w : GCCBuiltin<"__builtin_ia32_vec_ext_v4hi">, - Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], + Intrinsic<[llvm_i32_ty], [llvm_x86mmx_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_mmx_pinsr_w : GCCBuiltin<"__builtin_ia32_vec_set_v4hi">, - Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, + Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; } diff --git a/include/llvm/IntrinsicsXCore.td b/include/llvm/IntrinsicsXCore.td index 944120fc8c6e..e633af045c33 100644 --- a/include/llvm/IntrinsicsXCore.td +++ b/include/llvm/IntrinsicsXCore.td @@ -9,8 +9,13 @@ //===----------------------------------------------------------------------===// let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". + // Miscellaneous instructions. def int_xcore_bitrev : Intrinsic<[llvm_i32_ty],[llvm_i32_ty],[IntrNoMem]>; def int_xcore_getid : Intrinsic<[llvm_i32_ty],[],[IntrNoMem]>; + def int_xcore_getps : Intrinsic<[llvm_i32_ty],[llvm_i32_ty]>; + def int_xcore_setps : Intrinsic<[],[llvm_i32_ty, llvm_i32_ty]>; + def int_xcore_setsr : Intrinsic<[],[llvm_i32_ty]>; + def int_xcore_clrsr : Intrinsic<[],[llvm_i32_ty]>; // Resource instructions. def int_xcore_getr : Intrinsic<[llvm_anyptr_ty],[llvm_i32_ty]>; @@ -48,8 +53,37 @@ let TargetPrefix = "xcore" in { // All intrinsics start with "llvm.xcore.". 
def int_xcore_setv : Intrinsic<[],[llvm_anyptr_ty, llvm_ptr_ty], [NoCapture<0>]>; def int_xcore_eeu : Intrinsic<[],[llvm_anyptr_ty], [NoCapture<0>]>; + def int_xcore_setclk : Intrinsic<[],[llvm_anyptr_ty, llvm_anyptr_ty], + [NoCapture<0>, NoCapture<1>]>; + def int_xcore_setrdy : Intrinsic<[],[llvm_anyptr_ty, llvm_anyptr_ty], + [NoCapture<0>, NoCapture<1>]>; + def int_xcore_setpsc : Intrinsic<[],[llvm_anyptr_ty, llvm_i32_ty], + [NoCapture<0>]>; // Intrinsics for events. def int_xcore_waitevent : Intrinsic<[llvm_ptr_ty],[], [IntrReadMem]>; + + // If any of the resources owned by the thread are ready this returns the + // vector of one of the ready resources. If no resources owned by the thread + // are ready then the operand passed to the intrinsic is returned. + def int_xcore_checkevent : Intrinsic<[llvm_ptr_ty],[llvm_ptr_ty]>; + def int_xcore_clre : Intrinsic<[],[],[]>; + + // Intrinsics for threads. + def int_xcore_getst : Intrinsic <[llvm_anyptr_ty],[llvm_anyptr_ty], + [NoCapture<0>]>; + def int_xcore_msync : Intrinsic <[],[llvm_anyptr_ty], [NoCapture<0>]>; + def int_xcore_ssync : Intrinsic <[],[]>; + def int_xcore_mjoin : Intrinsic <[],[llvm_anyptr_ty], [NoCapture<0>]>; + def int_xcore_initsp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], + [NoCapture<0>]>; + def int_xcore_initpc : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], + [NoCapture<0>]>; + def int_xcore_initlr : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], + [NoCapture<0>]>; + def int_xcore_initcp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], + [NoCapture<0>]>; + def int_xcore_initdp : Intrinsic <[],[llvm_anyptr_ty, llvm_ptr_ty], + [NoCapture<0>]>; } diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index 69e1bd919f74..88ee65ac311a 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -49,7 +49,6 @@ namespace { (void) llvm::createAliasAnalysisCounterPass(); (void) llvm::createAliasDebugger(); (void) llvm::createArgumentPromotionPass(); - (void) 
llvm::createStructRetPromotionPass(); (void) llvm::createBasicAliasAnalysisPass(); (void) llvm::createLibCallAliasAnalysisPass(0); (void) llvm::createScalarEvolutionAliasAnalysisPass(); @@ -71,6 +70,7 @@ namespace { (void) llvm::createEdgeProfilerPass(); (void) llvm::createOptimalEdgeProfilerPass(); (void) llvm::createPathProfilerPass(); + (void) llvm::createGCOVProfilerPass(true, true); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createGlobalDCEPass(); @@ -84,7 +84,6 @@ namespace { (void) llvm::createLCSSAPass(); (void) llvm::createLICMPass(); (void) llvm::createLazyValueInfoPass(); - (void) llvm::createLiveValuesPass(); (void) llvm::createLoopDependenceAnalysisPass(); (void) llvm::createLoopExtractorPass(); (void) llvm::createLoopSimplifyPass(); @@ -119,7 +118,6 @@ namespace { (void) llvm::createSCCPPass(); (void) llvm::createScalarReplAggregatesPass(); (void) llvm::createSimplifyLibCallsPass(); - (void) llvm::createSimplifyHalfPowrLibCallsPass(); (void) llvm::createSingleLoopExtractorPass(); (void) llvm::createStripSymbolsPass(); (void) llvm::createStripNonDebugSymbolsPass(); @@ -136,7 +134,6 @@ namespace { (void) llvm::createMemCpyOptPass(); (void) llvm::createLoopDeletionPass(); (void) llvm::createPostDomTree(); - (void) llvm::createPostDomFrontier(); (void) llvm::createInstructionNamerPass(); (void) llvm::createFunctionAttrsPass(); (void) llvm::createMergeFunctionsPass(); @@ -145,7 +142,6 @@ namespace { (void) llvm::createDbgInfoPrinterPass(); (void) llvm::createModuleDebugInfoPrinterPass(); (void) llvm::createPartialInliningPass(); - (void) llvm::createGEPSplitterPass(); (void) llvm::createLintPass(); (void) llvm::createSinkingPass(); (void) llvm::createLowerAtomicPass(); diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h index 0bf364a6dfcf..873316139457 100644 --- a/include/llvm/MC/MCAsmInfo.h +++ b/include/llvm/MC/MCAsmInfo.h @@ -20,13 +20,16 @@ #include namespace llvm { + class 
MCExpr; class MCSection; + class MCStreamer; + class MCSymbol; class MCContext; /// MCAsmInfo - This class is intended to be used as a base class for asm /// properties and features specific to the target. namespace ExceptionHandling { - enum ExceptionsType { None, DwarfTable, DwarfCFI, SjLj }; + enum ExceptionsType { None, DwarfTable, DwarfCFI, SjLj, ARM }; } class MCAsmInfo { @@ -66,10 +69,9 @@ namespace llvm { /// relative expressions. const char *PCSymbol; // Defaults to "$". - /// SeparatorChar - This character, if specified, is used to separate - /// instructions from each other when on the same line. This is used to - /// measure inline asm instructions. - char SeparatorChar; // Defaults to ';' + /// SeparatorString - This string, if specified, is used to separate + /// instructions from each other when on the same line. + const char *SeparatorString; // Defaults to ';' /// CommentColumn - This indicates the comment num (zero-based) at /// which asm comments should be printed. @@ -322,6 +324,16 @@ namespace llvm { return 0; } + virtual const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const; + + const MCExpr * + getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const; + bool usesSunStyleELFSectionSwitchSyntax() const { return SunStyleELFSectionSwitchSyntax; } @@ -350,8 +362,8 @@ namespace llvm { const char *getPCSymbol() const { return PCSymbol; } - char getSeparatorChar() const { - return SeparatorChar; + const char *getSeparatorString() const { + return SeparatorString; } unsigned getCommentColumn() const { return CommentColumn; @@ -451,7 +463,8 @@ namespace llvm { bool isExceptionHandlingDwarf() const { return (ExceptionsType == ExceptionHandling::DwarfTable || - ExceptionsType == ExceptionHandling::DwarfCFI); + ExceptionsType == ExceptionHandling::DwarfCFI || + ExceptionsType == ExceptionHandling::ARM); } bool doesDwarfRequireFrameSection() const { diff --git 
a/include/llvm/MC/MCAsmLayout.h b/include/llvm/MC/MCAsmLayout.h index 01cb0006b362..a4585d1f1953 100644 --- a/include/llvm/MC/MCAsmLayout.h +++ b/include/llvm/MC/MCAsmLayout.h @@ -36,8 +36,8 @@ class MCAsmLayout { /// List of sections in layout order. llvm::SmallVector SectionOrder; - /// The last fragment which was layed out, or 0 if nothing has been layed - /// out. Fragments are always layed out in order, so all fragments with a + /// The last fragment which was laid out, or 0 if nothing has been laid + /// out. Fragments are always laid out in order, so all fragments with a /// lower ordinal will be up to date. mutable DenseMap LastValidFragment; @@ -58,7 +58,7 @@ class MCAsmLayout { void Invalidate(MCFragment *F); /// \brief Perform layout for a single fragment, assuming that the previous - /// fragment has already been layed out correctly, and the parent section has + /// fragment has already been laid out correctly, and the parent section has /// been initialized. void LayoutFragment(MCFragment *Fragment); diff --git a/include/llvm/MC/MCAssembler.h b/include/llvm/MC/MCAssembler.h index 30971c62a97e..fc919669e82d 100644 --- a/include/llvm/MC/MCAssembler.h +++ b/include/llvm/MC/MCAssembler.h @@ -706,7 +706,7 @@ class MCAssembler { /// \param DF The fragment the fixup is inside. /// \param Target [out] On return, the relocatable expression the fixup /// evaluates to. - /// \param Value [out] On return, the value of the fixup as currently layed + /// \param Value [out] On return, the value of the fixup as currently laid /// out. /// \return Whether the fixup value was fully resolved. This is true if the /// \arg Value result is fixed, otherwise the value may change due to @@ -745,7 +745,7 @@ class MCAssembler { MCFragment &F, const MCFixup &Fixup); public: - /// Compute the effective fragment size assuming it is layed out at the given + /// Compute the effective fragment size assuming it is laid out at the given /// \arg SectionAddress and \arg FragmentOffset. 
uint64_t ComputeFragmentSize(const MCAsmLayout &Layout, const MCFragment &F) const; diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h index 7b26d5493776..070089e2c938 100644 --- a/include/llvm/MC/MCContext.h +++ b/include/llvm/MC/MCContext.h @@ -45,12 +45,18 @@ namespace llvm { const TargetAsmInfo *TAI; + /// Allocator - Allocator object used for creating machine code objects. + /// + /// We use a bump pointer allocator to avoid the need to track all allocated + /// objects. + BumpPtrAllocator Allocator; + /// Symbols - Bindings of names to symbols. - StringMap Symbols; + StringMap Symbols; /// UsedNames - Keeps tracks of names that were used both for used declared /// and artificial symbols. - StringMap UsedNames; + StringMap UsedNames; /// NextUniqueID - The next ID to dole out to an unnamed assembler temporary /// symbol. @@ -84,6 +90,11 @@ namespace llvm { MCDwarfLoc CurrentDwarfLoc; bool DwarfLocSeen; + /// Honor temporary labels, this is useful for debugging semantic + /// differences between temporary and non-temporary labels (primarily on + /// Darwin). + bool AllowTemporaryLabels; + /// The dwarf line information from the .loc directives for the sections /// with assembled machine instructions have after seeing .loc directives. DenseMap MCLineSections; @@ -91,12 +102,6 @@ namespace llvm { /// the elements were added. std::vector MCLineSectionOrder; - /// Allocator - Allocator object used for creating machine code objects. - /// - /// We use a bump pointer allocator to avoid the need to track all allocated - /// objects. 
- BumpPtrAllocator Allocator; - void *MachOUniquingMap, *ELFUniquingMap, *COFFUniquingMap; MCSymbol *CreateSymbol(StringRef Name); @@ -109,6 +114,8 @@ namespace llvm { const TargetAsmInfo &getTargetAsmInfo() const { return *TAI; } + void setAllowTemporaryLabels(bool Value) { AllowTemporaryLabels = Value; } + /// @name Symbol Management /// @{ diff --git a/include/llvm/MC/MCDisassembler.h b/include/llvm/MC/MCDisassembler.h index c9e42eb6c798..ce8759a882eb 100644 --- a/include/llvm/MC/MCDisassembler.h +++ b/include/llvm/MC/MCDisassembler.h @@ -10,12 +10,14 @@ #define MCDISASSEMBLER_H #include "llvm/Support/DataTypes.h" +#include "llvm-c/Disassembler.h" namespace llvm { class MCInst; class MemoryObject; class raw_ostream; +class MCContext; struct EDInstInfo; @@ -24,7 +26,7 @@ struct EDInstInfo; class MCDisassembler { public: /// Constructor - Performs initial setup for the disassembler. - MCDisassembler() {} + MCDisassembler() : GetOpInfo(0), DisInfo(0), Ctx(0) {} virtual ~MCDisassembler(); @@ -46,13 +48,37 @@ class MCDisassembler { uint64_t address, raw_ostream &vStream) const = 0; - /// getEDInfo - Returns the enhanced insturction information corresponding to + /// getEDInfo - Returns the enhanced instruction information corresponding to /// the disassembler. /// /// @return - An array of instruction information, with one entry for /// each MCInst opcode this disassembler returns. /// NULL if there is no info for this target. virtual EDInstInfo *getEDInfo() const { return (EDInstInfo*)0; } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for above call back. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. 
+ MCContext *Ctx; + +public: + void setupForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, + MCContext *ctx) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + } + LLVMOpInfoCallback getLLVMOpInfoCallback() const { return GetOpInfo; } + void *getDisInfoBlock() const { return DisInfo; } + MCContext *getMCContext() const { return Ctx; } }; } // namespace llvm diff --git a/include/llvm/MC/MCDwarf.h b/include/llvm/MC/MCDwarf.h index 07a7bad15b1e..3bbcf3eb6ed0 100644 --- a/include/llvm/MC/MCDwarf.h +++ b/include/llvm/MC/MCDwarf.h @@ -23,6 +23,7 @@ #include namespace llvm { + class TargetAsmInfo; class MachineMove; class MCContext; class MCExpr; @@ -230,7 +231,7 @@ namespace llvm { class MCCFIInstruction { public: - enum OpType { Remember, Restore, Move }; + enum OpType { SameValue, Remember, Restore, Move, RelMove }; private: OpType Operation; MCSymbol *Label; @@ -242,10 +243,19 @@ namespace llvm { : Operation(Op), Label(L) { assert(Op == Remember || Op == Restore); } + MCCFIInstruction(OpType Op, MCSymbol *L, unsigned Register) + : Operation(Op), Label(L), Destination(Register) { + assert(Op == SameValue); + } MCCFIInstruction(MCSymbol *L, const MachineLocation &D, const MachineLocation &S) : Operation(Move), Label(L), Destination(D), Source(S) { } + MCCFIInstruction(OpType Op, MCSymbol *L, const MachineLocation &D, + const MachineLocation &S) + : Operation(Op), Label(L), Destination(D), Source(S) { + assert(Op == RelMove); + } OpType getOperation() const { return Operation; } MCSymbol *getLabel() const { return Label; } const MachineLocation &getDestination() const { return Destination; } @@ -254,12 +264,13 @@ namespace llvm { struct MCDwarfFrameInfo { MCDwarfFrameInfo() : Begin(0), End(0), Personality(0), Lsda(0), - Instructions(), PersonalityEncoding(0), + Function(0), Instructions(), PersonalityEncoding(), LsdaEncoding(0) {} MCSymbol *Begin; MCSymbol *End; const MCSymbol *Personality; const MCSymbol *Lsda; + const MCSymbol *Function; 
std::vector Instructions; unsigned PersonalityEncoding; unsigned LsdaEncoding; @@ -270,9 +281,11 @@ namespace llvm { // // This emits the frame info section. // - static void Emit(MCStreamer &streamer); + static void Emit(MCStreamer &streamer, bool usingCFI); + static void EmitDarwin(MCStreamer &streamer, bool usingCFI); static void EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta); - static void EncodeAdvanceLoc(uint64_t AddrDelta, raw_ostream &OS); + static void EncodeAdvanceLoc(uint64_t AddrDelta, raw_ostream &OS, + const TargetAsmInfo &AsmInfo); }; } // end namespace llvm diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h index fea5249eaba0..521fde6982b5 100644 --- a/include/llvm/MC/MCExpr.h +++ b/include/llvm/MC/MCExpr.h @@ -19,6 +19,7 @@ class MCAsmInfo; class MCAsmLayout; class MCAssembler; class MCContext; +class MCSection; class MCSectionData; class MCSymbol; class MCValue; @@ -92,6 +93,12 @@ class MCExpr { /// @result - True on success. bool EvaluateAsRelocatable(MCValue &Res, const MCAsmLayout &Layout) const; + /// FindAssociatedSection - Find the "associated section" for this expression, + /// which is currently defined as the absolute section for constants, or + /// otherwise the section associated with the first defined symbol in the + /// expression. 
+ const MCSection *FindAssociatedSection() const; + /// @} static bool classof(const MCExpr *) { return true; } @@ -420,6 +427,7 @@ class MCTargetExpr : public MCExpr { virtual bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const = 0; virtual void AddValueSymbols(MCAssembler *) const = 0; + virtual const MCSection *FindAssociatedSection() const = 0; static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/include/llvm/MC/MCInstPrinter.h b/include/llvm/MC/MCInstPrinter.h index 96716c775fdf..066955867c59 100644 --- a/include/llvm/MC/MCInstPrinter.h +++ b/include/llvm/MC/MCInstPrinter.h @@ -25,9 +25,12 @@ class MCInstPrinter { /// assembly emission is disable. raw_ostream *CommentStream; const MCAsmInfo &MAI; + + /// The current set of available features. + unsigned AvailableFeatures; public: MCInstPrinter(const MCAsmInfo &mai) - : CommentStream(0), MAI(mai) {} + : CommentStream(0), MAI(mai), AvailableFeatures(0) {} virtual ~MCInstPrinter(); @@ -41,6 +44,12 @@ class MCInstPrinter { /// getOpcodeName - Return the name of the specified opcode enum (e.g. /// "MOV32ri") or empty if we can't resolve it. virtual StringRef getOpcodeName(unsigned Opcode) const; + + /// getRegName - Return the assembler register name. 
+ virtual StringRef getRegName(unsigned RegNo) const; + + unsigned getAvailableFeatures() const { return AvailableFeatures; } + void setAvailableFeatures(unsigned Value) { AvailableFeatures = Value; } }; } // namespace llvm diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h index 833341eb97f5..8b0d87adabd4 100644 --- a/include/llvm/MC/MCObjectStreamer.h +++ b/include/llvm/MC/MCObjectStreamer.h @@ -38,6 +38,9 @@ class MCObjectStreamer : public MCStreamer { protected: MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter); + MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &_OS, MCCodeEmitter *_Emitter, + MCAssembler *_Assembler); ~MCObjectStreamer(); MCSectionData *getCurrentSectionData() const { @@ -60,9 +63,9 @@ class MCObjectStreamer : public MCStreamer { virtual void EmitLabel(MCSymbol *Symbol); virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace); - virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); - virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + unsigned AddrSpace); + virtual void EmitULEB128Value(const MCExpr *Value); + virtual void EmitSLEB128Value(const MCExpr *Value); virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); virtual void ChangeSection(const MCSection *Section); virtual void EmitInstruction(const MCInst &Inst); diff --git a/include/llvm/MC/MCParser/AsmLexer.h b/include/llvm/MC/MCParser/AsmLexer.h index 252696bec317..ab78799fdcd2 100644 --- a/include/llvm/MC/MCParser/AsmLexer.h +++ b/include/llvm/MC/MCParser/AsmLexer.h @@ -49,6 +49,7 @@ class AsmLexer : public MCAsmLexer { virtual StringRef LexUntilEndOfStatement(); bool isAtStartOfComment(char Char); + bool isAtStatementSeparator(const char *Ptr); const MCAsmInfo &getMAI() const { return MAI; } diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h index 
1c01b2f8f3cc..57008177b6d3 100644 --- a/include/llvm/MC/MCSection.h +++ b/include/llvm/MC/MCSection.h @@ -14,7 +14,6 @@ #ifndef LLVM_MC_MCSECTION_H #define LLVM_MC_MCSECTION_H -#include #include "llvm/ADT/StringRef.h" #include "llvm/MC/SectionKind.h" #include "llvm/Support/Casting.h" diff --git a/include/llvm/MC/MCSectionMachO.h b/include/llvm/MC/MCSectionMachO.h index 7633515f2744..bdb17e9008b2 100644 --- a/include/llvm/MC/MCSectionMachO.h +++ b/include/llvm/MC/MCSectionMachO.h @@ -66,10 +66,10 @@ class MCSectionMachO : public MCSection { /// S_SYMBOL_STUBS - Section with symbol stubs, byte size of stub in /// the Reserved2 field. S_SYMBOL_STUBS = 0x08U, - /// S_SYMBOL_STUBS - Section with only function pointers for + /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for /// initialization. S_MOD_INIT_FUNC_POINTERS = 0x09U, - /// S_MOD_INIT_FUNC_POINTERS - Section with only function pointers for + /// S_MOD_TERM_FUNC_POINTERS - Section with only function pointers for /// termination. S_MOD_TERM_FUNC_POINTERS = 0x0AU, /// S_COALESCED - Section contains symbols that are to be coalesced. @@ -157,10 +157,12 @@ class MCSectionMachO : public MCSection { /// flavored .s file. If successful, this fills in the specified Out /// parameters and returns an empty string. When an invalid section /// specifier is present, this returns a string indicating the problem. + /// If no TAA was parsed, TAA is not altered, and TAAParsed becomes false. static std::string ParseSectionSpecifier(StringRef Spec, // In. StringRef &Segment, // Out. StringRef &Section, // Out. unsigned &TAA, // Out. + bool &TAAParsed, // Out. unsigned &StubSize); // Out. 
virtual void PrintSwitchToSection(const MCAsmInfo &MAI, diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h index 4451199b7fb3..b005c8bd886b 100644 --- a/include/llvm/MC/MCStreamer.h +++ b/include/llvm/MC/MCStreamer.h @@ -50,13 +50,12 @@ namespace llvm { MCStreamer(const MCStreamer&); // DO NOT IMPLEMENT MCStreamer &operator=(const MCStreamer&); // DO NOT IMPLEMENT - void EmitSymbolValue(const MCSymbol *Sym, unsigned Size, - bool isPCRel, unsigned AddrSpace); - std::vector FrameInfos; MCDwarfFrameInfo *getCurrentFrameInfo(); void EnsureValidFrame(); + const MCSymbol* LastNonPrivate; + /// SectionStack - This is stack of current and previous section /// values saved by PushSection. SmallVector &RegList, + bool isVector); + /// Finish - Finish emission of machine code. virtual void Finish() = 0; }; @@ -485,6 +501,7 @@ namespace llvm { MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, + bool useCFI, MCInstPrinter *InstPrint = 0, MCCodeEmitter *CE = 0, TargetAsmBackend *TAB = 0, diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h index 7da4d7c15e3b..0583ce56820b 100644 --- a/include/llvm/MC/MCSymbol.h +++ b/include/llvm/MC/MCSymbol.h @@ -56,6 +56,7 @@ namespace llvm { mutable unsigned IsUsed : 1; private: // MCContext creates and uniques these. 
+ friend class MCExpr; friend class MCContext; MCSymbol(StringRef name, bool isTemporary) : Name(name), Section(0), Value(0), diff --git a/include/llvm/Metadata.h b/include/llvm/Metadata.h index a6c3f039a11e..c3230251d482 100644 --- a/include/llvm/Metadata.h +++ b/include/llvm/Metadata.h @@ -17,6 +17,7 @@ #define LLVM_METADATA_H #include "llvm/Value.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ilist_node.h" @@ -110,28 +111,25 @@ class MDNode : public Value, public FoldingSetNode { void replaceOperand(MDNodeOperand *Op, Value *NewVal); ~MDNode(); - MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, - bool isFunctionLocal); + MDNode(LLVMContext &C, ArrayRef Vals, bool isFunctionLocal); - static MDNode *getMDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, + static MDNode *getMDNode(LLVMContext &C, ArrayRef Vals, FunctionLocalness FL, bool Insert = true); public: // Constructors and destructors. - static MDNode *get(LLVMContext &Context, Value *const *Vals, - unsigned NumVals); + static MDNode *get(LLVMContext &Context, ArrayRef Vals); // getWhenValsUnresolved - Construct MDNode determining function-localness // from isFunctionLocal argument, not by analyzing Vals. - static MDNode *getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals, - unsigned NumVals, bool isFunctionLocal); + static MDNode *getWhenValsUnresolved(LLVMContext &Context, + ArrayRef Vals, + bool isFunctionLocal); - static MDNode *getIfExists(LLVMContext &Context, Value *const *Vals, - unsigned NumVals); + static MDNode *getIfExists(LLVMContext &Context, ArrayRef Vals); /// getTemporary - Return a temporary MDNode, for use in constructing /// cyclic MDNode structures. A temporary MDNode is not uniqued, /// may be RAUW'd, and must be manually deleted with deleteTemporary. 
- static MDNode *getTemporary(LLVMContext &Context, Value *const *Vals, - unsigned NumVals); + static MDNode *getTemporary(LLVMContext &Context, ArrayRef Vals); /// deleteTemporary - Deallocate a node created by getTemporary. The /// node must not have any users. diff --git a/include/llvm/Module.h b/include/llvm/Module.h index f95895e95773..aef8eb890fe0 100644 --- a/include/llvm/Module.h +++ b/include/llvm/Module.h @@ -211,15 +211,20 @@ class Module { void setTargetTriple(StringRef T) { TargetTriple = T; } /// Set the module-scope inline assembly blocks. - void setModuleInlineAsm(StringRef Asm) { GlobalScopeAsm = Asm; } + void setModuleInlineAsm(StringRef Asm) { + GlobalScopeAsm = Asm; + if (!GlobalScopeAsm.empty() && + GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n') + GlobalScopeAsm += '\n'; + } /// Append to the module-scope inline assembly blocks, automatically inserting /// a separating newline if necessary. void appendModuleInlineAsm(StringRef Asm) { + GlobalScopeAsm += Asm; if (!GlobalScopeAsm.empty() && GlobalScopeAsm[GlobalScopeAsm.size()-1] != '\n') GlobalScopeAsm += '\n'; - GlobalScopeAsm += Asm; } /// @} @@ -303,7 +308,7 @@ class Module { /// 1. If it does not exist, add a declaration of the global and return it. /// 2. Else, the global exists but has the wrong type: return the function /// with a constantexpr cast to the right type. - /// 3. Finally, if the existing global is the correct delclaration, return + /// 3. Finally, if the existing global is the correct declaration, return /// the existing global. 
Constant *getOrInsertGlobal(StringRef Name, const Type *Ty); diff --git a/include/llvm/Object/MachOObject.h b/include/llvm/Object/MachOObject.h index 03d9c147b413..19a399e62fe3 100644 --- a/include/llvm/Object/MachOObject.h +++ b/include/llvm/Object/MachOObject.h @@ -19,6 +19,7 @@ namespace llvm { class MemoryBuffer; +class raw_ostream; namespace object { @@ -172,7 +173,26 @@ class MachOObject { InMemoryStruct &Res) const; /// @} + + /// @name Object Dump Facilities + /// @{ + /// dump - Support for debugging, callable in GDB: V->dump() + // + void dump() const; + void dumpHeader() const; + + /// print - Implement operator<< on MachOObject. + /// + void print(raw_ostream &O) const; + void printHeader(raw_ostream &O) const; + + /// @} }; + +inline raw_ostream &operator<<(raw_ostream &OS, const MachOObject &V) { + V.print(OS); + return OS; +} } // end namespace object } // end namespace llvm diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h index ed0fb39f5d6c..04dd8b60547a 100644 --- a/include/llvm/Pass.h +++ b/include/llvm/Pass.h @@ -13,7 +13,7 @@ // Passes are designed this way so that it is possible to run passes in a cache // and organizationally optimal order without having to specify it at the front // end. This allows arbitrary passes to be strung together and have them -// executed as effeciently as possible. +// executed as efficiently as possible. // // Passes should extend one of the classes below, depending on the guarantees // that it can make about what will be modified as it is run. For example, most @@ -114,7 +114,7 @@ class Pass { void dump() const; // dump - Print to stderr. /// createPrinterPass - Get a Pass appropriate to print the IR this - /// pass operates one (Module, Function or MachineFunction). + /// pass operates on (Module, Function or MachineFunction). 
virtual Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const = 0; @@ -320,7 +320,7 @@ class BasicBlockPass : public Pass { public: explicit BasicBlockPass(char &pid) : Pass(PT_BasicBlock, pid) {} - /// createPrinterPass - Get a function printer pass. + /// createPrinterPass - Get a basic block printer pass. Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const; /// doInitialization - Virtual method overridden by subclasses to do diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h index a3342d51386b..fede1216c3c4 100644 --- a/include/llvm/PassAnalysisSupport.h +++ b/include/llvm/PassAnalysisSupport.h @@ -142,6 +142,8 @@ class AnalysisResolver { Pass *findImplPass(Pass *P, AnalysisID PI, Function &F); void addAnalysisImplsPair(AnalysisID PI, Pass *P) { + if (findImplPass(PI) == P) + return; std::pair pir = std::make_pair(PI,P); AnalysisImpls.push_back(pir); } diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index c6807099f85e..a2ad24ffead9 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -177,6 +177,9 @@ class BumpPtrAllocator { unsigned GetNumSlabs() const; void PrintStats() const; + + /// Compute the total physical memory allocated by this allocator. 
+ size_t getTotalMemory() const; }; /// SpecificBumpPtrAllocator - Same as BumpPtrAllocator but allows only diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h index 9ba71fcca8a5..d2ea12364e9f 100644 --- a/include/llvm/Support/CFG.h +++ b/include/llvm/Support/CFG.h @@ -41,6 +41,7 @@ class PredIterator : public std::iteratoruse_begin()) { advancePastNonTerminators(); } @@ -64,6 +65,12 @@ class PredIterator : public std::iterator pred_iterator; diff --git a/include/llvm/Support/Casting.h b/include/llvm/Support/Casting.h index 6bb98064382e..abb5a9aa11d8 100644 --- a/include/llvm/Support/Casting.h +++ b/include/llvm/Support/Casting.h @@ -192,8 +192,8 @@ template struct cast_convert_val { // cast - Return the argument parameter cast to the specified type. This // casting operator asserts that the type is correct, so it does not return null -// on failure. But it will correctly return NULL when the input is NULL. -// Used Like this: +// on failure. It does not allow a null argument (use cast_or_null for that). +// It is typically used like this: // // cast(myVal)->getParent() // diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h index 9ae3d6af32ee..d6098711a07a 100644 --- a/include/llvm/Support/CommandLine.h +++ b/include/llvm/Support/CommandLine.h @@ -60,6 +60,12 @@ void ParseEnvironmentOptions(const char *progName, const char *envvar, void SetVersionPrinter(void (*func)()); +// PrintOptionValues - Print option values. +// With -print-options print the difference between option values and defaults. +// With -print-all-options print all option values. +// (Currently not perfect, but best-effort.) +void PrintOptionValues(); + // MarkOptionsChanged - Internal helper function. 
void MarkOptionsChanged(); @@ -230,6 +236,8 @@ class Option { // virtual void printOptionInfo(size_t GlobalWidth) const = 0; + virtual void printOptionValue(size_t GlobalWidth, bool Force) const = 0; + virtual void getExtraOptionNames(SmallVectorImpl &) {} // addOccurrence - Wrapper around handleOccurrence that enforces Flags. @@ -302,6 +310,120 @@ template LocationClass location(Ty &L) { return LocationClass(L); } +//===----------------------------------------------------------------------===// +// OptionValue class + +// Support value comparison outside the template. +struct GenericOptionValue { + virtual ~GenericOptionValue() {} + virtual bool compare(const GenericOptionValue &V) const = 0; +}; + +template struct OptionValue; + +// The default value safely does nothing. Option value printing is only +// best-effort. +template +struct OptionValueBase : public GenericOptionValue { + // Temporary storage for argument passing. + typedef OptionValue WrapperType; + + bool hasValue() const { return false; } + + const DataType &getValue() const { assert(false && "no default value"); } + + // Some options may take their value from a different data type. + template + void setValue(const DT& /*V*/) {} + + bool compare(const DataType &/*V*/) const { return false; } + + virtual bool compare(const GenericOptionValue& /*V*/) const { return false; } +}; + +// Simple copy of the option value. 
+template +class OptionValueCopy : public GenericOptionValue { + DataType Value; + bool Valid; +public: + OptionValueCopy() : Valid(false) {} + + bool hasValue() const { return Valid; } + + const DataType &getValue() const { + assert(Valid && "invalid option value"); + return Value; + } + + void setValue(const DataType &V) { Valid = true; Value = V; } + + bool compare(const DataType &V) const { + return Valid && (Value != V); + } + + virtual bool compare(const GenericOptionValue &V) const { + const OptionValueCopy &VC = + static_cast< const OptionValueCopy& >(V); + if (!VC.hasValue()) return false; + return compare(VC.getValue()); + } +}; + +// Non-class option values. +template +struct OptionValueBase : OptionValueCopy { + typedef DataType WrapperType; +}; + +// Top-level option class. +template +struct OptionValue : OptionValueBase::value> { + OptionValue() {} + + OptionValue(const DataType& V) { + this->setValue(V); + } + // Some options may take their value from a different data type. + template + OptionValue &operator=(const DT& V) { + this->setValue(V); + return *this; + } +}; + +// Other safe-to-copy-by-value common option types. 
+enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; +template<> +struct OptionValue : OptionValueCopy { + typedef cl::boolOrDefault WrapperType; + + OptionValue() {} + + OptionValue(const cl::boolOrDefault& V) { + this->setValue(V); + } + OptionValue &operator=(const cl::boolOrDefault& V) { + setValue(V); + return *this; + } +}; + +template<> +struct OptionValue : OptionValueCopy { + typedef StringRef WrapperType; + + OptionValue() {} + + OptionValue(const std::string& V) { + this->setValue(V); + } + OptionValue &operator=(const std::string& V) { + setValue(V); + return *this; + } +}; + //===----------------------------------------------------------------------===// // Enum valued command line option // @@ -355,7 +477,6 @@ ValuesClass END_WITH_NULL values(const char *Arg, DataType Val, return Vals; } - //===----------------------------------------------------------------------===// // parser class - Parameterizable parser for different data types. By default, // known data types (string, int, bool) have specialized parsers, that do what @@ -368,7 +489,16 @@ ValuesClass END_WITH_NULL values(const char *Arg, DataType Val, // not need replicated for every instance of the generic parser. This also // allows us to put stuff into CommandLine.cpp // -struct generic_parser_base { +class generic_parser_base { +protected: + class GenericOptionInfo { + public: + GenericOptionInfo(const char *name, const char *helpStr) : + Name(name), HelpStr(helpStr) {} + const char *Name; + const char *HelpStr; + }; +public: virtual ~generic_parser_base() {} // Base class should have virtual-dtor // getNumOptions - Virtual function implemented by generic subclass to @@ -385,11 +515,28 @@ struct generic_parser_base { // Return the width of the option tag for printing... virtual size_t getOptionWidth(const Option &O) const; + virtual const GenericOptionValue &getOptionValue(unsigned N) const = 0; + // printOptionInfo - Print out information about this option. 
The // to-be-maintained width is specified. // virtual void printOptionInfo(const Option &O, size_t GlobalWidth) const; + void printGenericOptionDiff(const Option &O, const GenericOptionValue &V, + const GenericOptionValue &Default, + size_t GlobalWidth) const; + + // printOptionDiff - print the value of an option and its default. + // + // Template definition ensures that the option and default have the same + // DataType (via the same AnyOptionValue). + template + void printOptionDiff(const Option &O, const AnyOptionValue &V, + const AnyOptionValue &Default, + size_t GlobalWidth) const { + printGenericOptionDiff(O, V, Default, GlobalWidth); + } + void initialize(Option &O) { // All of the modifiers for the option have been processed by now, so the // argstr field should be stable, copy it down now. @@ -443,13 +590,11 @@ struct generic_parser_base { template class parser : public generic_parser_base { protected: - class OptionInfo { + class OptionInfo : public GenericOptionInfo { public: OptionInfo(const char *name, DataType v, const char *helpStr) : - Name(name), V(v), HelpStr(helpStr) {} - const char *Name; - DataType V; - const char *HelpStr; + GenericOptionInfo(name, helpStr), V(v) {} + OptionValue V; }; SmallVector Values; public: @@ -462,6 +607,11 @@ class parser : public generic_parser_base { return Values[N].HelpStr; } + // getOptionValue - Return the value of option name N. + virtual const GenericOptionValue &getOptionValue(unsigned N) const { + return Values[N].V; + } + // parse - Return true on error. 
bool parse(Option &O, StringRef ArgName, StringRef Arg, DataType &V) { StringRef ArgVal; @@ -473,7 +623,7 @@ class parser : public generic_parser_base { for (unsigned i = 0, e = static_cast(Values.size()); i != e; ++i) if (Values[i].Name == ArgVal) { - V = Values[i].V; + V = Values[i].V.getValue(); return false; } @@ -522,11 +672,19 @@ class basic_parser_impl { // non-template implementation of basic_parser // void printOptionInfo(const Option &O, size_t GlobalWidth) const; + // printOptionNoValue - Print a placeholder for options that don't yet support + // printOptionDiff(). + void printOptionNoValue(const Option &O, size_t GlobalWidth) const; + // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "value"; } // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); + +protected: + // A helper for basic_parser::printOptionDiff. + void printOptionName(const Option &O, size_t GlobalWidth) const; }; // basic_parser - The real basic parser is just a template wrapper that provides @@ -536,6 +694,7 @@ template class basic_parser : public basic_parser_impl { public: typedef DataType parser_data_type; + typedef OptionValue OptVal; }; //-------------------------------------------------- @@ -561,6 +720,9 @@ class parser : public basic_parser { // getValueName - Do not print = at all. virtual const char *getValueName() const { return 0; } + void printOptionDiff(const Option &O, bool V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; @@ -569,7 +731,6 @@ EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); //-------------------------------------------------- // parser -enum boolOrDefault { BOU_UNSET, BOU_TRUE, BOU_FALSE }; template<> class parser : public basic_parser { public: @@ -583,6 +744,9 @@ class parser : public basic_parser { // getValueName - Do not print = at all. 
virtual const char *getValueName() const { return 0; } + void printOptionDiff(const Option &O, boolOrDefault V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; @@ -601,6 +765,9 @@ class parser : public basic_parser { // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "int"; } + void printOptionDiff(const Option &O, int V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; @@ -620,6 +787,9 @@ class parser : public basic_parser { // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "uint"; } + void printOptionDiff(const Option &O, unsigned V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; @@ -638,6 +808,9 @@ class parser : public basic_parser { // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "number"; } + void printOptionDiff(const Option &O, double V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; @@ -656,6 +829,9 @@ class parser : public basic_parser { // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "number"; } + void printOptionDiff(const Option &O, float V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; @@ -677,6 +853,9 @@ class parser : public basic_parser { // getValueName - Overload in subclass to provide a better default value. 
virtual const char *getValueName() const { return "string"; } + void printOptionDiff(const Option &O, StringRef V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; @@ -698,12 +877,63 @@ class parser : public basic_parser { // getValueName - Overload in subclass to provide a better default value. virtual const char *getValueName() const { return "char"; } + void printOptionDiff(const Option &O, char V, OptVal Default, + size_t GlobalWidth) const; + // An out-of-line virtual method to provide a 'home' for this class. virtual void anchor(); }; EXTERN_TEMPLATE_INSTANTIATION(class basic_parser); +//-------------------------------------------------- +// PrintOptionDiff +// +// This collection of wrappers is the intermediary between class opt and class +// parser to handle all the template nastiness. + +// This overloaded function is selected by the generic parser. +template +void printOptionDiff(const Option &O, const generic_parser_base &P, const DT &V, + const OptionValue
        &Default, size_t GlobalWidth) { + OptionValue
        OV = V; + P.printOptionDiff(O, OV, Default, GlobalWidth); +} + +// This is instantiated for basic parsers when the parsed value has a different +// type than the option value. e.g. HelpPrinter. +template +struct OptionDiffPrinter { + void print(const Option &O, const parser P, const ValDT &/*V*/, + const OptionValue &/*Default*/, size_t GlobalWidth) { + P.printOptionNoValue(O, GlobalWidth); + } +}; + +// This is instantiated for basic parsers when the parsed value has the same +// type as the option value. +template +struct OptionDiffPrinter { + void print(const Option &O, const parser
        P, const DT &V, + const OptionValue
        &Default, size_t GlobalWidth) { + P.printOptionDiff(O, V, Default, GlobalWidth); + } +}; + +// This overloaded function is selected by the basic parser, which may parse a +// different type than the option type. +template +void printOptionDiff( + const Option &O, + const basic_parser &P, + const ValDT &V, const OptionValue &Default, + size_t GlobalWidth) { + + OptionDiffPrinter printer; + printer.print(O, static_cast(P), V, Default, + GlobalWidth); +} + //===----------------------------------------------------------------------===// // applicator class - This class is used because we must use partial // specialization to handle literal string arguments specially (const char* does @@ -753,7 +983,6 @@ void apply(const Mod &M, Opt *O) { applicator::opt(M, *O); } - //===----------------------------------------------------------------------===// // opt_storage class @@ -764,6 +993,7 @@ void apply(const Mod &M, Opt *O) { template class opt_storage { DataType *Location; // Where to store the object... + OptionValue Default; void check() const { assert(Location != 0 && "cl::location(...) not specified for a command " @@ -777,21 +1007,25 @@ class opt_storage { if (Location) return O.error("cl::location(x) specified more than once!"); Location = &L; + Default = L; return false; } template - void setValue(const T &V) { + void setValue(const T &V, bool initial = false) { check(); *Location = V; + if (initial) + Default = V; } DataType &getValue() { check(); return *Location; } const DataType &getValue() const { check(); return *Location; } operator DataType() const { return this->getValue(); } -}; + const OptionValue &getDefault() const { return Default; } +}; // Define how to hold a class type object, such as a string. Since we can // inherit from a class, we do so. 
This makes us exactly compatible with the @@ -800,11 +1034,19 @@ class opt_storage { template class opt_storage : public DataType { public: + OptionValue Default; + template - void setValue(const T &V) { DataType::operator=(V); } + void setValue(const T &V, bool initial = false) { + DataType::operator=(V); + if (initial) + Default = V; + } DataType &getValue() { return *this; } const DataType &getValue() const { return *this; } + + const OptionValue &getDefault() const { return Default; } }; // Define a partial specialization to handle things we cannot inherit from. In @@ -815,16 +1057,23 @@ template class opt_storage { public: DataType Value; + OptionValue Default; // Make sure we initialize the value with the default constructor for the // type. opt_storage() : Value(DataType()) {} template - void setValue(const T &V) { Value = V; } + void setValue(const T &V, bool initial = false) { + Value = V; + if (initial) + Default = V; + } DataType &getValue() { return Value; } DataType getValue() const { return Value; } + const OptionValue &getDefault() const { return Default; } + operator DataType() const { return getValue(); } // If the datatype is a pointer, support -> on it. @@ -866,13 +1115,20 @@ class opt : public Option, Parser.printOptionInfo(*this, GlobalWidth); } + virtual void printOptionValue(size_t GlobalWidth, bool Force) const { + if (Force || this->getDefault().compare(this->getValue())) { + cl::printOptionDiff( + *this, Parser, this->getValue(), this->getDefault(), GlobalWidth); + } + } + void done() { addArgument(); Parser.initialize(*this); } public: // setInitialValue - Used by the cl::init modifier... 
- void setInitialValue(const DataType &V) { this->setValue(V); } + void setInitialValue(const DataType &V) { this->setValue(V, true); } ParserClass &getParser() { return Parser; } @@ -1030,6 +1286,9 @@ class list : public Option, public list_storage { Parser.printOptionInfo(*this, GlobalWidth); } + // Unimplemented: list options don't currently store their default value. + virtual void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const {} + void done() { addArgument(); Parser.initialize(*this); @@ -1229,6 +1488,9 @@ class bits : public Option, public bits_storage { Parser.printOptionInfo(*this, GlobalWidth); } + // Unimplemented: bits options don't currently store their default values. + virtual void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const {} + void done() { addArgument(); Parser.initialize(*this); @@ -1320,6 +1582,9 @@ class alias : public Option { virtual size_t getOptionWidth() const; virtual void printOptionInfo(size_t GlobalWidth) const; + // Aliases do not need to print their values. + virtual void printOptionValue(size_t /*GlobalWidth*/, bool /*Force*/) const {} + void done() { if (!hasArgStr()) error("cl::alias must have argument name specified!"); diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 67f0fd7e0dc6..e0921572182b 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -126,4 +126,12 @@ decl #endif +// LLVM_BUILTIN_UNREACHABLE - On compilers which support it, expands +// to an expression which states that it is undefined behavior for the +// compiler to reach this point. Otherwise is not defined. 
+#if defined(__clang__) || (__GNUC__ > 4) \ + || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) +# define LLVM_BUILTIN_UNREACHABLE __builtin_unreachable() +#endif + #endif diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h index bd3765d592db..d0eaa3e487df 100644 --- a/include/llvm/Support/ConstantFolder.h +++ b/include/llvm/Support/ConstantFolder.h @@ -22,12 +22,10 @@ namespace llvm { -class LLVMContext; - /// ConstantFolder - Create constants with minimum, target independent, folding. class ConstantFolder { public: - explicit ConstantFolder(LLVMContext &) {} + explicit ConstantFolder() {} //===--------------------------------------------------------------------===// // Binary Operators diff --git a/include/llvm/Support/CrashRecoveryContext.h b/include/llvm/Support/CrashRecoveryContext.h index 2e9b5d4aa541..db835e8c2048 100644 --- a/include/llvm/Support/CrashRecoveryContext.h +++ b/include/llvm/Support/CrashRecoveryContext.h @@ -15,6 +15,8 @@ namespace llvm { class StringRef; +class CrashRecoveryContextCleanup; + /// \brief Crash recovery helper object. /// /// This class implements support for running operations in a safe context so @@ -42,10 +44,14 @@ class StringRef; /// Crash recovery contexts may not be nested. class CrashRecoveryContext { void *Impl; + CrashRecoveryContextCleanup *head; public: - CrashRecoveryContext() : Impl(0) {} + CrashRecoveryContext() : Impl(0), head(0) {} ~CrashRecoveryContext(); + + void registerCleanup(CrashRecoveryContextCleanup *cleanup); + void unregisterCleanup(CrashRecoveryContextCleanup *cleanup); /// \brief Enable crash recovery. static void Enable(); @@ -57,6 +63,10 @@ class CrashRecoveryContext { /// thread which is in a protected context. static CrashRecoveryContext *GetCurrent(); + /// \brief Return true if the current thread is recovering from a + /// crash. 
+ static bool isRecoveringFromCrash(); + /// \brief Execute the provide callback function (with the given arguments) in /// a protected context. /// @@ -87,6 +97,99 @@ class CrashRecoveryContext { const std::string &getBacktrace() const; }; +class CrashRecoveryContextCleanup { +protected: + CrashRecoveryContext *context; + CrashRecoveryContextCleanup(CrashRecoveryContext *context) + : context(context), cleanupFired(false) {} +public: + bool cleanupFired; + + virtual ~CrashRecoveryContextCleanup(); + virtual void recoverResources() = 0; + + CrashRecoveryContext *getContext() const { + return context; + } + +private: + friend class CrashRecoveryContext; + CrashRecoveryContextCleanup *prev, *next; +}; + +template +class CrashRecoveryContextCleanupBase : public CrashRecoveryContextCleanup { +protected: + T *resource; + CrashRecoveryContextCleanupBase(CrashRecoveryContext *context, T* resource) + : CrashRecoveryContextCleanup(context), resource(resource) {} +public: + static DERIVED *create(T *x) { + if (x) { + if (CrashRecoveryContext *context = CrashRecoveryContext::GetCurrent()) + return new DERIVED(context, x); + } + return 0; + } +}; + +template +class CrashRecoveryContextDestructorCleanup : public + CrashRecoveryContextCleanupBase, T> { +public: + CrashRecoveryContextDestructorCleanup(CrashRecoveryContext *context, + T *resource) + : CrashRecoveryContextCleanupBase< + CrashRecoveryContextDestructorCleanup, T>(context, resource) {} + + virtual void recoverResources() { + this->resource->~T(); + } +}; + +template +class CrashRecoveryContextDeleteCleanup : public + CrashRecoveryContextCleanupBase, T> { +public: + CrashRecoveryContextDeleteCleanup(CrashRecoveryContext *context, T *resource) + : CrashRecoveryContextCleanupBase< + CrashRecoveryContextDeleteCleanup, T>(context, resource) {} + + virtual void recoverResources() { + delete this->resource; + } +}; + +template +class CrashRecoveryContextReleaseRefCleanup : public + CrashRecoveryContextCleanupBase, T> +{ 
+public: + CrashRecoveryContextReleaseRefCleanup(CrashRecoveryContext *context, + T *resource) + : CrashRecoveryContextCleanupBase, + T>(context, resource) {} + + virtual void recoverResources() { + this->resource->Release(); + } +}; + +template > +class CrashRecoveryContextCleanupRegistrar { + CrashRecoveryContextCleanup *cleanup; +public: + CrashRecoveryContextCleanupRegistrar(T *x) + : cleanup(Cleanup::create(x)) { + if (cleanup) + cleanup->getContext()->registerCleanup(cleanup); + } + + ~CrashRecoveryContextCleanupRegistrar() { + if (cleanup && !cleanup->cleanupFired) + cleanup->getContext()->unregisterCleanup(cleanup); + } +}; } #endif diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h index 796c74a21ca8..3cb8164c3c3d 100644 --- a/include/llvm/Support/DOTGraphTraits.h +++ b/include/llvm/Support/DOTGraphTraits.h @@ -89,8 +89,9 @@ struct DefaultDOTGraphTraits { /// If you want to override the dot attributes printed for a particular edge, /// override this method. - template - static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + template + static std::string getEdgeAttributes(const void *Node, EdgeIter EI, + const GraphType& Graph) { return ""; } diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h index ccc344612913..98a05a45a76d 100644 --- a/include/llvm/Support/DebugLoc.h +++ b/include/llvm/Support/DebugLoc.h @@ -15,6 +15,8 @@ #ifndef LLVM_SUPPORT_DEBUGLOC_H #define LLVM_SUPPORT_DEBUGLOC_H +#include "llvm/ADT/DenseMapInfo.h" + namespace llvm { class MDNode; class LLVMContext; @@ -23,6 +25,24 @@ namespace llvm { /// and MachineInstr to compactly encode file/line/scope information for an /// operation. class DebugLoc { + friend struct DenseMapInfo; + + /// getEmptyKey() - A private constructor that returns an unknown that is + /// not equal to the tombstone key or DebugLoc(). 
+ static DebugLoc getEmptyKey() { + DebugLoc DL; + DL.LineCol = 1; + return DL; + } + + /// getTombstoneKey() - A private constructor that returns an unknown that + /// is not equal to the empty key or DebugLoc(). + static DebugLoc getTombstoneKey() { + DebugLoc DL; + DL.LineCol = 2; + return DL; + } + /// LineCol - This 32-bit value encodes the line and column number for the /// location, encoded as 24-bits for line and 8 bits for col. A value of 0 /// for either means unknown. @@ -75,6 +95,14 @@ namespace llvm { } bool operator!=(const DebugLoc &DL) const { return !(*this == DL); } }; + + template <> + struct DenseMapInfo { + static DebugLoc getEmptyKey(); + static DebugLoc getTombstoneKey(); + static unsigned getHashValue(const DebugLoc &Key); + static bool isEqual(const DebugLoc &LHS, const DebugLoc &RHS); + }; } // end namespace llvm #endif /* LLVM_DEBUGLOC_H */ diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index 5d0b5a943d56..f6d680b8b9d8 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -231,6 +231,10 @@ enum dwarf_constants { DW_AT_APPLE_major_runtime_vers = 0x3fe5, DW_AT_APPLE_runtime_class = 0x3fe6, DW_AT_APPLE_omit_frame_ptr = 0x3fe7, + DW_AT_APPLE_property_name = 0x3fe8, + DW_AT_APPLE_property_getter = 0x3fe9, + DW_AT_APPLE_property_setter = 0x3fea, + DW_AT_APPLE_property_attribute = 0x3feb, // Attribute form encodings DW_FORM_addr = 0x01, @@ -407,6 +411,7 @@ enum dwarf_constants { DW_OP_call_ref = 0x9a, DW_OP_form_tls_address = 0x9b, DW_OP_call_frame_cfa = 0x9c, + DW_OP_bit_piece = 0x9d, DW_OP_lo_user = 0xe0, DW_OP_hi_user = 0xff, @@ -584,7 +589,15 @@ enum dwarf_constants { DW_EH_PE_datarel = 0x30, DW_EH_PE_funcrel = 0x40, DW_EH_PE_aligned = 0x50, - DW_EH_PE_indirect = 0x80 + DW_EH_PE_indirect = 0x80, + + // Apple Objective-C Property Attributes + DW_APPLE_PROPERTY_readonly = 0x01, + DW_APPLE_PROPERTY_readwrite = 0x02, + DW_APPLE_PROPERTY_assign = 0x04, + DW_APPLE_PROPERTY_retain = 0x08, + 
DW_APPLE_PROPERTY_copy = 0x10, + DW_APPLE_PROPERTY_nonatomic = 0x20 }; /// TagString - Return the string for the specified tag. diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h index 5eca438d8b4a..95b01095c1b2 100644 --- a/include/llvm/Support/ErrorHandling.h +++ b/include/llvm/Support/ErrorHandling.h @@ -86,16 +86,19 @@ namespace llvm { unsigned line=0); } -/// Prints the message and location info to stderr in !NDEBUG builds. -/// This is intended to be used for "impossible" situations that imply -/// a bug in the compiler. +/// Marks that the current location is not supposed to be reachable. +/// In !NDEBUG builds, prints the message and location info to stderr. +/// In NDEBUG builds, becomes an optimizer hint that the current location +/// is not supposed to be reachable. On compilers that don't support +/// such hints, prints a reduced message instead. /// -/// In NDEBUG mode it only prints "UNREACHABLE executed". -/// Use this instead of assert(0), so that the compiler knows this path -/// is not reachable even for NDEBUG builds. +/// Use this instead of assert(0). It conveys intent more clearly and +/// allows compilers to omit some unnecessary code. 
#ifndef NDEBUG #define llvm_unreachable(msg) \ ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__) +#elif defined(LLVM_BUILTIN_UNREACHABLE) +#define llvm_unreachable(msg) LLVM_BUILTIN_UNREACHABLE #else #define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal() #endif diff --git a/include/llvm/Support/FileSystem.h b/include/llvm/Support/FileSystem.h index 4001bf0b84e3..4f013f89e86c 100644 --- a/include/llvm/Support/FileSystem.h +++ b/include/llvm/Support/FileSystem.h @@ -595,7 +595,7 @@ class directory_entry { void replace_filename(const Twine &filename, file_status st = file_status(), file_status symlink_st = file_status()); - StringRef path() const { return Path; } + const std::string &path() const { return Path; } error_code status(file_status &result) const; error_code symlink_status(file_status &result) const; diff --git a/include/llvm/Support/FileUtilities.h b/include/llvm/Support/FileUtilities.h index 748ce7cea7bd..5456eb730a17 100644 --- a/include/llvm/Support/FileUtilities.h +++ b/include/llvm/Support/FileUtilities.h @@ -15,13 +15,14 @@ #ifndef LLVM_SUPPORT_FILEUTILITIES_H #define LLVM_SUPPORT_FILEUTILITIES_H +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" namespace llvm { /// DiffFilesWithTolerance - Compare the two files specified, returning 0 if /// the files match, 1 if they are different, and 2 if there is a file error. - /// This function allows you to specify an absolete and relative FP error that + /// This function allows you to specify an absolute and relative FP error that /// is allowed to exist. If you specify a string to fill in for the error /// option, it will set the string to an error message if an error occurs, or /// if the files are different. @@ -37,29 +38,36 @@ namespace llvm { /// specified (if deleteIt is true). 
/// class FileRemover { - sys::Path Filename; + SmallString<128> Filename; bool DeleteIt; public: FileRemover() : DeleteIt(false) {} - explicit FileRemover(const sys::Path &filename, bool deleteIt = true) - : Filename(filename), DeleteIt(deleteIt) {} + explicit FileRemover(const Twine& filename, bool deleteIt = true) + : DeleteIt(deleteIt) { + filename.toVector(Filename); + } ~FileRemover() { if (DeleteIt) { // Ignore problems deleting the file. - Filename.eraseFromDisk(); + bool existed; + sys::fs::remove(Filename.str(), existed); } } /// setFile - Give ownership of the file to the FileRemover so it will /// be removed when the object is destroyed. If the FileRemover already /// had ownership of a file, remove it first. - void setFile(const sys::Path &filename, bool deleteIt = true) { - if (DeleteIt) - Filename.eraseFromDisk(); + void setFile(const Twine& filename, bool deleteIt = true) { + if (DeleteIt) { + // Ignore problems deleting the file. + bool existed; + sys::fs::remove(Filename.str(), existed); + } - Filename = filename; + Filename.clear(); + filename.toVector(Filename); DeleteIt = deleteIt; } diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h index 7573ef0dc9e7..eab0c9d18db1 100644 --- a/include/llvm/Support/GraphWriter.h +++ b/include/llvm/Support/GraphWriter.h @@ -70,7 +70,7 @@ class GraphWriter { for (unsigned i = 0; EI != EE && i != 64; ++EI, ++i) { std::string label = DTraits.getEdgeSourceLabel(Node, EI); - if (label == "") + if (label.empty()) continue; hasEdgeSourceLabels = true; @@ -78,7 +78,7 @@ class GraphWriter { if (i) O << "|"; - O << "" << DTraits.getEdgeSourceLabel(Node, EI); + O << "" << DOT::EscapeString(label); } if (EI != EE && hasEdgeSourceLabels) @@ -235,12 +235,12 @@ class GraphWriter { DestPort = static_cast(Offset); } - if (DTraits.getEdgeSourceLabel(Node, EI) == "") + if (DTraits.getEdgeSourceLabel(Node, EI).empty()) edgeidx = -1; emitEdge(static_cast(Node), edgeidx, static_cast(TargetNode), 
DestPort, - DTraits.getEdgeAttributes(Node, EI)); + DTraits.getEdgeAttributes(Node, EI, G)); } } @@ -272,7 +272,7 @@ class GraphWriter { const void *DestNodeID, int DestNodePort, const std::string &Attrs) { if (SrcNodePort > 64) return; // Eminating from truncated part? - if (DestNodePort > 64) DestNodePort = 64; // Targetting the truncated part? + if (DestNodePort > 64) DestNodePort = 64; // Targeting the truncated part? O << "\tNode" << SrcNodeID; if (SrcNodePort >= 0) diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h index 2394a59c09cb..3878e793dbe0 100644 --- a/include/llvm/Support/IRBuilder.h +++ b/include/llvm/Support/IRBuilder.h @@ -17,6 +17,8 @@ #include "llvm/Instructions.h" #include "llvm/BasicBlock.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ConstantFolder.h" @@ -152,9 +154,10 @@ class IRBuilderBase { /// CreateGlobalString - Make a new global variable with an initializer that /// has array of i8 type filled in with the nul terminated string value - /// specified. If Name is specified, it is the name of the global variable - /// created. - Value *CreateGlobalString(const char *Str = "", const Twine &Name = ""); + /// specified. The new global variable will be marked mergable with any + /// others of the same contents. If Name is specified, it is the name of the + /// global variable created. + Value *CreateGlobalString(StringRef Str, const Twine &Name = ""); /// getInt1 - Get a constant value representing either true or false. 
ConstantInt *getInt1(bool V) { @@ -190,6 +193,10 @@ class IRBuilderBase { ConstantInt *getInt64(uint64_t C) { return ConstantInt::get(getInt64Ty(), C); } + + ConstantInt *getInt(const APInt &AI) { + return ConstantInt::get(Context, AI); + } //===--------------------------------------------------------------------===// // Type creation methods @@ -301,7 +308,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { : IRBuilderBase(C), Inserter(I), Folder(F) { } - explicit IRBuilder(LLVMContext &C) : IRBuilderBase(C), Folder(C) { + explicit IRBuilder(LLVMContext &C) : IRBuilderBase(C), Folder() { } explicit IRBuilder(BasicBlock *TheBB, const T &F) @@ -310,12 +317,12 @@ class IRBuilder : public IRBuilderBase, public Inserter { } explicit IRBuilder(BasicBlock *TheBB) - : IRBuilderBase(TheBB->getContext()), Folder(Context) { + : IRBuilderBase(TheBB->getContext()), Folder() { SetInsertPoint(TheBB); } explicit IRBuilder(Instruction *IP) - : IRBuilderBase(IP->getContext()), Folder(Context) { + : IRBuilderBase(IP->getContext()), Folder() { SetInsertPoint(IP); } @@ -325,7 +332,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { } IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP) - : IRBuilderBase(TheBB->getContext()), Folder(Context) { + : IRBuilderBase(TheBB->getContext()), Folder() { SetInsertPoint(TheBB, IP); } @@ -861,7 +868,7 @@ class IRBuilder : public IRBuilderBase, public Inserter { /// CreateGlobalStringPtr - Same as CreateGlobalString, but return a pointer /// with "i8*" type instead of a pointer to array of i8. 
- Value *CreateGlobalStringPtr(const char *Str = "", const Twine &Name = "") { + Value *CreateGlobalStringPtr(StringRef Str, const Twine &Name = "") { Value *gv = CreateGlobalString(Str, Name); Value *zero = ConstantInt::get(Type::getInt32Ty(Context), 0); Value *Args[] = { zero, zero }; @@ -1070,8 +1077,9 @@ class IRBuilder : public IRBuilderBase, public Inserter { // Instruction creation methods: Other Instructions //===--------------------------------------------------------------------===// - PHINode *CreatePHI(const Type *Ty, const Twine &Name = "") { - return Insert(PHINode::Create(Ty), Name); + PHINode *CreatePHI(const Type *Ty, unsigned NumReservedValues, + const Twine &Name = "") { + return Insert(PHINode::Create(Ty, NumReservedValues), Name); } CallInst *CreateCall(Value *Callee, const Twine &Name = "") { @@ -1101,6 +1109,11 @@ class IRBuilder : public IRBuilderBase, public Inserter { return Insert(CallInst::Create(Callee, Args, Args+5), Name); } + CallInst *CreateCall(Value *Callee, ArrayRef Arg, + const Twine &Name = "") { + return Insert(CallInst::Create(Callee, Arg.begin(), Arg.end(), Name)); + } + template CallInst *CreateCall(Value *Callee, RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd, const Twine &Name = "") { diff --git a/include/llvm/Support/Memory.h b/include/llvm/Support/Memory.h index 9c3f85b958bc..37890e7e4af1 100644 --- a/include/llvm/Support/Memory.h +++ b/include/llvm/Support/Memory.h @@ -75,12 +75,12 @@ namespace sys { /// setExecutable - Before the JIT can run a block of code, it has to be /// given read and executable privilege. Return true if it is already r-x /// or the system is able to change its previlege. 
- static bool setExecutable (MemoryBlock &M, std::string *ErrMsg = 0); + static bool setExecutable(MemoryBlock &M, std::string *ErrMsg = 0); /// setWritable - When adding to a block of code, the JIT may need /// to mark a block of code as RW since the protections are on page /// boundaries, and the JIT internal allocations are not page aligned. - static bool setWritable (MemoryBlock &M, std::string *ErrMsg = 0); + static bool setWritable(MemoryBlock &M, std::string *ErrMsg = 0); /// setRangeExecutable - Mark the page containing a range of addresses /// as executable. diff --git a/include/llvm/Support/MemoryBuffer.h b/include/llvm/Support/MemoryBuffer.h index b6243b7b10dd..d912e86c8b4e 100644 --- a/include/llvm/Support/MemoryBuffer.h +++ b/include/llvm/Support/MemoryBuffer.h @@ -40,7 +40,8 @@ class MemoryBuffer { MemoryBuffer &operator=(const MemoryBuffer &); // DO NOT IMPLEMENT protected: MemoryBuffer() {} - void init(const char *BufStart, const char *BufEnd); + void init(const char *BufStart, const char *BufEnd, + bool RequiresNullTerminator); public: virtual ~MemoryBuffer(); @@ -63,21 +64,27 @@ class MemoryBuffer { /// specified, this means that the client knows that the file exists and that /// it has the specified size. static error_code getFile(StringRef Filename, OwningPtr &result, - int64_t FileSize = -1); + int64_t FileSize = -1, + bool RequiresNullTerminator = true); static error_code getFile(const char *Filename, OwningPtr &result, - int64_t FileSize = -1); + int64_t FileSize = -1, + bool RequiresNullTerminator = true); /// getOpenFile - Given an already-open file descriptor, read the file and /// return a MemoryBuffer. static error_code getOpenFile(int FD, const char *Filename, OwningPtr &result, - int64_t FileSize = -1); + size_t FileSize = -1, + size_t MapSize = -1, + off_t Offset = 0, + bool RequiresNullTerminator = true); /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that InputData must be null terminated. 
static MemoryBuffer *getMemBuffer(StringRef InputData, - StringRef BufferName = ""); + StringRef BufferName = "", + bool RequiresNullTerminator = true); /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, /// copying the contents and taking ownership of it. InputData does not @@ -112,6 +119,21 @@ class MemoryBuffer { static error_code getFileOrSTDIN(const char *Filename, OwningPtr &result, int64_t FileSize = -1); + + + //===--------------------------------------------------------------------===// + // Provided for performance analysis. + //===--------------------------------------------------------------------===// + + /// The kind of memory backing used to support the MemoryBuffer. + enum BufferKind { + MemoryBuffer_Malloc, + MemoryBuffer_MMap + }; + + /// Return information on the memory mechanism used to support the + /// MemoryBuffer. + virtual BufferKind getBufferKind() const = 0; }; } // end namespace llvm diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h index 92a9fd695e58..5ead26ec25c1 100644 --- a/include/llvm/Support/NoFolder.h +++ b/include/llvm/Support/NoFolder.h @@ -27,12 +27,10 @@ namespace llvm { -class LLVMContext; - /// NoFolder - Create "constants" (actually, instructions) with no folding. class NoFolder { public: - explicit NoFolder(LLVMContext &) {} + explicit NoFolder() {} //===--------------------------------------------------------------------===// // Binary Operators diff --git a/include/llvm/Support/PathV1.h b/include/llvm/Support/PathV1.h index d7753a3e71e7..024bb39cedc2 100644 --- a/include/llvm/Support/PathV1.h +++ b/include/llvm/Support/PathV1.h @@ -608,14 +608,15 @@ namespace sys { /// /// This API is not intended for general use, clients should use /// MemoryBuffer::getFile instead. 
- static const char *MapInFilePages(int FD, uint64_t FileSize); + static const char *MapInFilePages(int FD, size_t FileSize, + off_t Offset); /// UnMapFilePages - Free pages mapped into the current process by /// MapInFilePages. /// /// This API is not intended for general use, clients should use /// MemoryBuffer::getFile instead. - static void UnMapFilePages(const char *Base, uint64_t FileSize); + static void UnMapFilePages(const char *Base, size_t FileSize); /// @} /// @name Data diff --git a/include/llvm/Support/PatternMatch.h b/include/llvm/Support/PatternMatch.h index 948ae5176eeb..172480e7ae62 100644 --- a/include/llvm/Support/PatternMatch.h +++ b/include/llvm/Support/PatternMatch.h @@ -40,6 +40,23 @@ bool match(Val *V, const Pattern &P) { return const_cast(P).match(V); } + +template +struct OneUse_match { + SubPattern_t SubPattern; + + OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {} + + template + bool match(OpTy *V) { + return V->hasOneUse() && SubPattern.match(V); + } +}; + +template +inline OneUse_match m_OneUse(const T &SubPattern) { return SubPattern; } + + template struct class_match { template @@ -227,7 +244,25 @@ struct specificval_ty { /// m_Specific - Match if we have a specific specified value. inline specificval_ty m_Specific(const Value *V) { return V; } +struct bind_const_intval_ty { + uint64_t &VR; + bind_const_intval_ty(uint64_t &V) : VR(V) {} + + template + bool match(ITy *V) { + if (ConstantInt *CV = dyn_cast(V)) + if (CV->getBitWidth() <= 64) { + VR = CV->getZExtValue(); + return true; + } + return false; + } +}; +/// m_ConstantInt - Match a ConstantInt and bind to its value. This does not +/// match ConstantInts wider than 64-bits. +inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; } + //===----------------------------------------------------------------------===// // Matchers for specific binary operators. 
// diff --git a/include/llvm/Support/PrettyStackTrace.h b/include/llvm/Support/PrettyStackTrace.h index 6dbce393b97e..9b3ecda50c1e 100644 --- a/include/llvm/Support/PrettyStackTrace.h +++ b/include/llvm/Support/PrettyStackTrace.h @@ -20,7 +20,7 @@ namespace llvm { class raw_ostream; /// DisablePrettyStackTrace - Set this to true to disable this module. This - /// might be neccessary if the host application installs its own signal + /// might be necessary if the host application installs its own signal /// handlers which conflict with the ones installed by this module. /// Defaults to false. extern bool DisablePrettyStackTrace; diff --git a/include/llvm/Support/Program.h b/include/llvm/Support/Program.h index 78a495ef2105..96b35660f96b 100644 --- a/include/llvm/Support/Program.h +++ b/include/llvm/Support/Program.h @@ -102,7 +102,7 @@ namespace sys { ); /// This function terminates the program. - /// @returns true if an error occured. + /// @returns true if an error occurred. /// @see Execute /// @brief Terminates the program. bool Kill diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h index b46a66889e96..7648e77bfbb5 100644 --- a/include/llvm/Support/Regex.h +++ b/include/llvm/Support/Regex.h @@ -53,7 +53,7 @@ namespace llvm { /// matches - Match the regex against a given \arg String. /// - /// \param Matches - If given, on a succesful match this will be filled in + /// \param Matches - If given, on a successful match this will be filled in /// with references to the matched group expressions (inside \arg String), /// the first group is always the entire pattern. 
/// diff --git a/include/llvm/Support/Signals.h b/include/llvm/Support/Signals.h index 9a84df68ddba..634f4cf76dc0 100644 --- a/include/llvm/Support/Signals.h +++ b/include/llvm/Support/Signals.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of -// unix signals occuring while your program is running. +// unix signals occurring while your program is running. // //===----------------------------------------------------------------------===// diff --git a/include/llvm/Support/SourceMgr.h b/include/llvm/Support/SourceMgr.h index a41a633ba6b6..2a712e44bd17 100644 --- a/include/llvm/Support/SourceMgr.h +++ b/include/llvm/Support/SourceMgr.h @@ -156,10 +156,9 @@ class SMDiagnostic { // Null diagnostic. SMDiagnostic() : SM(0), LineNo(0), ColumnNo(0), ShowLine(0) {} // Diagnostic with no location (e.g. file not found, command line arg error). - SMDiagnostic(const std::string &filename, const std::string &Msg, - bool showline = true) + SMDiagnostic(const std::string &filename, const std::string &Msg) : SM(0), Filename(filename), LineNo(-1), ColumnNo(-1), - Message(Msg), ShowLine(showline) {} + Message(Msg), ShowLine(false) {} // Diagnostic with a location. 
SMDiagnostic(const SourceMgr &sm, SMLoc L, const std::string &FN, @@ -171,7 +170,7 @@ class SMDiagnostic { const SourceMgr *getSourceMgr() const { return SM; } SMLoc getLoc() const { return Loc; } - const std::string &getFilename() { return Filename; } + const std::string &getFilename() const { return Filename; } int getLineNo() const { return LineNo; } int getColumnNo() const { return ColumnNo; } const std::string &getMessage() const { return Message; } diff --git a/include/llvm/Support/StandardPasses.h b/include/llvm/Support/StandardPasses.h index d774faf38642..8dfd6f98abfd 100644 --- a/include/llvm/Support/StandardPasses.h +++ b/include/llvm/Support/StandardPasses.h @@ -72,6 +72,7 @@ namespace llvm { Pass *InliningPass) { createStandardAliasAnalysisPasses(PM); + // If all optimizations are disabled, just run the always-inline pass. if (OptimizationLevel == 0) { if (InliningPass) PM->add(InliningPass); @@ -83,9 +84,10 @@ namespace llvm { PM->add(createIPSCCPPass()); // IP SCCP PM->add(createDeadArgEliminationPass()); // Dead argument elimination + + PM->add(createInstructionCombiningPass());// Clean up after IPCP & DAE + PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE } - PM->add(createInstructionCombiningPass()); // Clean up after IPCP & DAE - PM->add(createCFGSimplificationPass()); // Clean up after IPCP & DAE // Start of CallGraph SCC passes. 
if (UnitAtATime && HaveExceptions) @@ -120,7 +122,6 @@ namespace llvm { PM->add(createLoopDeletionPass()); // Delete dead loops if (UnrollLoops) PM->add(createLoopUnrollPass()); // Unroll small loops - PM->add(createInstructionCombiningPass()); // Clean up after the unroller if (OptimizationLevel > 1) PM->add(createGVNPass()); // Remove redundancies PM->add(createMemCpyOptPass()); // Remove memcpy / form memset @@ -134,6 +135,7 @@ namespace llvm { PM->add(createDeadStoreEliminationPass()); // Delete dead stores PM->add(createAggressiveDCEPass()); // Delete dead instructions PM->add(createCFGSimplificationPass()); // Merge & remove BBs + PM->add(createInstructionCombiningPass()); // Clean up after everything. if (UnitAtATime) { PM->add(createStripDeadPrototypesPass()); // Get rid of dead prototypes diff --git a/include/llvm/Support/TimeValue.h b/include/llvm/Support/TimeValue.h index e1227118c22c..94f132a05ca7 100644 --- a/include/llvm/Support/TimeValue.h +++ b/include/llvm/Support/TimeValue.h @@ -35,13 +35,13 @@ namespace sys { public: /// A constant TimeValue representing the smallest time - /// value permissable by the class. MinTime is some point + /// value permissible by the class. MinTime is some point /// in the distant past, about 300 billion years BCE. /// @brief The smallest possible time value. static const TimeValue MinTime; /// A constant TimeValue representing the largest time - /// value permissable by the class. MaxTime is some point + /// value permissible by the class. MaxTime is some point /// in the distant future, about 300 billion years AD. /// @brief The largest possible time value. 
static const TimeValue MaxTime; diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h index e5306ecfb35c..47759b99ecc5 100644 --- a/include/llvm/Support/system_error.h +++ b/include/llvm/Support/system_error.h @@ -1,4 +1,4 @@ -//===---------------------------- system_error ----------------------------===// +//===---------------------------- system_error ------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/Target/SubtargetFeature.h index 6c21ae9583e0..4213d9b12ed0 100644 --- a/include/llvm/Target/SubtargetFeature.h +++ b/include/llvm/Target/SubtargetFeature.h @@ -35,8 +35,8 @@ namespace llvm { struct SubtargetFeatureKV { const char *Key; // K-V key string const char *Desc; // Help descriptor - uint32_t Value; // K-V integer value - uint32_t Implies; // K-V bit mask + uint64_t Value; // K-V integer value + uint64_t Implies; // K-V bit mask // Compare routine for std binary search bool operator<(const SubtargetFeatureKV &S) const { @@ -94,7 +94,7 @@ class SubtargetFeatures { void AddFeature(const std::string &String, bool IsEnabled = true); /// Get feature bits. - uint32_t getBits(const SubtargetFeatureKV *CPUTable, + uint64_t getBits(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize, const SubtargetFeatureKV *FeatureTable, size_t FeatureTableSize); diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td index 0f7e6aaaf2fa..68f0515f2d17 100644 --- a/include/llvm/Target/Target.td +++ b/include/llvm/Target/Target.td @@ -32,17 +32,6 @@ class Register { string Namespace = ""; string AsmName = n; - // SpillSize - If this value is set to a non-zero value, it is the size in - // bits of the spill slot required to hold this register. If this value is - // set to zero, the information is inferred from any register classes the - // register belongs to. 
- int SpillSize = 0; - - // SpillAlignment - This value is used to specify the alignment required for - // spilling the register. Like SpillSize, this should only be explicitly - // specified if the register is not in a register class. - int SpillAlignment = 0; - // Aliases - A list of registers that this register overlaps with. A read or // modification of this register can potentially read or modify the aliased // registers. @@ -78,6 +67,13 @@ class Register { // -1 indicates that the gcc number is undefined and -2 that register number // is invalid for this mode/flavour. list DwarfNumbers = []; + + // CostPerUse - Additional cost of instructions using this register compared + // to other registers in its class. The register allocator will try to + // minimize the number of instructions using a register with a CostPerUse. + // This is used by the x86-64 and ARM Thumb targets where some registers + // require larger instruction encodings. + int CostPerUse = 0; } // RegisterWithSubRegs - This can be used to define instances of Register which @@ -200,6 +196,7 @@ class Instruction { bit isIndirectBranch = 0; // Is this instruction an indirect branch? bit isCompare = 0; // Is this instruction a comparison instruction? bit isMoveImm = 0; // Is this instruction a move immediate instruction? + bit isBitcast = 0; // Is this instruction a bitcast instruction? bit isBarrier = 0; // Can control flow fall through this instruction? bit isCall = 0; // Is this instruction a call instruction? bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand? @@ -590,9 +587,10 @@ class MnemonicAlias { /// InstAlias - This defines an alternate assembly syntax that is allowed to /// match an instruction that has a different (more canonical) assembly /// representation. -class InstAlias { +class InstAlias { string AsmString = Asm; // The .s format to match the instruction with. dag ResultInst = Result; // The MCInst to generate. 
+ bit EmitAlias = Emit; // Emit the alias instead of what's aliased. // Predicates - Predicates that must be true for this to match. list Predicates = []; diff --git a/include/llvm/Target/TargetAsmBackend.h b/include/llvm/Target/TargetAsmBackend.h index 7527298efa9e..2111f6b7a950 100644 --- a/include/llvm/Target/TargetAsmBackend.h +++ b/include/llvm/Target/TargetAsmBackend.h @@ -16,6 +16,7 @@ #include "llvm/Support/DataTypes.h" namespace llvm { +class MCELFObjectTargetWriter; class MCFixup; class MCInst; class MCObjectWriter; @@ -40,6 +41,13 @@ class TargetAsmBackend { /// assembler backend to emit the final object file. virtual MCObjectWriter *createObjectWriter(raw_ostream &OS) const = 0; + /// createELFObjectTargetWriter - Create a new ELFObjectTargetWriter to enable + /// non-standard ELFObjectWriters. + virtual MCELFObjectTargetWriter *createELFObjectTargetWriter() const { + assert(0 && "createELFObjectTargetWriter is not supported by asm backend"); + return 0; + } + /// hasReliableSymbolDifference - Check whether this target implements /// accurate relocations for differences between symbols. 
If not, differences /// between symbols will always be relocatable expressions and any references diff --git a/include/llvm/Target/TargetAsmInfo.h b/include/llvm/Target/TargetAsmInfo.h index 98aab142b8e4..0271b670464e 100644 --- a/include/llvm/Target/TargetAsmInfo.h +++ b/include/llvm/Target/TargetAsmInfo.h @@ -58,6 +58,14 @@ class TargetAsmInfo { return TLOF->getEHFrameSection(); } + unsigned getFDEEncoding(bool CFI) const { + return TLOF->getFDEEncoding(CFI); + } + + bool isFunctionEHFrameSymbolPrivate() const { + return TLOF->isFunctionEHFrameSymbolPrivate(); + } + unsigned getDwarfRARegNum(bool isEH) const { return TRI->getDwarfRegNum(TRI->getRARegister(), isEH); } diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h index 25065d30bb6e..32e3e2b0b617 100644 --- a/include/llvm/Target/TargetData.h +++ b/include/llvm/Target/TargetData.h @@ -160,7 +160,18 @@ class TargetData : public ImmutablePass { bool isIllegalInteger(unsigned Width) const { return !isLegalInteger(Width); } - + + /// fitsInLegalInteger - This function returns true if the specified type fits + /// in a native integer type supported by the CPU. For example, if the CPU + /// only supports i32 as a native integer type, then i27 fits in a legal + // integer type but i45 does not. 
+ bool fitsInLegalInteger(unsigned Width) const { + for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i) + if (Width <= LegalIntWidths[i]) + return true; + return false; + } + /// Target pointer alignment unsigned getPointerABIAlignment() const { return PointerABIAlign; } /// Return target's alignment for stack-based pointers diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/Target/TargetInstrDesc.h index 8823d5a4d17e..6e20e8a1ba83 100644 --- a/include/llvm/Target/TargetInstrDesc.h +++ b/include/llvm/Target/TargetInstrDesc.h @@ -105,6 +105,7 @@ namespace TID { IndirectBranch, Compare, MoveImm, + Bitcast, DelaySlot, FoldableAsLoad, MayLoad, @@ -358,6 +359,12 @@ class TargetInstrDesc { bool isMoveImmediate() const { return Flags & (1 << TID::MoveImm); } + + /// isBitcast - Return true if this instruction is a bitcast instruction. + /// + bool isBitcast() const { + return Flags & (1 << TID::Bitcast); + } /// isNotDuplicable - Return true if this instruction cannot be safely /// duplicated. For example, if the instruction has a unique labels attached diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index fc7b51ec6c2c..418f3fe062f2 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -477,7 +477,7 @@ class TargetInstrInfo { } /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to - /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. 
This function takes two integers that represent the load offsets @@ -641,6 +641,10 @@ class TargetInstrInfo { virtual int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const; + /// isHighLatencyDef - Return true if this opcode has high latency to its + /// result. + virtual bool isHighLatencyDef(int opc) const { return false; } + /// hasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered /// it 'high'. This is used by optimization passes such as machine LICM to diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h index a95b70f6b997..198d5854462f 100644 --- a/include/llvm/Target/TargetInstrItineraries.h +++ b/include/llvm/Target/TargetInstrItineraries.h @@ -155,9 +155,13 @@ class InstrItineraryData { /// in the itinerary. /// unsigned getStageLatency(unsigned ItinClassIndx) const { - // If the target doesn't provide itinerary information, use a - // simple non-zero default value for all instructions. - if (isEmpty()) + // If the target doesn't provide itinerary information, use a simple + // non-zero default value for all instructions. Some target's provide a + // dummy (Generic) itinerary which should be handled as if it's itinerary is + // empty. We identify this by looking for a reference to stage zero (invalid + // stage). This is different from beginStage == endState != 0, which could + // be used for zero-latency pseudo ops. + if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0) return 1; // Calculate the maximum completion time for any stage. 
diff --git a/include/llvm/Target/TargetLibraryInfo.h b/include/llvm/Target/TargetLibraryInfo.h index bdd214b6b743..0914b5daa4b2 100644 --- a/include/llvm/Target/TargetLibraryInfo.h +++ b/include/llvm/Target/TargetLibraryInfo.h @@ -23,9 +23,21 @@ namespace llvm { // void *memcpy(void *s1, const void *s2, size_t n); memcpy, + // void *memmove(void *s1, const void *s2, size_t n); + memmove, + /// void memset_pattern16(void *b, const void *pattern16, size_t len); memset_pattern16, + /// int iprintf(const char *format, ...); + iprintf, + + /// int siprintf(char *str, const char *format, ...); + siprintf, + + /// int fiprintf(FILE *stream, const char *format, ...); + fiprintf, + NumLibFuncs }; } diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index ba7574dfdbd7..17d761ce8fb8 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -39,6 +39,7 @@ namespace llvm { class AllocaInst; class APFloat; class CallInst; + class CCState; class Function; class FastISel; class FunctionLoweringInfo; @@ -189,14 +190,6 @@ class TargetLowering { return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy]; } - /// getRegPressureLimit - Return the register pressure "high water mark" for - /// the specific register class. The scheduler is in high register pressure - /// mode (for the specific register class) if it goes over the limit. - virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - return 0; - } - /// isTypeLegal - Return true if the target has native support for the /// specified value type. This means that it has a register that directly /// holds it without promotions or expansions. 
@@ -934,6 +927,7 @@ class TargetLowering { bool isCalledByLegalizer() const { return CalledByLegalizer; } void AddToWorklist(SDNode *N); + void RemoveFromWorklist(SDNode *N); SDValue CombineTo(SDNode *N, const std::vector &To, bool AddTo = true); SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); @@ -1048,7 +1042,7 @@ class TargetLowering { } /// JumpIsExpensive - Tells the code generator not to expand sequence of - /// operations into a seperate sequences that increases the amount of + /// operations into a separate sequences that increases the amount of /// flow control. void setJumpIsExpensive(bool isExpensive = true) { JumpIsExpensive = isExpensive; @@ -1258,6 +1252,9 @@ class TargetLowering { return SDValue(); // this is here to silence compiler errors } + /// HandleByVal - Target-specific cleanup for formal ByVal parameters. + virtual void HandleByVal(CCState *, unsigned &) const {} + /// CanLowerReturn - This hook should be implemented to check whether the /// return values described by the Outs array can fit into the return /// registers. If false is returned, an sret-demotion is performed. @@ -1291,6 +1288,26 @@ class TargetLowering { return false; } + /// mayBeEmittedAsTailCall - Return true if the target may be able emit the + /// call instruction as a tail call. This is used by optimization passes to + /// determine if it's profitable to duplicate return instructions to enable + /// tailcall optimization. + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const { + return false; + } + + /// getTypeForExtArgOrReturn - Return the type that should be used to zero or + /// sign extend a zeroext/signext integer argument or return value. + /// FIXME: Most C calling convention requires the return type to be promoted, + /// but this is not true all the time, e.g. i1 on x86-64. It is also not + /// necessary for non-C calling conventions. The frontend should handle this + /// and include all of the necessary information. 
+ virtual EVT getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, + ISD::NodeType ExtendKind) const { + EVT MinVT = getRegisterType(Context, MVT::i32); + return VT.bitsLT(MinVT) ? MinVT : VT; + } + /// LowerOperationWrapper - This callback is invoked by the type legalizer /// to legalize nodes with an illegal operand type but legal result types. /// It replaces the LowerOperation callback in the type Legalizer. diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h index 34bf27132de5..7402ed697232 100644 --- a/include/llvm/Target/TargetLoweringObjectFile.h +++ b/include/llvm/Target/TargetLoweringObjectFile.h @@ -140,6 +140,9 @@ class TargetLoweringObjectFile { const MCSection *getStaticDtorSection() const { return StaticDtorSection; } const MCSection *getLSDASection() const { return LSDASection; } virtual const MCSection *getEHFrameSection() const = 0; + virtual void emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const; const MCSection *getDwarfAbbrevSection() const { return DwarfAbbrevSection; } const MCSection *getDwarfInfoSection() const { return DwarfInfoSection; } const MCSection *getDwarfLineSection() const { return DwarfLineSection; } @@ -218,15 +221,19 @@ class TargetLoweringObjectFile { MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const; + // getCFIPersonalitySymbol - The symbol that gets passed to .cfi_personality. 
+ virtual MCSymbol * + getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const; + /// const MCExpr * - getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, + getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const; virtual unsigned getPersonalityEncoding() const; virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding() const; + virtual unsigned getFDEEncoding(bool CFI) const; virtual unsigned getTTypeEncoding() const; protected: diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 030bf5b89f77..78f770cc41c3 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -38,6 +38,7 @@ class PassManager; class Pass; class TargetELFWriterInfo; class formatted_raw_ostream; +class raw_ostream; // Relocation model types. namespace Reloc { @@ -105,7 +106,9 @@ class TargetMachine { unsigned MCRelaxAll : 1; unsigned MCNoExecStack : 1; + unsigned MCSaveTempLabels : 1; unsigned MCUseLoc : 1; + unsigned MCUseCFI : 1; public: virtual ~TargetMachine(); @@ -171,6 +174,14 @@ class TargetMachine { /// relaxed. void setMCRelaxAll(bool Value) { MCRelaxAll = Value; } + /// hasMCSaveTempLabels - Check whether temporary labels will be preserved + /// (i.e., not treated as temporary). + bool hasMCSaveTempLabels() const { return MCSaveTempLabels; } + + /// setMCSaveTempLabels - Set whether temporary labels will be preserved + /// (i.e., not treated as temporary). + void setMCSaveTempLabels(bool Value) { MCSaveTempLabels = Value; } + /// hasMCNoExecStack - Check whether an executable stack is not needed. bool hasMCNoExecStack() const { return MCNoExecStack; } @@ -183,6 +194,12 @@ class TargetMachine { /// setMCUseLoc - Set whether all we should use dwarf's .loc directive. 
void setMCUseLoc(bool Value) { MCUseLoc = Value; } + /// hasMCUseCFI - Check whether we should use dwarf's .cfi_* directives. + bool hasMCUseCFI() const { return MCUseCFI; } + + /// setMCUseCFI - Set whether all we should use dwarf's .cfi_* directives. + void setMCUseCFI(bool Value) { MCUseCFI = Value; } + /// getRelocationModel - Returns the code generation relocation model. The /// choices are static, PIC, and dynamic-no-pic, and target default. static Reloc::Model getRelocationModel(); @@ -267,6 +284,7 @@ class TargetMachine { /// virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, + raw_ostream &, CodeGenOpt::Level, bool = true) { return true; @@ -324,6 +342,7 @@ class LLVMTargetMachine : public TargetMachine { /// virtual bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_ostream &OS, CodeGenOpt::Level OptLevel, bool DisableVerify = true); diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 97ceffdaecb8..62190c166e3f 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -157,6 +157,11 @@ namespace llvm { /// wth earlier copy coalescing. extern bool StrongPHIElim; + /// getTrapFunctionName - If this returns a non-empty string, this means isel + /// should lower Intrinsic::trap to a call to the specified function name + /// instead of an ISD::TRAP node. 
+ extern StringRef getTrapFunctionName(); + } // End llvm namespace #endif diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 121091c9b49b..205e76f9c6fc 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -46,6 +46,7 @@ struct TargetRegisterDesc { const unsigned *Overlaps; // Overlapping registers, described above const unsigned *SubRegs; // Sub-register set, described above const unsigned *SuperRegs; // Super-register set, described above + unsigned CostPerUse; // Extra cost of instructions using register. }; class TargetRegisterClass { @@ -426,6 +427,12 @@ class TargetRegisterInfo { return get(RegNo).Name; } + /// getCostPerUse - Return the additional cost of using this register instead + /// of other registers in its class. + unsigned getCostPerUse(unsigned RegNo) const { + return get(RegNo).CostPerUse; + } + /// getNumRegs - Return the number of registers this target has (useful for /// sizing arrays holding per register information) unsigned getNumRegs() const { @@ -588,11 +595,32 @@ class TargetRegisterInfo { } /// getCrossCopyRegClass - Returns a legal register class to copy a register - /// in the specified class to or from. Returns NULL if it is possible to copy - /// between a two registers of the specified class. + /// in the specified class to or from. If it is possible to copy the register + /// directly without using a cross register class copy, return the specified + /// RC. Returns NULL if it is not possible to copy between a two registers of + /// the specified class. virtual const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const { - return NULL; + return RC; + } + + /// getLargestLegalSuperClass - Returns the largest super class of RC that is + /// legal to use in the current sub-target and has the same spill size. 
+ /// The returned register class can be used to create virtual registers which + /// means that all its registers can be copied and spilled. + virtual const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const { + /// The default implementation is very conservative and doesn't allow the + /// register allocator to inflate register classes. + return RC; + } + + /// getRegPressureLimit - Return the register pressure "high water mark" for + /// the specific register class. The scheduler is in high register pressure + /// mode (for the specific register class) if it goes over the limit. + virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + return 0; } /// getAllocationOrder - Returns the register allocation order for a specified @@ -614,6 +642,14 @@ class TargetRegisterInfo { return 0; } + /// avoidWriteAfterWrite - Return true if the register allocator should avoid + /// writing a register from RC in two consecutive instructions. + /// This can avoid pipeline stalls on certain architectures. + /// It does cause increased register pressure, though. + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + return false; + } + /// UpdateRegAllocHint - A callback to allow target a chance to update /// register allocation hints when a register is "changed" (e.g. coalesced) /// to another register. e.g. On ARM, some virtual registers should target @@ -631,6 +667,13 @@ class TargetRegisterInfo { return false; } + /// useFPForScavengingIndex - returns true if the target wants to use + /// frame pointer based accesses to spill to the scavenger emergency spill + /// slot. + virtual bool useFPForScavengingIndex(const MachineFunction &MF) const { + return true; + } + /// requiresFrameIndexScavenging - returns true if the target requires post /// PEI scavenging of registers for materializing frame index constants. 
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const { diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h index f851ad0a9bfb..a464822893ba 100644 --- a/include/llvm/Target/TargetRegistry.h +++ b/include/llvm/Target/TargetRegistry.h @@ -43,7 +43,7 @@ namespace llvm { MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, - bool useLoc, + bool useLoc, bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, TargetAsmBackend *TAB, @@ -78,6 +78,7 @@ namespace llvm { TargetMachine &TM); typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T); typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI); typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const Target &T, @@ -95,6 +96,7 @@ namespace llvm { formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, + bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, TargetAsmBackend *TAB, @@ -286,11 +288,12 @@ namespace llvm { return MCDisassemblerCtorFn(*this); } - MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant, + MCInstPrinter *createMCInstPrinter(TargetMachine &TM, + unsigned SyntaxVariant, const MCAsmInfo &MAI) const { if (!MCInstPrinterCtorFn) return 0; - return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI); + return MCInstPrinterCtorFn(*this, TM, SyntaxVariant, MAI); } @@ -327,12 +330,13 @@ namespace llvm { formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, + bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, TargetAsmBackend *TAB, bool ShowInst) const { // AsmStreamerCtorFn is default to llvm::createAsmStreamer - return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc, + return AsmStreamerCtorFn(Ctx, OS, isVerboseAsm, useLoc, useCFI, InstPrint, CE, TAB, ShowInst); } diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h index 1891f879741a..c5ab90b0e03d 100644 --- 
a/include/llvm/Target/TargetSelect.h +++ b/include/llvm/Target/TargetSelect.h @@ -120,6 +120,19 @@ namespace llvm { return true; #endif } + + /// InitializeNativeTargetAsmParser - The main program should call + /// this function to initialize the native target asm parser. + inline bool InitializeNativeTargetAsmParser() { + // If we have a native target, initialize the corresponding asm parser. +#ifdef LLVM_NATIVE_ASMPARSER + LLVM_NATIVE_ASMPARSER(); + return false; +#else + return true; +#endif + } + } #endif diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td index c9be40d23f00..ff8d07de036c 100644 --- a/include/llvm/Target/TargetSelectionDAG.td +++ b/include/llvm/Target/TargetSelectionDAG.td @@ -490,6 +490,18 @@ class SDNodeXForm { def NOOP_SDNodeXForm : SDNodeXForm; +//===----------------------------------------------------------------------===// +// PatPred Subclasses. +// +// These allow specifying different sorts of predicates that control whether a +// node is matched. +// +class PatPred; + +class CodePatPred : PatPred { + code PredicateCode = predicate; +} + //===----------------------------------------------------------------------===// // Selection DAG Pattern Fragments. @@ -507,7 +519,8 @@ class PatFrag : SDPatternOperator { dag Operands = ops; dag Fragment = frag; - code Predicate = pred; + code PredicateCode = pred; + code ImmediateCode = [{}]; SDNodeXForm OperandTransform = xform; } @@ -516,6 +529,27 @@ class PatFrag : PatFrag<(ops), frag, pred, xform>; + +// ImmLeaf is a pattern fragment with a constraint on the immediate. The +// constraint is a function that is run on the immediate (always with the value +// sign extended out to an int64_t) as Imm. For example: +// +// def immSExt8 : ImmLeaf; +// +// this is a more convenient form to match 'imm' nodes in than PatLeaf and also +// is preferred over using PatLeaf because it allows the code generator to +// reason more about the constraint. 
+// +// If FastIsel should ignore all instructions that have an operand of this type, +// the FastIselShouldIgnore flag can be set. This is an optimization to reduce +// the code size of the generated fast instruction selector. +class ImmLeaf + : PatFrag<(ops), (vt imm), [{}], xform> { + let ImmediateCode = pred; + bit FastIselShouldIgnore = 0; +} + + // Leaf fragments. def vtInt : PatLeaf<(vt), [{ return N->getVT().isInteger(); }]>; diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 12398813cc76..d12fd1db7ab0 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -152,7 +152,6 @@ ModulePass *createDeadArgHackingPass(); /// equal to maxElements (maxElements == 0 means always promote). /// Pass *createArgumentPromotionPass(unsigned maxElements = 3); -Pass *createStructRetPromotionPass(); //===----------------------------------------------------------------------===// /// createIPConstantPropagationPass - This pass propagates constants from call diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index aa9873fb8afa..088775a9dfe7 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -17,7 +17,6 @@ namespace llvm { class ModulePass; -class FunctionPass; // Insert edge profiling instrumentation ModulePass *createEdgeProfilerPass(); @@ -28,6 +27,9 @@ ModulePass *createOptimalEdgeProfilerPass(); // Insert path profiling instrumentation ModulePass *createPathProfilerPass(); +// Insert GCOV profiling instrumentation +ModulePass *createGCOVProfilerPass(bool EmitNotes = true, bool EmitData = true); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h index 6f2a38e5840c..de46a8d98407 100644 --- a/include/llvm/Transforms/Scalar.h +++ b/include/llvm/Transforms/Scalar.h @@ -128,7 +128,7 @@ Pass *createLoopInstSimplifyPass(); // // LoopUnroll - This pass is a simple 
loop unrolling pass. // -Pass *createLoopUnrollPass(); +Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1, int AllowPartial = -1); //===----------------------------------------------------------------------===// // @@ -299,12 +299,6 @@ Pass *createLoopDeletionPass(); /// specific well-known (library) functions. FunctionPass *createSimplifyLibCallsPass(); -//===----------------------------------------------------------------------===// -// -/// createSimplifyHalfPowrLibCallsPass - This is an experimental pass that -/// optimizes specific half_pow functions. -FunctionPass *createSimplifyHalfPowrLibCallsPass(); - //===----------------------------------------------------------------------===// // // CodeGenPrepare - This pass prepares a function for instruction selection. diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 533586028700..90eabef12fa7 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -19,6 +19,7 @@ #include "llvm/BasicBlock.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/DebugLoc.h" namespace llvm { @@ -181,6 +182,10 @@ BasicBlock *SplitBlockPredecessors(BasicBlock *BB, BasicBlock *const *Preds, ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred); +/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a +/// given basic block. +DebugLoc GetFirstDebugLocInBasicBlock(const BasicBlock *BB); + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 24ebb109a0ad..853de2dc0312 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -207,7 +207,7 @@ class InlineFunctionInfo { /// /// Note that this only does one level of inlining. 
For example, if the /// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -/// exists in the instruction stream. Similiarly this will inline a recursive +/// exists in the instruction stream. Similarly this will inline a recursive /// function by one level. /// bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI); diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 2823fbb71997..e61dcb347c85 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -19,14 +19,19 @@ namespace llvm { class User; class BasicBlock; +class Function; class BranchInst; class Instruction; +class DbgDeclareInst; +class StoreInst; +class LoadInst; class Value; class Pass; class PHINode; class AllocaInst; class ConstantExpr; class TargetData; +class DIBuilder; template class SmallVectorImpl; @@ -69,10 +74,6 @@ bool RecursivelyDeleteDeadPHINode(PHINode *PN); /// /// This returns true if it changed the code, note that it can delete /// instructions in other blocks as well in this block. -/// -/// WARNING: Do not use this function on unreachable blocks, as recursive -/// simplification is not able to handle corner-case scenarios that can -/// arise in them. bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD = 0); //===----------------------------------------------------------------------===// @@ -157,6 +158,24 @@ static inline unsigned getKnownAlignment(Value *V, const TargetData *TD = 0) { return getOrEnforceKnownAlignment(V, 0, TD); } +///===---------------------------------------------------------------------===// +/// Dbg Intrinsic utilities +/// + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. 
+bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + StoreInst *SI, DIBuilder &Builder); + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + LoadInst *LI, DIBuilder &Builder); + +/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set +/// of llvm.dbg.value intrinsics. +bool LowerDbgDeclare(Function &F); + } // End llvm namespace #endif diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h index 9fdcb983232c..89ad534ffb88 100644 --- a/include/llvm/TypeSymbolTable.h +++ b/include/llvm/TypeSymbolTable.h @@ -133,7 +133,7 @@ class TypeSymbolTable : public AbstractTypeUser { /// is refined. virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy); - /// This function markes a type as being concrete (defined). + /// This function marks a type as being concrete (defined). virtual void typeBecameConcrete(const DerivedType *AbsTy); /// @} diff --git a/include/llvm/User.h b/include/llvm/User.h index 1363495f7c07..3f9c28e7b381 100644 --- a/include/llvm/User.h +++ b/include/llvm/User.h @@ -95,11 +95,11 @@ class User : public Value { OperandList[i] = Val; } const Use &getOperandUse(unsigned i) const { - assert(i < NumOperands && "getOperand() out of range!"); + assert(i < NumOperands && "getOperandUse() out of range!"); return OperandList[i]; } Use &getOperandUse(unsigned i) { - assert(i < NumOperands && "getOperand() out of range!"); + assert(i < NumOperands && "getOperandUse() out of range!"); return OperandList[i]; } diff --git a/include/llvm/Value.h b/include/llvm/Value.h index 130e2735f525..3a1c3ca298c9 100644 --- a/include/llvm/Value.h +++ b/include/llvm/Value.h @@ -51,8 +51,8 @@ class MDNode; /// This is a very important LLVM class. It is the base class of all values /// computed by a program that may be used as operands to other values. 
Value is /// the super class of other important classes such as Instruction and Function. -/// All Values have a Type. Type is not a subclass of Value. All types can have -/// a name and they should belong to some Module. Setting the name on the Value +/// All Values have a Type. Type is not a subclass of Value. Some values can +/// have a name and they belong to some Module. Setting the name on the Value /// automatically updates the module's symbol table. /// /// Every value has a "use list" that keeps track of which other Values are diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index be02ddbaa534..c189a0042928 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -86,14 +86,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS, if (onlyAccessesArgPointees(MRB)) { bool doesAlias = false; - if (doesAccessArgPointees(MRB)) + if (doesAccessArgPointees(MRB)) { + MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - AI != AE; ++AI) - if (!isNoAlias(Location(*AI), Loc)) { + AI != AE; ++AI) { + const Value *Arg = *AI; + if (!Arg->getType()->isPointerTy()) + continue; + Location CSLoc(Arg, UnknownSize, CSTag); + if (!isNoAlias(CSLoc, Loc)) { doesAlias = true; break; } - + } + } if (!doesAlias) return NoModRef; } @@ -138,13 +144,19 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { // CS2's arguments. 
if (onlyAccessesArgPointees(CS2B)) { AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS2B)) + if (doesAccessArgPointees(CS2B)) { + MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { - R = ModRefResult((R | getModRefInfo(CS1, *I, UnknownSize)) & Mask); + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS2Loc(Arg, UnknownSize, CS2Tag); + R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask); if (R == Mask) break; } + } return R; } @@ -152,13 +164,20 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { // any of the memory referenced by CS1's arguments. If not, return NoModRef. if (onlyAccessesArgPointees(CS1B)) { AliasAnalysis::ModRefResult R = NoModRef; - if (doesAccessArgPointees(CS1B)) + if (doesAccessArgPointees(CS1B)) { + MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa); for (ImmutableCallSite::arg_iterator - I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) - if (getModRefInfo(CS2, *I, UnknownSize) != NoModRef) { + I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { + const Value *Arg = *I; + if (!Arg->getType()->isPointerTy()) + continue; + Location CS1Loc(Arg, UnknownSize, CS1Tag); + if (getModRefInfo(CS2, CS1Loc) != NoModRef) { R = Mask; break; } + } + } if (R == NoModRef) return R; } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 3a46976d66f7..2ed694941212 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -602,6 +602,10 @@ void AliasSetTracker::ASTCallbackVH::deleted() { // this now dangles! 
} +void AliasSetTracker::ASTCallbackVH::allUsesReplacedWith(Value *V) { + AST->copyValue(getValPtr(), V); +} + AliasSetTracker::ASTCallbackVH::ASTCallbackVH(Value *V, AliasSetTracker *ast) : CallbackVH(V), AST(ast) {} diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 1af1c35f5392..6ebe100b1330 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -43,14 +43,12 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeLazyValueInfoPass(Registry); initializeLibCallAliasAnalysisPass(Registry); initializeLintPass(Registry); - initializeLiveValuesPass(Registry); initializeLoopDependenceAnalysisPass(Registry); initializeLoopInfoPass(Registry); initializeMemDepPrinterPass(Registry); initializeMemoryDependenceAnalysisPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); initializePostDominatorTreePass(Registry); - initializePostDominanceFrontierPass(Registry); initializeProfileEstimatorPassPass(Registry); initializeNoProfileInfoPass(Registry); initializeNoPathProfileInfoPass(Registry); diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index f7bcd9ec44d8..f1bb8a38f090 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -350,7 +350,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, Scale *= IndexScale.getSExtValue(); - // If we already had an occurrance of this index variable, merge this + // If we already had an occurrence of this index variable, merge this // scale into it. For example, we want to handle: // A[x][x] -> x*16 + x*4 -> x*20 // This also ensures that 'x' only appears in the index list once. @@ -779,6 +779,26 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, return NoModRef; break; } + case Intrinsic::arm_neon_vld1: { + // LLVM's vld1 and vst1 intrinsics currently only support a single + // vector register. + uint64_t Size = + TD ? 
TD->getTypeStoreSize(II->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } + case Intrinsic::arm_neon_vst1: { + uint64_t Size = + TD ? TD->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize; + if (isNoAlias(Location(II->getArgOperand(0), Size, + II->getMetadata(LLVMContext::MD_tbaa)), + Loc)) + return NoModRef; + break; + } } // The AliasAnalysis base class has some smarts, lets use them. @@ -883,7 +903,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, if (GEP1BaseOffset == 0 && GEP1VariableIndices.empty()) return MustAlias; - // If there is a difference betwen the pointers, but the difference is + // If there is a difference between the pointers, but the difference is // less than the size of the associated memory object, then we know // that the objects are partially overlapping. if (GEP1BaseOffset != 0 && GEP1VariableIndices.empty()) { diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index 1a738fae837d..6be561718c7a 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -9,11 +9,11 @@ add_llvm_library(LLVMAnalysis CFGPrinter.cpp CaptureTracking.cpp ConstantFolding.cpp + DIBuilder.cpp DbgInfoPrinter.cpp DebugInfo.cpp - DIBuilder.cpp - DominanceFrontier.cpp DomPrinter.cpp + DominanceFrontier.cpp IVUsers.cpp InlineCost.cpp InstCount.cpp @@ -24,7 +24,6 @@ add_llvm_library(LLVMAnalysis LibCallAliasAnalysis.cpp LibCallSemantics.cpp Lint.cpp - LiveValues.cpp Loads.cpp LoopDependenceAnalysis.cpp LoopInfo.cpp @@ -33,11 +32,11 @@ add_llvm_library(LLVMAnalysis MemoryBuiltins.cpp MemoryDependenceAnalysis.cpp ModuleDebugInfoPrinter.cpp + NoAliasAnalysis.cpp + PHITransAddr.cpp PathNumbering.cpp PathProfileInfo.cpp PathProfileVerifier.cpp - NoAliasAnalysis.cpp - PHITransAddr.cpp PostDominators.cpp ProfileEstimatorPass.cpp ProfileInfo.cpp diff --git a/lib/Analysis/CaptureTracking.cpp 
b/lib/Analysis/CaptureTracking.cpp index 42a54d9d1eb3..b2c27d1dfc4b 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Value.h" #include "llvm/Analysis/AliasAnalysis.h" diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index cd8d52c1c465..5de2b04e80dd 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -23,6 +23,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" +#include "llvm/Operator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" #include "llvm/ADT/SmallVector.h" @@ -1048,11 +1049,12 @@ llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::ctpop: case Intrinsic::ctlz: case Intrinsic::cttz: - case Intrinsic::uadd_with_overflow: - case Intrinsic::usub_with_overflow: case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: case Intrinsic::convert_from_fp16: case Intrinsic::convert_to_fp16: case Intrinsic::x86_sse_cvtss2si: @@ -1362,7 +1364,8 @@ llvm::ConstantFoldCall(Function *F, case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: case Intrinsic::usub_with_overflow: - case Intrinsic::smul_with_overflow: { + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: { APInt Res; bool Overflow; switch (F->getIntrinsicID()) { @@ -1382,6 +1385,9 @@ llvm::ConstantFoldCall(Function *F, case Intrinsic::smul_with_overflow: Res = Op1->getValue().smul_ov(Op2->getValue(), Overflow); break; + case Intrinsic::umul_with_overflow: + Res = Op1->getValue().umul_ov(Op2->getValue(), 
Overflow); + break; } Constant *Ops[] = { ConstantInt::get(F->getContext(), Res), diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index 590a9c17a8fa..dc98c9e67a80 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -50,7 +50,7 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, MDString::get(VMContext, Flags), ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) }; - TheCU = DICompileUnit(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); } /// createFile - Create a file descriptor to hold debugging information @@ -63,7 +63,7 @@ DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { MDString::get(VMContext, Directory), TheCU }; - return DIFile(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIFile(MDNode::get(VMContext, Elts)); } /// createEnumerator - Create a single enumerator value. @@ -73,7 +73,7 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { MDString::get(VMContext, Name), ConstantInt::get(Type::getInt64Ty(VMContext), Val) }; - return DIEnumerator(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIEnumerator(MDNode::get(VMContext, Elts)); } /// createBasicType - Create debugging information entry for a basic @@ -95,7 +95,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createQaulifiedType - Create debugging information entry for a qualified @@ -114,7 +114,7 @@ DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags FromTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return 
DIType(MDNode::get(VMContext, Elts)); } /// createPointerType - Create debugging information entry for a pointer. @@ -133,7 +133,7 @@ DIType DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags PointeeTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createReferenceType - Create debugging information entry for a reference. @@ -151,7 +151,7 @@ DIType DIBuilder::createReferenceType(DIType RTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags RTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createTypedef - Create debugging information entry for a typedef. @@ -171,7 +171,7 @@ DIType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags Ty }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createFriend - Create debugging information entry for a 'friend'. @@ -191,7 +191,7 @@ DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags FriendTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createInheritance - Create debugging information entry to establish @@ -211,7 +211,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), BaseTy }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createMemberType - Create debugging information entry for a member. 
@@ -233,7 +233,36 @@ DIType DIBuilder::createMemberType(StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), Ty }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); +} + +/// createObjCIVar - Create debugging information entry for Objective-C +/// instance variable. +DIType DIBuilder::createObjCIVar(StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, StringRef PropertyName, + StringRef GetterName, StringRef SetterName, + unsigned PropertyAttributes) { + // TAG_member is encoded in DIDerivedType format. + Value *Elts[] = { + GetTagConstant(VMContext, dwarf::DW_TAG_member), + File, // Or TheCU ? Ty ? + MDString::get(VMContext, Name), + File, + ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), + ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), + ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), + ConstantInt::get(Type::getInt32Ty(VMContext), Flags), + Ty, + MDString::get(VMContext, PropertyName), + MDString::get(VMContext, GetterName), + MDString::get(VMContext, SetterName), + ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes) + }; + return DIType(MDNode::get(VMContext, Elts)); } /// createClassType - Create debugging information entry for a class. 
@@ -260,7 +289,7 @@ DIType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, VTableHoder, TemplateParams }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createTemplateTypeParameter - Create debugging information for template @@ -278,8 +307,7 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) }; - return DITemplateTypeParameter(MDNode::get(VMContext, &Elts[0], - array_lengthof(Elts))); + return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); } /// createTemplateValueParameter - Create debugging information for template @@ -299,8 +327,7 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) }; - return DITemplateValueParameter(MDNode::get(VMContext, &Elts[0], - array_lengthof(Elts))); + return DITemplateValueParameter(MDNode::get(VMContext, Elts)); } /// createStructType - Create debugging information entry for a struct. @@ -325,7 +352,7 @@ DIType DIBuilder::createStructType(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createUnionType - Create debugging information entry for an union. 
@@ -350,7 +377,7 @@ DIType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createSubroutineType - Create subroutine type. @@ -371,7 +398,7 @@ DIType DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createEnumerationType - Create debugging information entry for an @@ -396,7 +423,7 @@ DIType DIBuilder::createEnumerationType(DIDescriptor Scope, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); NMD->addOperand(Node); return DIType(Node); @@ -421,7 +448,7 @@ DIType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createVectorType - Create debugging information entry for a vector. 
@@ -443,7 +470,7 @@ DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, ConstantInt::get(Type::getInt32Ty(VMContext), 0), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), }; - return DIType(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DIType(MDNode::get(VMContext, Elts)); } /// createArtificialType - Create a new DIType with "artificial" flag set. @@ -467,7 +494,7 @@ DIType DIBuilder::createArtificialType(DIType Ty) { // Flags are stored at this slot. Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); - return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); + return DIType(MDNode::get(VMContext, Elts)); } /// retainType - Retain DIType in a module even if it is not referenced @@ -483,7 +510,7 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_parameters) }; - return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1)); + return DIDescriptor(MDNode::get(VMContext, Elts)); } /// createTemporaryType - Create a temporary forward-declared type. @@ -491,7 +518,7 @@ DIType DIBuilder::createTemporaryType() { // Give the temporary MDNode a tag. It doesn't matter what tag we // use here as long as DIType accepts it. Value *Elts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + MDNode *Node = MDNode::getTemporary(VMContext, Elts); return DIType(Node); } @@ -505,17 +532,17 @@ DIType DIBuilder::createTemporaryType(DIFile F) { NULL, F }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); + MDNode *Node = MDNode::getTemporary(VMContext, Elts); return DIType(Node); } /// getOrCreateArray - Get a DIArray, create one if required. 
-DIArray DIBuilder::getOrCreateArray(Value *const *Elements, unsigned NumElements) { - if (NumElements == 0) { +DIArray DIBuilder::getOrCreateArray(ArrayRef Elements) { + if (Elements.empty()) { Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); - return DIArray(MDNode::get(VMContext, &Null, 1)); + return DIArray(MDNode::get(VMContext, Null)); } - return DIArray(MDNode::get(VMContext, Elements, NumElements)); + return DIArray(MDNode::get(VMContext, Elements)); } /// getOrCreateSubrange - Create a descriptor for a value range. This @@ -527,7 +554,7 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Hi) { ConstantInt::get(Type::getInt64Ty(VMContext), Hi) }; - return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); + return DISubrange(MDNode::get(VMContext, Elts)); } /// createGlobalVariable - Create a new descriptor for the specified global. @@ -548,7 +575,7 @@ createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ Val }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(Node); @@ -575,7 +602,7 @@ createStaticVariable(DIDescriptor Context, StringRef Name, ConstantInt::get(Type::getInt32Ty(VMContext), 1), /* isDefinition*/ Val }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. 
NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); NMD->addOperand(Node); @@ -586,17 +613,18 @@ createStaticVariable(DIDescriptor Context, StringRef Name, DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNo, DIType Ty, - bool AlwaysPreserve, unsigned Flags) { + bool AlwaysPreserve, unsigned Flags, + unsigned ArgNo) { Value *Elts[] = { GetTagConstant(VMContext, Tag), Scope, MDString::get(VMContext, Name), File, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), + ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24))), Ty, ConstantInt::get(Type::getInt32Ty(VMContext), Flags) }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); if (AlwaysPreserve) { // The optimizer may remove local variable. If there is an interest // to preserve variable info in such situation then stash it in a @@ -619,18 +647,19 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, StringRef Name, DIFile F, unsigned LineNo, - DIType Ty, Value *const *Addr, - unsigned NumAddr) { + DIType Ty, ArrayRef Addr, + unsigned ArgNo) { SmallVector Elts; Elts.push_back(GetTagConstant(VMContext, Tag)); Elts.push_back(Scope); Elts.push_back(MDString::get(VMContext, Name)); Elts.push_back(F); - Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)); + Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), (LineNo | (ArgNo << 24)))); Elts.push_back(Ty); - Elts.append(Addr, Addr+NumAddr); + Elts.push_back(llvm::Constant::getNullValue(Type::getInt32Ty(VMContext))); + Elts.append(Addr.begin(), Addr.end()); - return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size())); + return DIVariable(MDNode::get(VMContext, Elts)); } /// createFunction - Create a new descriptor for the specified function. 
@@ -641,8 +670,9 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, DIType Ty, bool isLocalToUnit, bool isDefinition, unsigned Flags, bool isOptimized, - Function *Fn) { - + Function *Fn, + MDNode *TParams, + MDNode *Decl) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -660,9 +690,11 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), - Fn + Fn, + TParams, + Decl }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); @@ -682,7 +714,8 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, MDNode *VTableHolder, unsigned Flags, bool isOptimized, - Function *Fn) { + Function *Fn, + MDNode *TParam) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -700,9 +733,10 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, VTableHolder, ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), - Fn + Fn, + TParam, }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], array_lengthof(Elts)); + MDNode *Node = MDNode::get(VMContext, Elts); // Create a named metadata so that we do not lose this mdnode. 
NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); @@ -721,7 +755,7 @@ DINameSpace DIBuilder::createNameSpace(DIDescriptor Scope, StringRef Name, File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) }; - return DINameSpace(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DINameSpace(MDNode::get(VMContext, Elts)); } DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, @@ -736,7 +770,7 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, File, ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) }; - return DILexicalBlock(MDNode::get(VMContext, &Elts[0], array_lengthof(Elts))); + return DILexicalBlock(MDNode::get(VMContext, Elts)); } /// insertDeclare - Insert a new llvm.dbg.declare intrinsic call. @@ -747,7 +781,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); } @@ -759,7 +793,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), VarInfo }; + Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo }; // If this block already has a terminator then insert this intrinsic // before the terminator. 
@@ -778,7 +812,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + Value *Args[] = { MDNode::get(V->getContext(), V), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), VarInfo }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); @@ -793,7 +827,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), + Value *Args[] = { MDNode::get(V->getContext(), V), ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), VarInfo }; return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp index 9db1456edd05..67f8147f4d61 100644 --- a/lib/Analysis/DebugInfo.cpp +++ b/lib/Analysis/DebugInfo.cpp @@ -725,484 +725,6 @@ void DIVariable::dump() const { print(dbgs()); dbgs() << '\n'; } -//===----------------------------------------------------------------------===// -// DIFactory: Basic Helpers -//===----------------------------------------------------------------------===// - -DIFactory::DIFactory(Module &m) - : M(m), VMContext(M.getContext()), DeclareFn(0), ValueFn(0) {} - -Constant *DIFactory::GetTagConstant(unsigned TAG) { - assert((TAG & LLVMDebugVersionMask) == 0 && - "Tag too large for debug encoding!"); - return ConstantInt::get(Type::getInt32Ty(VMContext), TAG | LLVMDebugVersion); -} - -//===----------------------------------------------------------------------===// -// DIFactory: Primary Constructors -//===----------------------------------------------------------------------===// - -/// GetOrCreateArray - Create an descriptor for an array of descriptors. -/// This implicitly uniques the arrays created. 
-DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) { - if (NumTys == 0) { - Value *Null = llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)); - return DIArray(MDNode::get(VMContext, &Null, 1)); - } - - SmallVector Elts(Tys, Tys+NumTys); - return DIArray(MDNode::get(VMContext, Elts.data(), Elts.size())); -} - -/// GetOrCreateSubrange - Create a descriptor for a value range. This -/// implicitly uniques the values returned. -DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_subrange_type), - ConstantInt::get(Type::getInt64Ty(VMContext), Lo), - ConstantInt::get(Type::getInt64Ty(VMContext), Hi) - }; - - return DISubrange(MDNode::get(VMContext, &Elts[0], 3)); -} - -/// CreateUnspecifiedParameter - Create unspeicified type descriptor -/// for the subroutine type. -DIDescriptor DIFactory::CreateUnspecifiedParameter() { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_unspecified_parameters) - }; - return DIDescriptor(MDNode::get(VMContext, &Elts[0], 1)); -} - -/// CreateCompileUnit - Create a new descriptor for the specified compile -/// unit. Note that this does not unique compile units within the module. 
-DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID, - StringRef Filename, - StringRef Directory, - StringRef Producer, - bool isMain, - bool isOptimized, - StringRef Flags, - unsigned RunTimeVer) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_compile_unit), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - ConstantInt::get(Type::getInt32Ty(VMContext), LangID), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), - MDString::get(VMContext, Producer), - ConstantInt::get(Type::getInt1Ty(VMContext), isMain), - ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), - MDString::get(VMContext, Flags), - ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeVer) - }; - - return DICompileUnit(MDNode::get(VMContext, &Elts[0], 10)); -} - -/// CreateFile - Create a new descriptor for the specified file. -DIFile DIFactory::CreateFile(StringRef Filename, - StringRef Directory, - DICompileUnit CU) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_file_type), - MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), - CU - }; - - return DIFile(MDNode::get(VMContext, &Elts[0], 4)); -} - -/// CreateEnumerator - Create a single enumerator value. -DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){ - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_enumerator), - MDString::get(VMContext, Name), - ConstantInt::get(Type::getInt64Ty(VMContext), Val) - }; - return DIEnumerator(MDNode::get(VMContext, &Elts[0], 3)); -} - - -/// CreateBasicType - Create a basic type like int, float, etc. 
-DIBasicType DIFactory::CreateBasicType(DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - unsigned Encoding) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_base_type), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) - }; - return DIBasicType(MDNode::get(VMContext, &Elts[0], 10)); -} - - -/// CreateBasicType - Create a basic type like int, float, etc. -DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - Constant *SizeInBits, - Constant *AlignInBits, - Constant *OffsetInBits, unsigned Flags, - unsigned Encoding) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_base_type), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - SizeInBits, - AlignInBits, - OffsetInBits, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - ConstantInt::get(Type::getInt32Ty(VMContext), Encoding) - }; - return DIBasicType(MDNode::get(VMContext, &Elts[0], 10)); -} - -/// CreateArtificialType - Create a new DIType with "artificial" flag set. 
-DIType DIFactory::CreateArtificialType(DIType Ty) { - if (Ty.isArtificial()) - return Ty; - - SmallVector Elts; - MDNode *N = Ty; - assert (N && "Unexpected input DIType!"); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (Value *V = N->getOperand(i)) - Elts.push_back(V); - else - Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); - } - - unsigned CurFlags = Ty.getFlags(); - CurFlags = CurFlags | DIType::FlagArtificial; - - // Flags are stored at this slot. - Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); - - return DIType(MDNode::get(VMContext, Elts.data(), Elts.size())); -} - -/// CreateDerivedType - Create a derived type like const qualified type, -/// pointer, typedef, etc. -DIDerivedType DIFactory::CreateDerivedType(unsigned Tag, - DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, - unsigned Flags, - DIType DerivedFrom) { - Value *Elts[] = { - GetTagConstant(Tag), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom, - }; - return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); -} - - -/// CreateDerivedType - Create a derived type like const qualified type, -/// pointer, typedef, etc. 
-DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag, - DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - Constant *SizeInBits, - Constant *AlignInBits, - Constant *OffsetInBits, - unsigned Flags, - DIType DerivedFrom) { - Value *Elts[] = { - GetTagConstant(Tag), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - SizeInBits, - AlignInBits, - OffsetInBits, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom, - }; - return DIDerivedType(MDNode::get(VMContext, &Elts[0], 10)); -} - - -/// CreateCompositeType - Create a composite type like array, struct, etc. -DICompositeType DIFactory::CreateCompositeType(unsigned Tag, - DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - uint64_t SizeInBits, - uint64_t AlignInBits, - uint64_t OffsetInBits, - unsigned Flags, - DIType DerivedFrom, - DIArray Elements, - unsigned RuntimeLang, - MDNode *ContainingType) { - - Value *Elts[] = { - GetTagConstant(Tag), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom, - Elements, - ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), - ContainingType - }; - - MDNode *Node = MDNode::get(VMContext, &Elts[0], 13); - // Create a named metadata so that we do not lose this enum info. - if (Tag == dwarf::DW_TAG_enumeration_type) { - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); - NMD->addOperand(Node); - } - return DICompositeType(Node); -} - -/// CreateTemporaryType - Create a temporary forward-declared type. -DIType DIFactory::CreateTemporaryType() { - // Give the temporary MDNode a tag. 
It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { - GetTagConstant(DW_TAG_base_type) - }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); - return DIType(Node); -} - -/// CreateTemporaryType - Create a temporary forward-declared type. -DIType DIFactory::CreateTemporaryType(DIFile F) { - // Give the temporary MDNode a tag. It doesn't matter what tag we - // use here as long as DIType accepts it. - Value *Elts[] = { - GetTagConstant(DW_TAG_base_type), - F.getCompileUnit(), - NULL, - F - }; - MDNode *Node = MDNode::getTemporary(VMContext, Elts, array_lengthof(Elts)); - return DIType(Node); -} - -/// CreateCompositeType - Create a composite type like array, struct, etc. -DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag, - DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNumber, - Constant *SizeInBits, - Constant *AlignInBits, - Constant *OffsetInBits, - unsigned Flags, - DIType DerivedFrom, - DIArray Elements, - unsigned RuntimeLang, - MDNode *ContainingType) { - Value *Elts[] = { - GetTagConstant(Tag), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - SizeInBits, - AlignInBits, - OffsetInBits, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom, - Elements, - ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), - ContainingType - }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], 13); - // Create a named metadata so that we do not lose this enum info. - if (Tag == dwarf::DW_TAG_enumeration_type) { - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.enum"); - NMD->addOperand(Node); - } - return DICompositeType(Node); -} - - -/// CreateSubprogram - Create a new descriptor for the specified subprogram. -/// See comments in DISubprogram for descriptions of these fields. This -/// method does not unique the generated descriptors. 
-DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context, - StringRef Name, - StringRef DisplayName, - StringRef LinkageName, - DIFile F, - unsigned LineNo, DIType Ty, - bool isLocalToUnit, - bool isDefinition, - unsigned VK, unsigned VIndex, - DIType ContainingType, - unsigned Flags, - bool isOptimized, - Function *Fn) { - - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_subprogram), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, - MDString::get(VMContext, Name), - MDString::get(VMContext, DisplayName), - MDString::get(VMContext, LinkageName), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty, - ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), - ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), - ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), - ContainingType, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), - Fn - }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], 17); - - // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); - NMD->addOperand(Node); - return DISubprogram(Node); -} - -/// CreateSubprogramDefinition - Create new subprogram descriptor for the -/// given declaration. 
-DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration){ - if (SPDeclaration.isDefinition()) - return DISubprogram(SPDeclaration); - - MDNode *DeclNode = SPDeclaration; - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_subprogram), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - DeclNode->getOperand(2), // Context - DeclNode->getOperand(3), // Name - DeclNode->getOperand(4), // DisplayName - DeclNode->getOperand(5), // LinkageName - DeclNode->getOperand(6), // CompileUnit - DeclNode->getOperand(7), // LineNo - DeclNode->getOperand(8), // Type - DeclNode->getOperand(9), // isLocalToUnit - ConstantInt::get(Type::getInt1Ty(VMContext), true), - DeclNode->getOperand(11), // Virtuality - DeclNode->getOperand(12), // VIndex - DeclNode->getOperand(13), // Containting Type - DeclNode->getOperand(14), // Flags - DeclNode->getOperand(15), // isOptimized - SPDeclaration.getFunction() - }; - MDNode *Node =MDNode::get(VMContext, &Elts[0], 16); - - // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.sp"); - NMD->addOperand(Node); - return DISubprogram(Node); -} - -/// CreateGlobalVariable - Create a new descriptor for the specified global. 
-DIGlobalVariable -DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, - StringRef DisplayName, - StringRef LinkageName, - DIFile F, - unsigned LineNo, DIType Ty,bool isLocalToUnit, - bool isDefinition, llvm::GlobalVariable *Val) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_variable), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, - MDString::get(VMContext, Name), - MDString::get(VMContext, DisplayName), - MDString::get(VMContext, LinkageName), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty, - ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), - Val - }; - - Value *const *Vs = &Elts[0]; - MDNode *Node = MDNode::get(VMContext,Vs, 12); - - // Create a named metadata so that we do not lose this mdnode. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); - NMD->addOperand(Node); - - return DIGlobalVariable(Node); -} - -/// CreateGlobalVariable - Create a new descriptor for the specified constant. -DIGlobalVariable -DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name, - StringRef DisplayName, - StringRef LinkageName, - DIFile F, - unsigned LineNo, DIType Ty,bool isLocalToUnit, - bool isDefinition, llvm::Constant *Val) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_variable), - llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)), - Context, - MDString::get(VMContext, Name), - MDString::get(VMContext, DisplayName), - MDString::get(VMContext, LinkageName), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty, - ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), - ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), - Val - }; - - Value *const *Vs = &Elts[0]; - MDNode *Node = MDNode::get(VMContext,Vs, 12); - - // Create a named metadata so that we do not lose this mdnode. 
- NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.gv"); - NMD->addOperand(Node); - - return DIGlobalVariable(Node); -} - /// fixupObjcLikeName - Replace contains special characters used /// in a typical Objective-C names with '.' in a given string. static void fixupObjcLikeName(std::string &Str) { @@ -1214,19 +736,6 @@ static void fixupObjcLikeName(std::string &Str) { } } -/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable -/// to hold function specific information. -NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) { - SmallString<32> Out; - if (FuncName.find('[') == StringRef::npos) - return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName) - .toStringRef(Out)); - std::string Name = FuncName; - fixupObjcLikeName(Name); - return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name) - .toStringRef(Out)); -} - /// getFnSpecificMDNode - Return a NameMDNode, if available, that is /// suitable to hold function specific information. NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) { @@ -1237,178 +746,18 @@ NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) { return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name)); } -/// CreateVariable - Create a new descriptor for the specified variable. -DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context, - StringRef Name, - DIFile F, - unsigned LineNo, - DIType Ty, bool AlwaysPreserve, - unsigned Flags) { - Value *Elts[] = { - GetTagConstant(Tag), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - Ty, - ConstantInt::get(Type::getInt32Ty(VMContext), Flags) - }; - MDNode *Node = MDNode::get(VMContext, &Elts[0], 7); - if (AlwaysPreserve) { - // The optimizer may remove local variable. If there is an interest - // to preserve variable info in such situation then stash it in a - // named mdnode. 
- DISubprogram Fn(getDISubprogram(Context)); - StringRef FName = "fn"; - if (Fn.getFunction()) - FName = Fn.getFunction()->getName(); - char One = '\1'; - if (FName.startswith(StringRef(&One, 1))) - FName = FName.substr(1); - - - NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, FName); - FnLocals->addOperand(Node); - } - return DIVariable(Node); -} - - -/// CreateComplexVariable - Create a new descriptor for the specified variable -/// which has a complex address expression for its address. -DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context, - StringRef Name, DIFile F, - unsigned LineNo, - DIType Ty, Value *const *Addr, - unsigned NumAddr) { - SmallVector Elts; - Elts.push_back(GetTagConstant(Tag)); - Elts.push_back(Context); - Elts.push_back(MDString::get(VMContext, Name)); - Elts.push_back(F); - Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), LineNo)); - Elts.push_back(Ty); - Elts.append(Addr, Addr+NumAddr); - - return DIVariable(MDNode::get(VMContext, Elts.data(), Elts.size())); -} - - -/// CreateBlock - This creates a descriptor for a lexical block with the -/// specified parent VMContext. -DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context, - DIFile F, unsigned LineNo, - unsigned Col) { - // Defeat MDNode uniqing for lexical blocks. - static unsigned int unique_id = 0; - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_lexical_block), - Context, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(Type::getInt32Ty(VMContext), Col), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), unique_id++) - }; - return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 6)); -} - -/// CreateNameSpace - This creates new descriptor for a namespace -/// with the specified parent context. 
-DINameSpace DIFactory::CreateNameSpace(DIDescriptor Context, StringRef Name, - DIFile F, - unsigned LineNo) { - Value *Elts[] = { - GetTagConstant(dwarf::DW_TAG_namespace), - Context, - MDString::get(VMContext, Name), - F, - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo) - }; - return DINameSpace(MDNode::get(VMContext, &Elts[0], 5)); -} - -/// CreateLocation - Creates a debug info location. -DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo, - DIScope S, DILocation OrigLoc) { - Value *Elts[] = { - ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), - ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo), - S, - OrigLoc, - }; - return DILocation(MDNode::get(VMContext, &Elts[0], 4)); -} - -//===----------------------------------------------------------------------===// -// DIFactory: Routines for inserting code into a function -//===----------------------------------------------------------------------===// - -/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. -Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, - Instruction *InsertBefore) { - assert(Storage && "no storage passed to dbg.declare"); - assert(D.Verify() && "empty DIVariable passed to dbg.declare"); - if (!DeclareFn) - DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), - D }; - return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore); -} - -/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call. 
-Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D, - BasicBlock *InsertAtEnd) { - assert(Storage && "no storage passed to dbg.declare"); - assert(D.Verify() && "invalid DIVariable passed to dbg.declare"); - if (!DeclareFn) - DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); - - Value *Args[] = { MDNode::get(Storage->getContext(), &Storage, 1), - D }; - - // If this block already has a terminator then insert this intrinsic - // before the terminator. - if (TerminatorInst *T = InsertAtEnd->getTerminator()) - return CallInst::Create(DeclareFn, Args, Args+2, "", T); - else - return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);} - -/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable D, - Instruction *InsertBefore) { - assert(V && "no value passed to dbg.value"); - assert(D.Verify() && "invalid DIVariable passed to dbg.value"); - if (!ValueFn) - ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), - ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), - D }; - return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore); -} - -/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call. -Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, uint64_t Offset, - DIVariable D, - BasicBlock *InsertAtEnd) { - assert(V && "no value passed to dbg.value"); - assert(D.Verify() && "invalid DIVariable passed to dbg.value"); - if (!ValueFn) - ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); - - Value *Args[] = { MDNode::get(V->getContext(), &V, 1), - ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset), - D }; - return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd); -} - -// RecordType - Record DIType in a module such that it is not lost even if -// it is not referenced through debug info anchors. 
-void DIFactory::RecordType(DIType T) { - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.ty"); - NMD->addOperand(T); +/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable +/// to hold function specific information. +NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) { + SmallString<32> Out; + if (FuncName.find('[') == StringRef::npos) + return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName) + .toStringRef(Out)); + + std::string Name = FuncName; + fixupObjcLikeName(Name); + return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name) + .toStringRef(Out)); } diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 116aaf418ea0..b226d66cd78a 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -602,7 +602,7 @@ void GlobalsModRef::addEscapingUse(Use &U) { // For the purposes of this analysis, it is conservatively correct to treat // a newly escaping value equivalently to a deleted one. We could perhaps // be more precise by processing the new use and attempting to update our - // saved analysis results to accomodate it. + // saved analysis results to accommodate it. deleteValue(U); AliasAnalysis::addEscapingUse(U); diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index c8382186df3a..2cda7913f024 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Target/TargetData.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" @@ -83,7 +84,10 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { return false; // Void and FP expressions cannot be reduced. // LSR is not APInt clean, do not touch integers bigger than 64-bits. 
- if (SE->getTypeSizeInBits(I->getType()) > 64) + // Also avoid creating IVs of non-native types. For example, we don't want a + // 64-bit IV in 32-bit code just because the loop has one 64-bit cast. + uint64_t Width = SE->getTypeSizeInBits(I->getType()); + if (Width > 64 || (TD && !TD->isLegalInteger(Width))) return false; if (!Processed.insert(I)) @@ -167,6 +171,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) { LI = &getAnalysis(); DT = &getAnalysis(); SE = &getAnalysis(); + TD = getAnalysisIfAvailable(); // Find all uses of induction variables in this loop, and categorize // them by stride. Start by finding all of the PHI nodes in the header for diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 47f91cfc3bed..a820ecf0372a 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -501,7 +501,7 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, return InlineCost::getAlways(); if (CalleeFI->Metrics.usesDynamicAlloca) { - // Get infomation about the caller. + // Get information about the caller. FunctionInfo &CallerFI = CachedFunctionInfo[Caller]; // If we haven't calculated this information yet, do so now. @@ -549,7 +549,7 @@ InlineCost InlineCostAnalyzer::getSpecializationCost(Function *Callee, int Cost = 0; - // Look at the orginal size of the callee. Each instruction counts as 5. + // Look at the original size of the callee. Each instruction counts as 5. 
Cost += CalleeFI->Metrics.NumInsts * InlineConstants::InstrCost; // Offset that with the amount of code that can be constant-folded diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 982dacb50bfc..9d6d3398feb8 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -18,11 +18,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" +#include "llvm/Operator.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Target/TargetData.h" @@ -899,6 +901,111 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, const TargetData *TD, return ::SimplifyFDivInst(Op0, Op1, TD, DT, RecursionLimit); } +/// SimplifyRem - Given operands for an SRem or URem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, + const TargetData *TD, const DominatorTree *DT, + unsigned MaxRecurse) { + if (Constant *C0 = dyn_cast(Op0)) { + if (Constant *C1 = dyn_cast(Op1)) { + Constant *Ops[] = { C0, C1 }; + return ConstantFoldInstOperands(Opcode, C0->getType(), Ops, 2, TD); + } + } + + bool isSigned = Opcode == Instruction::SRem; + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + // undef % X -> 0 + if (match(Op0, m_Undef())) + return Constant::getNullValue(Op0->getType()); + + // 0 % X -> 0, we don't need to preserve faults! + if (match(Op0, m_Zero())) + return Op0; + + // X % 0 -> undef, we don't need to preserve faults! 
+ if (match(Op1, m_Zero())) + return UndefValue::get(Op0->getType()); + + // X % 1 -> 0 + if (match(Op1, m_One())) + return Constant::getNullValue(Op0->getType()); + + if (Op0->getType()->isIntegerTy(1)) + // It can't be remainder by zero, hence it must be remainder by one. + return Constant::getNullValue(Op0->getType()); + + // X % X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + // If the operation is with the result of a select instruction, check whether + // operating on either branch of the select always yields the same value. + if (isa(Op0) || isa(Op1)) + if (Value *V = ThreadBinOpOverSelect(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + // If the operation is with the result of a phi instruction, check whether + // operating on all incoming values of the phi always yields the same value. + if (isa(Op0) || isa(Op1)) + if (Value *V = ThreadBinOpOverPHI(Opcode, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +/// SimplifySRemInst - Given operands for an SRem, see if we can +/// fold the result. If not, this returns null. +static Value *SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifySRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +/// SimplifyURemInst - Given operands for a URem, see if we can +/// fold the result. If not, this returns null. 
+static Value *SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT, unsigned MaxRecurse) { + if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, TD, DT, MaxRecurse)) + return V; + + return 0; +} + +Value *llvm::SimplifyURemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyURemInst(Op0, Op1, TD, DT, RecursionLimit); +} + +static Value *SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *, + const DominatorTree *, unsigned) { + // undef % X -> undef (the undef could be a snan). + if (match(Op0, m_Undef())) + return Op0; + + // X % undef -> undef + if (match(Op1, m_Undef())) + return Op1; + + return 0; +} + +Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, const TargetData *TD, + const DominatorTree *DT) { + return ::SimplifyFRemInst(Op0, Op1, TD, DT, RecursionLimit); +} + /// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can /// fold the result. If not, this returns null. static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1, @@ -1343,7 +1450,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // the compare, and if only one of them is then we moved it to RHS already. if (isa(LHS) && (isa(RHS) || isa(RHS) || isa(RHS))) - // We already know that LHS != LHS. + // We already know that LHS != RHS. return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred)); // If we are comparing with zero then try hard since this is a common case. @@ -1399,40 +1506,66 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // See if we are doing a comparison with a constant integer. 
if (ConstantInt *CI = dyn_cast(RHS)) { - switch (Pred) { - default: break; - case ICmpInst::ICMP_UGT: - if (CI->isMaxValue(false)) // A >u MAX -> FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_UGE: - if (CI->isMinValue(false)) // A >=u MIN -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; - case ICmpInst::ICMP_ULT: - if (CI->isMinValue(false)) // A FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_ULE: - if (CI->isMaxValue(false)) // A <=u MAX -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; - case ICmpInst::ICMP_SGT: - if (CI->isMaxValue(true)) // A >s MAX -> FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_SGE: - if (CI->isMinValue(true)) // A >=s MIN -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; - case ICmpInst::ICMP_SLT: - if (CI->isMinValue(true)) // A FALSE - return ConstantInt::getFalse(CI->getContext()); - break; - case ICmpInst::ICMP_SLE: - if (CI->isMaxValue(true)) // A <=s MAX -> TRUE - return ConstantInt::getTrue(CI->getContext()); - break; + // Rule out tautological comparisons (eg., ult 0 or uge 0). + ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue()); + if (RHS_CR.isEmptySet()) + return ConstantInt::getFalse(CI->getContext()); + if (RHS_CR.isFullSet()) + return ConstantInt::getTrue(CI->getContext()); + + // Many binary operators with constant RHS have easy to compute constant + // range. Use them to check whether the comparison is a tautology. + uint32_t Width = CI->getBitWidth(); + APInt Lower = APInt(Width, 0); + APInt Upper = APInt(Width, 0); + ConstantInt *CI2; + if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) { + // 'urem x, CI2' produces [0, CI2). + Upper = CI2->getValue(); + } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) { + // 'srem x, CI2' produces (-|CI2|, |CI2|). 
+ Upper = CI2->getValue().abs(); + Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (!CI2->isZero()) + Upper = NegOne.udiv(CI2->getValue()) + 1; + } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) { + // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]. + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + APInt Val = CI2->getValue().abs(); + if (!Val.isMinValue()) { + Lower = IntMin.sdiv(Val); + Upper = IntMax.sdiv(Val) + 1; + } + } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) { + // 'lshr x, CI2' produces [0, UINT_MAX >> CI2]. + APInt NegOne = APInt::getAllOnesValue(Width); + if (CI2->getValue().ult(Width)) + Upper = NegOne.lshr(CI2->getValue()) + 1; + } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) { + // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2]. + APInt IntMin = APInt::getSignedMinValue(Width); + APInt IntMax = APInt::getSignedMaxValue(Width); + if (CI2->getValue().ult(Width)) { + Lower = IntMin.ashr(CI2->getValue()); + Upper = IntMax.ashr(CI2->getValue()) + 1; + } + } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) { + // 'or x, CI2' produces [CI2, UINT_MAX]. + Lower = CI2->getValue(); + } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) { + // 'and x, CI2' produces [0, CI2]. 
+ Upper = CI2->getValue() + 1; + } + if (Lower != Upper) { + ConstantRange LHS_CR = ConstantRange(Lower, Upper); + if (RHS_CR.contains(LHS_CR)) + return ConstantInt::getTrue(RHS->getContext()); + if (RHS_CR.inverse().contains(LHS_CR)) + return ConstantInt::getFalse(RHS->getContext()); } } @@ -1644,6 +1777,93 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { + bool KnownNonNegative, KnownNegative; + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // RHS is non-negative and LHS <u RHS: fall-through + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return ConstantInt::getFalse(RHS->getContext()); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // RHS is non-negative and LHS <u RHS: fall-through + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return ConstantInt::getTrue(RHS->getContext()); + } + } + if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { + bool KnownNonNegative, KnownNegative; + switch (Pred) { + default: + break; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // LHS is non-negative and RHS <u LHS: fall-through + case ICmpInst::ICMP_NE: + case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: + return ConstantInt::getTrue(RHS->getContext()); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, TD); + if (!KnownNonNegative) + break; + // LHS is non-negative and RHS <u LHS: fall-through + case ICmpInst::ICMP_EQ: + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: + return ConstantInt::getFalse(RHS->getContext()); + } + } + + if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && + LBO->getOperand(1) == RBO->getOperand(1)) { +
switch (LBO->getOpcode()) { + default: break; + case Instruction::UDiv: + case Instruction::LShr: + if (ICmpInst::isSigned(Pred)) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!LBO->isExact() || !RBO->isExact()) + break; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), TD, DT, MaxRecurse-1)) + return V; + break; + case Instruction::Shl: { + bool NUW = LBO->hasNoUnsignedWrap() && RBO->hasNoUnsignedWrap(); + bool NSW = LBO->hasNoSignedWrap() && RBO->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && ICmpInst::isSigned(Pred)) + break; + if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0), + RBO->getOperand(0), TD, DT, MaxRecurse-1)) + return V; + break; + } + } + } + // If the comparison is with the result of a select instruction, check whether // comparing with either branch of the select always yields the same value. if (isa<SelectInst>(LHS) || isa<SelectInst>(RHS)) @@ -1879,6 +2099,9 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::SRem: return SimplifySRemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::URem: return SimplifyURemInst(LHS, RHS, TD, DT, MaxRecurse); + case Instruction::FRem: return SimplifyFRemInst(LHS, RHS, TD, DT, MaxRecurse); case Instruction::Shl: return SimplifyShlInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, TD, DT, MaxRecurse); @@ -1973,6 +2196,15 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, case Instruction::FDiv: Result = SimplifyFDivInst(I->getOperand(0), I->getOperand(1), TD, DT); break; + case Instruction::SRem: + Result = SimplifySRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; + case Instruction::URem: + Result = SimplifyURemInst(I->getOperand(0),
I->getOperand(1), TD, DT); + break; + case Instruction::FRem: + Result = SimplifyFRemInst(I->getOperand(0), I->getOperand(1), TD, DT); + break; case Instruction::Shl: Result = SimplifyShlInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 9e7da6ce2de9..d5f0b5c82154 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -29,7 +29,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/STLExtras.h" #include -#include #include using namespace llvm; @@ -268,6 +267,8 @@ class LVILatticeVal { } // end anonymous namespace. namespace llvm { +raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) + LLVM_ATTRIBUTE_USED; raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) { if (Val.isUndefined()) return OS << "undefined"; diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index fc7edc0525f9..f130f30c49da 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -606,7 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk, Type::getInt64Ty(V->getContext()))) return findValueImpl(CE->getOperand(0), OffsetOk, Visited); } else if (CE->getOpcode() == Instruction::ExtractValue) { - const SmallVector &Indices = CE->getIndices(); + ArrayRef Indices = CE->getIndices(); if (Value *W = FindInsertedValue(CE->getOperand(0), Indices.begin(), Indices.end())) diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp deleted file mode 100644 index a0e603419f57..000000000000 --- a/lib/Analysis/LiveValues.cpp +++ /dev/null @@ -1,200 +0,0 @@ -//===- LiveValues.cpp - Liveness information for LLVM IR Values. ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the implementation for the LLVM IR Value liveness -// analysis pass. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LiveValues.h" -#include "llvm/Instructions.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/LoopInfo.h" -using namespace llvm; - -namespace llvm { - FunctionPass *createLiveValuesPass() { return new LiveValues(); } -} - -char LiveValues::ID = 0; -INITIALIZE_PASS_BEGIN(LiveValues, "live-values", - "Value Liveness Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTree) -INITIALIZE_PASS_DEPENDENCY(LoopInfo) -INITIALIZE_PASS_END(LiveValues, "live-values", - "Value Liveness Analysis", false, true) - -LiveValues::LiveValues() : FunctionPass(ID) { - initializeLiveValuesPass(*PassRegistry::getPassRegistry()); -} - -void LiveValues::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.setPreservesAll(); -} - -bool LiveValues::runOnFunction(Function &F) { - DT = &getAnalysis(); - LI = &getAnalysis(); - - // This pass' values are computed lazily, so there's nothing to do here. - - return false; -} - -void LiveValues::releaseMemory() { - Memos.clear(); -} - -/// isUsedInBlock - Test if the given value is used in the given block. -/// -bool LiveValues::isUsedInBlock(const Value *V, const BasicBlock *BB) { - Memo &M = getMemo(V); - return M.Used.count(BB); -} - -/// isLiveThroughBlock - Test if the given value is known to be -/// live-through the given block, meaning that the block is properly -/// dominated by the value's definition, and there exists a block -/// reachable from it that contains a use. This uses a conservative -/// approximation that errs on the side of returning false. 
-/// -bool LiveValues::isLiveThroughBlock(const Value *V, - const BasicBlock *BB) { - Memo &M = getMemo(V); - return M.LiveThrough.count(BB); -} - -/// isKilledInBlock - Test if the given value is known to be killed in -/// the given block, meaning that the block contains a use of the value, -/// and no blocks reachable from the block contain a use. This uses a -/// conservative approximation that errs on the side of returning false. -/// -bool LiveValues::isKilledInBlock(const Value *V, const BasicBlock *BB) { - Memo &M = getMemo(V); - return M.Killed.count(BB); -} - -/// getMemo - Retrieve an existing Memo for the given value if one -/// is available, otherwise compute a new one. -/// -LiveValues::Memo &LiveValues::getMemo(const Value *V) { - DenseMap::iterator I = Memos.find(V); - if (I != Memos.end()) - return I->second; - return compute(V); -} - -/// getImmediateDominator - A handy utility for the specific DominatorTree -/// query that we need here. -/// -static const BasicBlock *getImmediateDominator(const BasicBlock *BB, - const DominatorTree *DT) { - DomTreeNode *Node = DT->getNode(const_cast(BB))->getIDom(); - return Node ? Node->getBlock() : 0; -} - -/// compute - Compute a new Memo for the given value. -/// -LiveValues::Memo &LiveValues::compute(const Value *V) { - Memo &M = Memos[V]; - - // Determine the block containing the definition. - const BasicBlock *DefBB; - // Instructions define values with meaningful live ranges. - if (const Instruction *I = dyn_cast(V)) - DefBB = I->getParent(); - // Arguments can be analyzed as values defined in the entry block. - else if (const Argument *A = dyn_cast(V)) - DefBB = &A->getParent()->getEntryBlock(); - // Constants and other things aren't meaningful here, so just - // return having computed an empty Memo so that we don't come - // here again. The assumption here is that client code won't - // be asking about such values very often. - else - return M; - - // Determine if the value is defined inside a loop. 
This is used - // to track whether the value is ever used outside the loop, so - // it'll be set to null if the value is either not defined in a - // loop or used outside the loop in which it is defined. - const Loop *L = LI->getLoopFor(DefBB); - - // Track whether the value is used anywhere outside of the block - // in which it is defined. - bool LiveOutOfDefBB = false; - - // Examine each use of the value. - for (Value::const_use_iterator I = V->use_begin(), E = V->use_end(); - I != E; ++I) { - const User *U = *I; - const BasicBlock *UseBB = cast(U)->getParent(); - - // Note the block in which this use occurs. - M.Used.insert(UseBB); - - // If the use block doesn't have successors, the value can be - // considered killed. - if (succ_begin(UseBB) == succ_end(UseBB)) - M.Killed.insert(UseBB); - - // Observe whether the value is used outside of the loop in which - // it is defined. Switch to an enclosing loop if necessary. - for (; L; L = L->getParentLoop()) - if (L->contains(UseBB)) - break; - - // Search for live-through blocks. - const BasicBlock *BB; - if (const PHINode *PHI = dyn_cast(U)) { - // For PHI nodes, start the search at the incoming block paired with the - // incoming value, which must be dominated by the definition. - unsigned Num = PHI->getIncomingValueNumForOperand(I.getOperandNo()); - BB = PHI->getIncomingBlock(Num); - - // A PHI-node use means the value is live-out of it's defining block - // even if that block also contains the only use. - LiveOutOfDefBB = true; - } else { - // Otherwise just start the search at the use. - BB = UseBB; - - // Note if the use is outside the defining block. - LiveOutOfDefBB |= UseBB != DefBB; - } - - // Climb the immediate dominator tree from the use to the definition - // and mark all intermediate blocks as live-through. 
- for (; BB != DefBB; BB = getImmediateDominator(BB, DT)) { - if (BB != UseBB && !M.LiveThrough.insert(BB)) - break; - } - } - - // If the value is defined inside a loop and is not live outside - // the loop, then each exit block of the loop in which the value - // is used is a kill block. - if (L) { - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { - const BasicBlock *ExitingBlock = ExitingBlocks[i]; - if (M.Used.count(ExitingBlock)) - M.Killed.insert(ExitingBlock); - } - } - - // If the value was never used outside the block in which it was - // defined, it's killed in that block. - if (!LiveOutOfDefBB) - M.Killed.insert(DefBB); - - return M; -} diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 2ea27fb62fcb..ab34fd653a70 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -17,6 +17,7 @@ #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" using namespace llvm; /// AreEquivalentAddressValues - Test if A and B will obviously have the same diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp index 8e1a7bfef699..10e3f297f9c5 100644 --- a/lib/Analysis/LoopPass.cpp +++ b/lib/Analysis/LoopPass.cpp @@ -14,8 +14,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopPass.h" +#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Timer.h" using namespace llvm; @@ -51,6 +53,20 @@ class PrintLoopPass : public LoopPass { char PrintLoopPass::ID = 0; } +//===----------------------------------------------------------------------===// +// DebugInfoProbe + +static DebugInfoProbeInfo *TheDebugProbe; +static void createDebugInfoProbe() { + if (TheDebugProbe) return; + + // Constructed the first time this is called. 
This guarantees that the + // object will be constructed, if -enable-debug-info-probe is set, + // before static globals, thus it will be destroyed before them. + static ManagedStatic DIP; + TheDebugProbe = &*DIP; +} + //===----------------------------------------------------------------------===// // LPPassManager // @@ -223,6 +239,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const { bool LPPassManager::runOnFunction(Function &F) { LI = &getAnalysis(); bool Changed = false; + createDebugInfoProbe(); // Collect inherited analysis from Module level pass manager. populateInheritedAnalysis(TPM->activeStack); @@ -254,19 +271,21 @@ bool LPPassManager::runOnFunction(Function &F) { // Run all passes on the current Loop. for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { LoopPass *P = getContainedPass(Index); - dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG, CurrentLoop->getHeader()->getName()); dumpRequiredSet(P); initializeAnalysisImpl(P); - + if (TheDebugProbe) + TheDebugProbe->initialize(P, F); { PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader()); TimeRegion PassTimer(getPassTimer(P)); Changed |= P->runOnLoop(CurrentLoop, *this); } + if (TheDebugProbe) + TheDebugProbe->finalize(P, F); if (Changed) dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 1ab18ca054a2..769c68ce425e 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -35,7 +35,13 @@ static bool isMallocCall(const CallInst *CI) { return false; Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "malloc") + if (Callee == 0 || !Callee->isDeclaration()) + return false; + if (Callee->getName() != "malloc" && + Callee->getName() != "_Znwj" && // operator new(unsigned int) + Callee->getName() != "_Znwm" && // operator new(unsigned long) + Callee->getName() != "_Znaj" && // operator new[](unsigned int) + 
Callee->getName() != "_Znam") // operator new[](unsigned long) return false; // Check malloc prototype. @@ -189,7 +195,12 @@ const CallInst *llvm::isFreeCall(const Value *I) { if (!CI) return 0; Function *Callee = CI->getCalledFunction(); - if (Callee == 0 || !Callee->isDeclaration() || Callee->getName() != "free") + if (Callee == 0 || !Callee->isDeclaration()) + return 0; + + if (Callee->getName() != "free" && + Callee->getName() != "_ZdlPv" && // operator delete(void*) + Callee->getName() != "_ZdaPv") // operator delete[](void*) return 0; // Check free prototype. diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 35043bddfaf6..ce7fab6459ed 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -16,6 +16,7 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Function.h" @@ -221,6 +222,96 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, return MemDepResult::getClobber(ScanIt); } +/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that +/// would fully overlap MemLoc if done as a wider legal integer load. +/// +/// MemLocBase, MemLocOffset are lazily computed here the first time the +/// base/offs of memloc is needed. +static bool +isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, + const Value *&MemLocBase, + int64_t &MemLocOffs, + const LoadInst *LI, + const TargetData *TD) { + // If we have no target data, we can't do this. + if (TD == 0) return false; + + // If we haven't already computed the base/offset of MemLoc, do so now. 
+ if (MemLocBase == 0) + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, + LI, *TD); + return Size != 0; +} + +/// getLoadLoadClobberFullWidthSize - This is a little bit of analysis that +/// looks at a memory location for a load (specified by MemLocBase, Offs, +/// and Size) and compares it against a load. If the specified load could +/// be safely widened to a larger integer load that is 1) still efficient, +/// 2) safe for the target, and 3) would provide the specified memory +/// location value, then this function returns the size in bytes of the +/// load width to use. If not, this returns zero. +unsigned MemoryDependenceAnalysis:: +getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, + unsigned MemLocSize, const LoadInst *LI, + const TargetData &TD) { + // We can only extend non-volatile integer loads. + if (!isa(LI->getType()) || LI->isVolatile()) return 0; + + // Get the base of this load. + int64_t LIOffs = 0; + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); + + // If the two pointers are not based on the same pointer, we can't tell that + // they are related. + if (LIBase != MemLocBase) return 0; + + // Okay, the two values are based on the same pointer, but returned as + // no-alias. This happens when we have things like two byte loads at "P+1" + // and "P+3". Check to see if increasing the size of the "LI" load up to its + // alignment (or the largest native integer type) will allow us to load all + // the bits required by MemLoc. + + // If MemLoc is before LI, then no widening of LI will help us out. + if (MemLocOffs < LIOffs) return 0; + + // Get the alignment of the load in bytes. We assume that it is safe to load + // any legal integer up to this size without a problem. 
For example, if we're + // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can + // widen it up to an i32 load. If it is known 2-byte aligned, we can widen it + // to i16. + unsigned LoadAlign = LI->getAlignment(); + + int64_t MemLocEnd = MemLocOffs+MemLocSize; + + // If no amount of rounding up will let MemLoc fit into LI, then bail out. + if (LIOffs+LoadAlign < MemLocEnd) return 0; + + // This is the size of the load to try. Start with the next larger power of + // two. + unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; + NewLoadByteSize = NextPowerOf2(NewLoadByteSize); + + while (1) { + // If this load size is bigger than our known alignment or would not fit + // into a native integer register, then we fail. + if (NewLoadByteSize > LoadAlign || + !TD.fitsInLegalInteger(NewLoadByteSize*8)) + return 0; + + // If a load of this width would include all of MemLoc, then we succeed. + if (LIOffs+NewLoadByteSize >= MemLocEnd) + return NewLoadByteSize; + + NewLoadByteSize <<= 1; + } + + return 0; +} + /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases @@ -229,58 +320,31 @@ MemDepResult MemoryDependenceAnalysis:: getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB) { - Value *InvariantTag = 0; - + const Value *MemLocBase = 0; + int64_t MemLocOffset = 0; + // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { Instruction *Inst = --ScanIt; - // If we're in an invariant region, no dependencies can be found before - // we pass an invariant-begin marker. - if (InvariantTag == Inst) { - InvariantTag = 0; - continue; - } - if (IntrinsicInst *II = dyn_cast(Inst)) { // Debug intrinsics don't (and can't) cause dependences. 
if (isa(II)) continue; - // If we pass an invariant-end marker, then we've just entered an - // invariant region and can start ignoring dependencies. - if (II->getIntrinsicID() == Intrinsic::invariant_end) { - // FIXME: This only considers queries directly on the invariant-tagged - // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(2)), MemLoc); - if (R == AliasAnalysis::MustAlias) - InvariantTag = II->getArgOperand(0); - - continue; - } - // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { // FIXME: This only considers queries directly on the invariant-tagged // pointer, not on query pointers that are indexed off of them. It'd - // be nice to handle that at some point. - AliasAnalysis::AliasResult R = - AA->alias(AliasAnalysis::Location(II->getArgOperand(1)), MemLoc); - if (R == AliasAnalysis::MustAlias) + // be nice to handle that at some point (the right approach is to use + // GetPointerBaseWithConstantOffset). + if (AA->isMustAlias(AliasAnalysis::Location(II->getArgOperand(1)), + MemLoc)) return MemDepResult::getDef(II); continue; } } - // If we're querying on a load and we're in an invariant region, we're done - // at this point. Nothing a load depends on can live in an invariant region. - // - // FIXME: this will prevent us from returning load/load must-aliases, so GVN - // won't remove redundant loads. - if (isLoad && InvariantTag) continue; - // Values depend on loads if the pointers are must aliased. This means that // a load depends on another must aliased load from the same value. if (LoadInst *LI = dyn_cast(Inst)) { @@ -288,27 +352,51 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If we found a pointer, check if it could be the same as our pointer. 
AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - if (R == AliasAnalysis::NoAlias) - continue; - // May-alias loads don't depend on each other without a dependence. - if (isLoad && R != AliasAnalysis::MustAlias) + if (isLoad) { + if (R == AliasAnalysis::NoAlias) { + // If this is an over-aligned integer load (for example, + // "load i8* %P, align 4") see if it would obviously overlap with the + // queried location if widened to a larger load (e.g. if the queried + // location is 1 byte at P+1). If so, return it as a load/load + // clobber result, allowing the client to decide to widen the load if + // it wants to. + if (const IntegerType *ITy = dyn_cast(LI->getType())) + if (LI->getAlignment()*8 > ITy->getPrimitiveSizeInBits() && + isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, + MemLocOffset, LI, TD)) + return MemDepResult::getClobber(Inst); + + continue; + } + + // Must aliased loads are defs of each other. + if (R == AliasAnalysis::MustAlias) + return MemDepResult::getDef(Inst); + + // If we have a partial alias, then return this as a clobber for the + // client to handle. + if (R == AliasAnalysis::PartialAlias) + return MemDepResult::getClobber(Inst); + + // Random may-alias loads don't depend on each other without a + // dependence. + continue; + } + + // Stores don't depend on other no-aliased accesses. + if (R == AliasAnalysis::NoAlias) continue; // Stores don't alias loads from read-only memory. - if (!isLoad && AA->pointsToConstantMemory(LoadLoc)) + if (AA->pointsToConstantMemory(LoadLoc)) continue; - // Stores depend on may and must aliased loads, loads depend on must-alias - // loads. + // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } if (StoreInst *SI = dyn_cast(Inst)) { - // There can't be stores to the value we care about inside an - // invariant region. - if (InvariantTag) continue; - // If alias analysis can tell that this store is guaranteed to not modify // the query pointer, ignore it. 
Use getModRefInfo to handle cases where // the query pointer points to constant memory etc. @@ -341,8 +429,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, (isa(Inst) && extractMallocCall(Inst))) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - if (AccessPtr == Inst || - AA->alias(Inst, 1, AccessPtr, 1) == AliasAnalysis::MustAlias) + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); continue; } @@ -353,9 +440,6 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // If the call has no effect on the queried pointer, just ignore it. continue; case AliasAnalysis::Mod: - // If we're in an invariant region, we can ignore calls that ONLY - // modify the pointer. - if (InvariantTag) continue; return MemDepResult::getClobber(Inst); case AliasAnalysis::Ref: // If the call is known to never store to the pointer, and if this is a diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index 93da5a48518d..70dcd0df242d 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index 5d3f6bbc7b6e..7c584daef734 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -38,13 +38,10 @@ #include "llvm/Support/TypeBuilder.h" #include "llvm/Support/raw_ostream.h" -#include #include -#include #include #include #include -#include #include using namespace llvm; @@ -286,7 +283,7 @@ void BallLarusDag::calculatePathNumbers() { BallLarusEdge* exitEdge = addEdge(node, getExit(), 0); exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - // Counters to 
handle the possibilty of a multi-graph + // Counters to handle the possibility of a multi-graph BasicBlock* oldTarget = 0; unsigned duplicateNumber = 0; diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index c54977314207..0ae734e259db 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -124,7 +124,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { ProfilePathEdgeVector* pev = currentPath->getPathEdges(); DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": " << currentPath->getCount() << "\n"); - // setup the entry edge (normally path profiling doens't care about this) + // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index 3f0deab9ea87..6ed27297923f 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -28,7 +28,6 @@ using namespace llvm; //===----------------------------------------------------------------------===// char PostDominatorTree::ID = 0; -char PostDominanceFrontier::ID = 0; INITIALIZE_PASS(PostDominatorTree, "postdomtree", "Post-Dominator Tree Construction", true, true) @@ -50,53 +49,3 @@ FunctionPass* llvm::createPostDomTree() { return new PostDominatorTree(); } -//===----------------------------------------------------------------------===// -// PostDominanceFrontier Implementation -//===----------------------------------------------------------------------===// - -INITIALIZE_PASS_BEGIN(PostDominanceFrontier, "postdomfrontier", - "Post-Dominance Frontier Construction", true, true) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) -INITIALIZE_PASS_END(PostDominanceFrontier, "postdomfrontier", - "Post-Dominance Frontier Construction", true, true) - -const 
DominanceFrontier::DomSetType & -PostDominanceFrontier::calculate(const PostDominatorTree &DT, - const DomTreeNode *Node) { - // Loop over CFG successors to calculate DFlocal[Node] - BasicBlock *BB = Node->getBlock(); - DomSetType &S = Frontiers[BB]; // The new set to fill in... - if (getRoots().empty()) return S; - - if (BB) - for (pred_iterator SI = pred_begin(BB), SE = pred_end(BB); - SI != SE; ++SI) { - BasicBlock *P = *SI; - // Does Node immediately dominate this predecessor? - DomTreeNode *SINode = DT[P]; - if (SINode && SINode->getIDom() != Node) - S.insert(P); - } - - // At this point, S is DFlocal. Now we union in DFup's of our children... - // Loop through and visit the nodes that Node immediately dominates (Node's - // children in the IDomTree) - // - for (DomTreeNode::const_iterator - NI = Node->begin(), NE = Node->end(); NI != NE; ++NI) { - DomTreeNode *IDominee = *NI; - const DomSetType &ChildDF = calculate(DT, IDominee); - - DomSetType::const_iterator CDFI = ChildDF.begin(), CDFE = ChildDF.end(); - for (; CDFI != CDFE; ++CDFI) { - if (!DT.properlyDominates(Node, DT[*CDFI])) - S.insert(*CDFI); - } - } - - return S; -} - -FunctionPass* llvm::createPostDomFrontier() { - return new PostDominanceFrontier(); -} diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 667ee1cc348a..b594e2ba5506 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -140,7 +140,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { // loop, thus the edge is a backedge, continue and do not check if the // value is valid. 
if (BBisHeader && BBLoop->contains(*bbi)) { - printEdgeError(edge, "but is backedge, continueing"); + printEdgeError(edge, "but is backedge, continuing"); continue; } // If the edges value is missing (and this is no loop header, and this is diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index 36f211e858d2..173de2c02791 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -309,9 +309,9 @@ void ProfileInfoT:: removeEdge(oldedge); } -/// Replaces all occurences of RmBB in the ProfilingInfo with DestBB. +/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB. /// This checks all edges of the function the blocks reside in and replaces the -/// occurences of RmBB with DestBB. +/// occurrences of RmBB with DestBB. template<> void ProfileInfoT:: replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) { @@ -812,7 +812,7 @@ void ProfileInfoT::repair(const Function *F) { } if (iw < 0) continue; - // Check the recieving end of the path if it can handle the flow. + // Check the receiving end of the path if it can handle the flow. double ow = getExecutionCount(Dest); Processed.clear(); for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB); diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index 25481b2ee671..eaa38dad16a1 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/raw_ostream.h" #include #include -#include using namespace llvm; // ByteSwap - Byteswap 'Var' if 'Really' is true. 
diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index e2f6a8bf5d9a..52753cbe85af 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -41,16 +41,15 @@ VerifyRegionInfoX("verify-region-info", cl::location(VerifyRegionInfo), STATISTIC(numRegions, "The # of regions"); STATISTIC(numSimpleRegions, "The # of simple regions"); -//===----------------------------------------------------------------------===// -/// PrintStyle - Print region in difference ways. -enum PrintStyle { PrintNone, PrintBB, PrintRN }; - -static cl::opt printStyle("print-region-style", cl::Hidden, +static cl::opt printStyle("print-region-style", + cl::Hidden, cl::desc("style of printing regions"), cl::values( - clEnumValN(PrintNone, "none", "print no details"), - clEnumValN(PrintBB, "bb", "print regions in detail with block_iterator"), - clEnumValN(PrintRN, "rn", "print regions in detail with element_iterator"), + clEnumValN(Region::PrintNone, "none", "print no details"), + clEnumValN(Region::PrintBB, "bb", + "print regions in detail with block_iterator"), + clEnumValN(Region::PrintRN, "rn", + "print regions in detail with element_iterator"), clEnumValEnd)); //===----------------------------------------------------------------------===// /// Region Implementation @@ -413,7 +412,8 @@ Region *Region::getExpandedRegion() const { return new Region(getEntry(), R->getExit(), RI, DT); } -void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { +void Region::print(raw_ostream &OS, bool print_tree, unsigned level, + enum PrintStyle Style) const { if (print_tree) OS.indent(level*2) << "[" << level << "] " << getNameStr(); else @@ -422,14 +422,14 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { OS << "\n"; - if (printStyle != PrintNone) { + if (Style != PrintNone) { OS.indent(level*2) << "{\n"; OS.indent(level*2 + 2); - if (printStyle == PrintBB) { + if (Style == PrintBB) { for (const_block_iterator I = 
block_begin(), E = block_end(); I!=E; ++I) OS << **I << ", "; // TODO: remove the last "," - } else if (printStyle == PrintRN) { + } else if (Style == PrintRN) { for (const_element_iterator I = element_begin(), E = element_end(); I!=E; ++I) OS << **I << ", "; // TODO: remove the last ", } @@ -439,14 +439,14 @@ void Region::print(raw_ostream &OS, bool print_tree, unsigned level) const { if (print_tree) for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI) - (*RI)->print(OS, print_tree, level+1); + (*RI)->print(OS, print_tree, level+1, Style); - if (printStyle != PrintNone) + if (Style != PrintNone) OS.indent(level*2) << "} \n"; } void Region::dump() const { - print(dbgs(), true, getDepth()); + print(dbgs(), true, getDepth(), printStyle.getValue()); } void Region::clearNodeCache() { @@ -714,7 +714,7 @@ void RegionInfo::getAnalysisUsage(AnalysisUsage &AU) const { void RegionInfo::print(raw_ostream &OS, const Module *) const { OS << "Region tree:\n"; - TopLevelRegion->print(OS, true, 0); + TopLevelRegion->print(OS, true, 0, printStyle.getValue()); OS << "End region tree\n"; } diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 0cf0f9050504..a1730b0a3ca1 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -70,6 +70,32 @@ struct DOTGraphTraits : public DOTGraphTraits { G->getTopLevelRegion()); } + std::string getEdgeAttributes(RegionNode *srcNode, + GraphTraits::ChildIteratorType CI, RegionInfo *RI) { + + RegionNode *destNode = *CI; + + if (srcNode->isSubRegion() || destNode->isSubRegion()) + return ""; + + // In case of a backedge, do not use it to define the layout of the nodes. 
+ BasicBlock *srcBB = srcNode->getNodeAs(); + BasicBlock *destBB = destNode->getNodeAs(); + + Region *R = RI->getRegionFor(destBB); + + while (R && R->getParent()) + if (R->getParent()->getEntry() == destBB) + R = R->getParent(); + else + break; + + if (R->getEntry() == destBB && R->contains(srcBB)) + return "constraint=false"; + + return ""; + } + // Print the cluster of the subregions. This groups the single basic blocks // and adds a different background color for each group. static void printRegionCluster(const Region *R, GraphWriter &GW, diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 62244ccb3a03..bab4619894c7 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -157,10 +157,13 @@ void SCEV::print(raw_ostream &OS) const { for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) OS << ",+," << *AR->getOperand(i); OS << "}<"; - if (AR->hasNoUnsignedWrap()) + if (AR->getNoWrapFlags(FlagNUW)) OS << "nuw><"; - if (AR->hasNoSignedWrap()) + if (AR->getNoWrapFlags(FlagNSW)) OS << "nsw><"; + if (AR->getNoWrapFlags(FlagNW) && + !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) + OS << "nw><"; WriteAsOperand(OS, AR->getLoop()->getHeader(), /*PrintType=*/false); OS << ">"; return; @@ -203,7 +206,7 @@ void SCEV::print(raw_ostream &OS) const { OS << "alignof(" << *AllocTy << ")"; return; } - + const Type *CTy; Constant *FieldNo; if (U->isOffsetOf(CTy, FieldNo)) { @@ -212,7 +215,7 @@ void SCEV::print(raw_ostream &OS) const { OS << ")"; return; } - + // Otherwise just print it normally. WriteAsOperand(OS, U->getValue(), false); return; @@ -830,7 +833,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Operands.push_back(S); } if (!hasTrunc) - return getAddExpr(Operands, false, false); + return getAddExpr(Operands); UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. 
} @@ -845,7 +848,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Operands.push_back(S); } if (!hasTrunc) - return getMulExpr(Operands, false, false); + return getMulExpr(Operands); UniqueSCEVs.FindNodeOrInsertPos(ID, IP); // Mutates IP, returns NULL. } @@ -854,7 +857,7 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, SmallVector Operands; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); - return getAddRecExpr(Operands, AddRec->getLoop()); + return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); } // As a special case, fold trunc(undef) to undef. We don't want to @@ -926,10 +929,10 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. - if (AR->hasNoUnsignedWrap()) + if (AR->getNoWrapFlags(SCEV::FlagNUW)) return getAddRecExpr(getZeroExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -959,12 +962,14 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, getAddExpr(getZeroExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getZeroExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NUW, which is propagated to this AddRec. + const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); - + L, AR->getNoWrapFlags()); + } // Similar to above, only this time treat the step value as signed. // This covers loops that count down. 
const SCEV *SMul = getMulExpr(CastedMaxBECount, Step); @@ -973,11 +978,15 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, getAddExpr(getZeroExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getSignExtendExpr(Step, WideTy))); - if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getZeroExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NW, which is propagated to this AddRec. + // Negative step causes unsigned wrap, but it still can't self-wrap. + const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } // If the backedge is guarded by a comparison with the pre-inc value @@ -990,22 +999,29 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_ULT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NUW, which is propagated to this AddRec. + const_cast(AR)->setNoWrapFlags(SCEV::FlagNUW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_UGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NW, which is propagated to this AddRec. + // Negative step causes unsigned wrap, but it still can't self-wrap. 
+ const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); // Return the expression with the addrec on the outside. return getAddRecExpr(getZeroExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } } } @@ -1080,10 +1096,10 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // If we have special knowledge that this addrec won't overflow, // we don't need to do any further analysis. - if (AR->hasNoSignedWrap()) + if (AR->getNoWrapFlags(SCEV::FlagNSW)) return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, SCEV::FlagNSW); // Check whether the backedge-taken count is SCEVCouldNotCompute. // Note that this serves two purposes: It filters out loops that are @@ -1113,12 +1129,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, getAddExpr(getSignExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getSignExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); - + L, AR->getNoWrapFlags()); + } // Similar to above, only this time treat the step value as unsigned. // This covers loops that count up with an unsigned step. const SCEV *UMul = getMulExpr(CastedMaxBECount, Step); @@ -1127,11 +1145,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, getAddExpr(getSignExtendExpr(Start, WideTy), getMulExpr(getZeroExtendExpr(CastedMaxBECount, WideTy), getZeroExtendExpr(Step, WideTy))); - if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) + if (getSignExtendExpr(Add, WideTy) == OperandExtendedAdd) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. 
+ const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getZeroExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } // If the backedge is guarded by a comparison with the pre-inc value @@ -1144,22 +1165,28 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SLT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SLT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } else if (isKnownNegative(Step)) { const SCEV *N = getConstant(APInt::getSignedMaxValue(BitWidth) - getSignedRange(Step).getSignedMin()); if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, AR, N) || (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_SGT, Start, N) && isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_SGT, - AR->getPostIncExpr(*this), N))) + AR->getPostIncExpr(*this), N))) { + // Cache knowledge of AR NSW, which is propagated to this AddRec. + const_cast(AR)->setNoWrapFlags(SCEV::FlagNSW); // Return the expression with the addrec on the outside. return getAddRecExpr(getSignExtendExpr(Start, Ty), getSignExtendExpr(Step, Ty), - L); + L, AR->getNoWrapFlags()); + } } } } @@ -1213,7 +1240,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); I != E; ++I) Ops.push_back(getAnyExtendExpr(*I, Ty)); - return getAddRecExpr(Ops, AR->getLoop()); + return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); } // As a special case, fold anyext(undef) to undef. 
We don't want to @@ -1334,7 +1361,9 @@ namespace { /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, - bool HasNUW, bool HasNSW) { + SCEV::NoWrapFlags Flags) { + assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && + "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty add!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1344,8 +1373,11 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, "SCEVAddExpr operand types don't match!"); #endif - // If HasNSW is true and all the operands are non-negative, infer HasNUW. - if (!HasNUW && HasNSW) { + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { bool All = true; for (SmallVectorImpl::const_iterator I = Ops.begin(), E = Ops.end(); I != E; ++I) @@ -1353,7 +1385,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, All = false; break; } - if (All) HasNUW = true; + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); } // Sort by complexity, this groups all similar expression types together. @@ -1404,7 +1436,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, FoundMatch = true; } if (FoundMatch) - return getAddExpr(Ops, HasNUW, HasNSW); + return getAddExpr(Ops, Flags); // Check for truncates. If all the operands are truncated from the same // type, see if factoring out the truncate would permit the result to be @@ -1454,7 +1486,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, } if (Ok) { // Evaluate the expression in the larger type. - const SCEV *Fold = getAddExpr(LargeOps, HasNUW, HasNSW); + const SCEV *Fold = getAddExpr(LargeOps, Flags); // If it folds to something simple, use it. 
Otherwise, don't. if (isa(Fold) || isa(Fold)) return getTruncateExpr(Fold, DstType); @@ -1625,9 +1657,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. - const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, - HasNUW && AddRec->hasNoUnsignedWrap(), - HasNSW && AddRec->hasNoSignedWrap()); + // Always propagate NW. + Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); + const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1668,7 +1700,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, } Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; } - Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop); + // Step size has changed, so we cannot guarantee no self-wraparound. + Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); return getAddExpr(Ops); } @@ -1692,15 +1725,16 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } - if (HasNUW) S->setHasNoUnsignedWrap(true); - if (HasNSW) S->setHasNoSignedWrap(true); + S->setNoWrapFlags(Flags); return S; } /// getMulExpr - Get a canonical multiply expression, or something simpler if /// possible. const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, - bool HasNUW, bool HasNSW) { + SCEV::NoWrapFlags Flags) { + assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && + "only nuw or nsw allowed"); assert(!Ops.empty() && "Cannot get empty mul!"); if (Ops.size() == 1) return Ops[0]; #ifndef NDEBUG @@ -1710,8 +1744,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, "SCEVMulExpr operand types don't match!"); #endif - // If HasNSW is true and all the operands are non-negative, infer HasNUW. 
- if (!HasNUW && HasNSW) { + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { bool All = true; for (SmallVectorImpl::const_iterator I = Ops.begin(), E = Ops.end(); I != E; ++I) @@ -1719,7 +1756,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, All = false; break; } - if (All) HasNUW = true; + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); } // Sort by complexity, this groups all similar expression types together. @@ -1759,12 +1796,12 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, } else if (Ops[0]->isAllOnesValue()) { // If we have a mul by -1 of an add, try distributing the -1 among the // add operands. - if (Ops.size() == 2) + if (Ops.size() == 2) { if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { SmallVector NewOps; bool AnyFolded = false; - for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); - I != E; ++I) { + for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), + E = Add->op_end(); I != E; ++I) { const SCEV *Mul = getMulExpr(Ops[0], *I); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); @@ -1772,6 +1809,18 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, if (AnyFolded) return getAddExpr(NewOps); } + else if (const SCEVAddRecExpr * + AddRec = dyn_cast(Ops[1])) { + // Negation preserves a recurrence's no self-wrap property. 
+ SmallVector Operands; + for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), + E = AddRec->op_end(); I != E; ++I) { + Operands.push_back(getMulExpr(Ops[0], *I)); + } + return getAddRecExpr(Operands, AddRec->getLoop(), + AddRec->getNoWrapFlags(SCEV::FlagNW)); + } + } } if (Ops.size() == 1) @@ -1831,9 +1880,11 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, // Build the new addrec. Propagate the NUW and NSW flags if both the // outer mul and the inner addrec are guaranteed to have no overflow. - const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, - HasNUW && AddRec->hasNoUnsignedWrap(), - HasNSW && AddRec->hasNoSignedWrap()); + // + // No self-wrap cannot be guaranteed after changing the step size, but + // will be inferred if either NUW or NSW is true. + Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); + const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); // If all of the other operands were loop invariant, we are done. if (Ops.size() == 1) return NewRec; @@ -1869,7 +1920,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, getMulExpr(G, B), getMulExpr(B, D)); const SCEV *NewAddRec = getAddRecExpr(NewStart, NewStep, - F->getLoop()); + F->getLoop(), + SCEV::FlagAnyWrap); if (Ops.size() == 2) return NewAddRec; Ops[Idx] = AddRec = cast(NewAddRec); Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; @@ -1897,8 +1949,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } - if (HasNUW) S->setHasNoUnsignedWrap(true); - if (HasNSW) S->setHasNoSignedWrap(true); + S->setNoWrapFlags(Flags); return S; } @@ -1938,11 +1989,12 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, getZeroExtendExpr(AR, ExtTy) == getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), getZeroExtendExpr(Step, ExtTy), - AR->getLoop())) { + AR->getLoop(), SCEV::FlagAnyWrap)) { SmallVector Operands; for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) 
Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); - return getAddRecExpr(Operands, AR->getLoop()); + return getAddRecExpr(Operands, AR->getLoop(), + SCEV::FlagNW); } // (A*B)/C --> A*(B/C) if safe and B/C can be folded. if (const SCEVMulExpr *M = dyn_cast(LHS)) { @@ -1963,7 +2015,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, } } // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. - if (const SCEVAddRecExpr *A = dyn_cast(LHS)) { + if (const SCEVAddExpr *A = dyn_cast(LHS)) { SmallVector Operands; for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); @@ -2006,27 +2058,26 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. -const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, - const SCEV *Step, const Loop *L, - bool HasNUW, bool HasNSW) { +const SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, + const Loop *L, + SCEV::NoWrapFlags Flags) { SmallVector Operands; Operands.push_back(Start); if (const SCEVAddRecExpr *StepChrec = dyn_cast(Step)) if (StepChrec->getLoop() == L) { Operands.append(StepChrec->op_begin(), StepChrec->op_end()); - return getAddRecExpr(Operands, L); + return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); } Operands.push_back(Step); - return getAddRecExpr(Operands, L, HasNUW, HasNSW); + return getAddRecExpr(Operands, L, Flags); } /// getAddRecExpr - Get an add recurrence expression for the specified loop. /// Simplify the expression as much as possible. 
const SCEV * ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, - const Loop *L, - bool HasNUW, bool HasNSW) { + const Loop *L, SCEV::NoWrapFlags Flags) { if (Operands.size() == 1) return Operands[0]; #ifndef NDEBUG const Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); @@ -2040,7 +2091,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, if (Operands.back()->isZero()) { Operands.pop_back(); - return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X + return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X } // It's tempting to want to call getMaxBackedgeTakenCount count here and @@ -2049,8 +2100,11 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, // meaningful BE count at this point (and if we don't, we'd be stuck // with a SCEVCouldNotCompute as the cached BE count). - // If HasNSW is true and all the operands are non-negative, infer HasNUW. - if (!HasNUW && HasNSW) { + // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. + // And vice-versa. + int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; + SCEV::NoWrapFlags SignOrUnsignWrap = maskFlags(Flags, SignOrUnsignMask); + if (SignOrUnsignWrap && (SignOrUnsignWrap != SignOrUnsignMask)) { bool All = true; for (SmallVectorImpl::const_iterator I = Operands.begin(), E = Operands.end(); I != E; ++I) @@ -2058,7 +2112,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, All = false; break; } - if (All) HasNUW = true; + if (All) Flags = setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); } // Canonicalize nested AddRecs in by nesting them in order of loop depth. @@ -2081,16 +2135,29 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, break; } if (AllInvariant) { - NestedOperands[0] = getAddRecExpr(Operands, L); + // Create a recurrence for the outer loop with the same step size. + // + // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the + // inner recurrence has the same property. 
+ SCEV::NoWrapFlags OuterFlags = + maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); + + NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); AllInvariant = true; for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { AllInvariant = false; break; } - if (AllInvariant) + if (AllInvariant) { // Ok, both add recurrences are valid after the transformation. - return getAddRecExpr(NestedOperands, NestedLoop, HasNUW, HasNSW); + // + // The inner recurrence keeps its NW flag but only keeps NUW/NSW if + // the outer recurrence has the same property. + SCEV::NoWrapFlags InnerFlags = + maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags); + return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags); + } } // Reset Operands to its original state. Operands[0] = NestedAR; @@ -2114,8 +2181,7 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl &Operands, O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); } - if (HasNUW) S->setHasNoUnsignedWrap(true); - if (HasNSW) S->setHasNoSignedWrap(true); + S->setNoWrapFlags(Flags); return S; } @@ -2510,17 +2576,17 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { return getMinusSCEV(AllOnes, V); } -/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1, -/// and thus the HasNUW and HasNSW bits apply to the resultant add, not -/// whether the sub would have overflowed. +/// getMinusSCEV - Return LHS-RHS. Minus is represented in SCEV as A+B*-1. const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, - bool HasNUW, bool HasNSW) { + SCEV::NoWrapFlags Flags) { + assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); + // Fast path: X - X --> 0. 
if (LHS == RHS) return getConstant(LHS->getType(), 0); // X - Y --> X + -Y - return getAddExpr(LHS, getNegativeSCEV(RHS), HasNUW, HasNSW); + return getAddExpr(LHS, getNegativeSCEV(RHS), Flags); } /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -2652,6 +2718,36 @@ const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS, return getUMinExpr(PromotedLHS, PromotedRHS); } +/// getPointerBase - Transitively follow the chain of pointer-type operands +/// until reaching a SCEV that does not have a single pointer operand. This +/// returns a SCEVUnknown pointer for well-formed pointer-type expressions, +/// but corner cases do exist. +const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { + // A pointer operand may evaluate to a nonpointer expression, such as null. + if (!V->getType()->isPointerTy()) + return V; + + if (const SCEVCastExpr *Cast = dyn_cast(V)) { + return getPointerBase(Cast->getOperand()); + } + else if (const SCEVNAryExpr *NAry = dyn_cast(V)) { + const SCEV *PtrOp = 0; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); + I != E; ++I) { + if ((*I)->getType()->isPointerTy()) { + // Cannot find the base of an expression with multiple pointer operands. + if (PtrOp) + return V; + PtrOp = *I; + } + } + if (!PtrOp) + return V; + return getPointerBase(PtrOp); + } + return V; +} + /// PushDefUseChildren - Push users of the given Instruction /// onto the given Worklist. static void @@ -2773,44 +2869,34 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { if (isLoopInvariant(Accum, L) || (isa(Accum) && cast(Accum)->getLoop() == L)) { - bool HasNUW = false; - bool HasNSW = false; + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; // If the increment doesn't overflow, then neither the addrec nor // the post-increment will overflow. 
if (const AddOperator *OBO = dyn_cast(BEValueV)) { if (OBO->hasNoUnsignedWrap()) - HasNUW = true; + Flags = setFlags(Flags, SCEV::FlagNUW); if (OBO->hasNoSignedWrap()) - HasNSW = true; - } else if (const GEPOperator *GEP = - dyn_cast(BEValueV)) { - // If the increment is a GEP, then we know it won't perform a - // signed overflow, because the address space cannot be - // wrapped around. - // - // NOTE: This isn't strictly true, because you could have an - // object straddling the 2G address boundary in a 32-bit address - // space (for example). We really want to model this as a "has - // no signed/unsigned wrap" where the base pointer is treated as - // unsigned and the increment is known to not have signed - // wrapping. - // - // This is a highly theoretical concern though, and this is good - // enough for all cases we know of at this point. :) - // - HasNSW |= GEP->isInBounds(); + Flags = setFlags(Flags, SCEV::FlagNSW); + } else if (const GEPOperator *GEP = + dyn_cast(BEValueV)) { + // If the increment is an inbounds GEP, then we know the address + // space cannot be wrapped around. We cannot make any guarantee + // about signed or unsigned overflow because pointers are + // unsigned but we may have a negative index from the base + // pointer. + if (GEP->isInBounds()) + Flags = setFlags(Flags, SCEV::FlagNW); } const SCEV *StartVal = getSCEV(StartValueV); - const SCEV *PHISCEV = - getAddRecExpr(StartVal, Accum, L, HasNUW, HasNSW); + const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); // Since the no-wrap flags are on the increment, they apply to the // post-incremented value as well. if (isLoopInvariant(Accum, L)) (void)getAddRecExpr(getAddExpr(StartVal, Accum), - Accum, L, HasNUW, HasNSW); + Accum, L, Flags); // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. 
We now need to go back and purge all of the @@ -2834,8 +2920,11 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { // initial step of the addrec evolution. if (StartVal == getMinusSCEV(AddRec->getOperand(0), AddRec->getOperand(1))) { + // FIXME: For constant StartVal, we should be able to infer + // no-wrap flags. const SCEV *PHISCEV = - getAddRecExpr(StartVal, AddRec->getOperand(1), L); + getAddRecExpr(StartVal, AddRec->getOperand(1), L, + SCEV::FlagAnyWrap); // Okay, for the entire analysis of this edge we assumed the PHI // to be symbolic. We now need to go back and purge all of the @@ -2899,8 +2988,9 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, /*NUW*/ false, - /*NSW*/ isInBounds); + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, + isInBounds ? SCEV::FlagNSW : + SCEV::FlagAnyWrap); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); @@ -2911,8 +3001,8 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { const SCEV *BaseS = getSCEV(Base); // Add the total offset from all the GEP indices to the base. - return getAddExpr(BaseS, TotalOffset, /*NUW*/ false, - /*NSW*/ isInBounds); + return getAddExpr(BaseS, TotalOffset, + isInBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3074,7 +3164,7 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) { if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) { // If there's no unsigned wrap, the value will never be less than its // initial value. 
- if (AddRec->hasNoUnsignedWrap()) + if (AddRec->getNoWrapFlags(SCEV::FlagNUW)) if (const SCEVConstant *C = dyn_cast(AddRec->getStart())) if (!C->getValue()->isZero()) ConservativeResult = @@ -3216,7 +3306,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (const SCEVAddRecExpr *AddRec = dyn_cast(S)) { // If there's no signed wrap, and all the operands have the same sign or // zero, the value won't ever change sign. - if (AddRec->hasNoSignedWrap()) { + if (AddRec->getNoWrapFlags(SCEV::FlagNSW)) { bool AllNonNeg = true; bool AllNonPos = true; for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) { @@ -3349,7 +3439,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { SmallVector MulOps; MulOps.push_back(getSCEV(U->getOperand(1))); for (Value *Op = U->getOperand(0); - Op->getValueID() == Instruction::Mul + Value::InstructionVal; + Op->getValueID() == Instruction::Mul + Value::InstructionVal; Op = U->getOperand(0)) { U = cast(Op); MulOps.push_back(getSCEV(U->getOperand(1))); @@ -3411,10 +3501,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { // transfer the no-wrap flags, since an or won't introduce a wrap. 
if (const SCEVAddRecExpr *NewAR = dyn_cast(S)) { const SCEVAddRecExpr *OldAR = cast(LHS); - if (OldAR->hasNoUnsignedWrap()) - const_cast(NewAR)->setHasNoUnsignedWrap(true); - if (OldAR->hasNoSignedWrap()) - const_cast(NewAR)->setHasNoSignedWrap(true); + const_cast(NewAR)->setNoWrapFlags( + OldAR->getNoWrapFlags()); } return S; } @@ -3700,19 +3788,20 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { if (!Pair.second) return Pair.first->second; - BackedgeTakenInfo BECount = ComputeBackedgeTakenCount(L); - if (BECount.Exact != getCouldNotCompute()) { - assert(isLoopInvariant(BECount.Exact, L) && - isLoopInvariant(BECount.Max, L) && + BackedgeTakenInfo Result = getCouldNotCompute(); + BackedgeTakenInfo Computed = ComputeBackedgeTakenCount(L); + if (Computed.Exact != getCouldNotCompute()) { + assert(isLoopInvariant(Computed.Exact, L) && + isLoopInvariant(Computed.Max, L) && "Computed backedge-taken count isn't loop invariant for loop!"); ++NumTripCountsComputed; // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; } else { - if (BECount.Max != getCouldNotCompute()) + if (Computed.Max != getCouldNotCompute()) // Update the value in the map. - Pair.first->second = BECount; + Result = Computed; if (isa(L->getHeader()->begin())) // Only count loops that have phi nodes as not being computable. ++NumTripCountsNotComputed; @@ -3723,7 +3812,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { // conservative estimates made without the benefit of trip count // information. This is similar to the code in forgetLoop, except that // it handles SCEVUnknown PHI nodes specially. 
- if (BECount.hasAnyInfo()) { + if (Computed.hasAnyInfo()) { SmallVector Worklist; PushLoopPHIs(L, Worklist); @@ -3754,7 +3843,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) { PushDefUseChildren(I, Worklist); } } - return Pair.first->second; + + // Re-lookup the insert position, since the call to + // ComputeBackedgeTakenCount above could result in a + // recusive call to getBackedgeTakenInfo (on a different + // loop), which would invalidate the iterator computed + // earlier. + return BackedgeTakenCounts.find(L)->second = Result; } /// forgetLoop - This method should be called by the client when it has @@ -4022,105 +4117,6 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L, return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB)); } -static const SCEVAddRecExpr * -isSimpleUnwrappingAddRec(const SCEV *S, const Loop *L) { - const SCEVAddRecExpr *SA = dyn_cast(S); - - // The SCEV must be an addrec of this loop. - if (!SA || SA->getLoop() != L || !SA->isAffine()) - return 0; - - // The SCEV must be known to not wrap in some way to be interesting. - if (!SA->hasNoUnsignedWrap() && !SA->hasNoSignedWrap()) - return 0; - - // The stride must be a constant so that we know if it is striding up or down. - if (!isa(SA->getOperand(1))) - return 0; - return SA; -} - -/// getMinusSCEVForExitTest - When considering an exit test for a loop with a -/// "x != y" exit test, we turn this into a computation that evaluates x-y != 0, -/// and this function returns the expression to use for x-y. We know and take -/// advantage of the fact that this subtraction is only being used in a -/// comparison by zero context. 
-/// -static const SCEV *getMinusSCEVForExitTest(const SCEV *LHS, const SCEV *RHS, - const Loop *L, ScalarEvolution &SE) { - // If either LHS or RHS is an AddRec SCEV (of this loop) that is known to not - // wrap (either NSW or NUW), then we know that the value will either become - // the other one (and thus the loop terminates), that the loop will terminate - // through some other exit condition first, or that the loop has undefined - // behavior. This information is useful when the addrec has a stride that is - // != 1 or -1, because it means we can't "miss" the exit value. - // - // In any of these three cases, it is safe to turn the exit condition into a - // "counting down" AddRec (to zero) by subtracting the two inputs as normal, - // but since we know that the "end cannot be missed" we can force the - // resulting AddRec to be a NUW addrec. Since it is counting down, this means - // that the AddRec *cannot* pass zero. - - // See if LHS and RHS are addrec's we can handle. - const SCEVAddRecExpr *LHSA = isSimpleUnwrappingAddRec(LHS, L); - const SCEVAddRecExpr *RHSA = isSimpleUnwrappingAddRec(RHS, L); - - // If neither addrec is interesting, just return a minus. - if (RHSA == 0 && LHSA == 0) - return SE.getMinusSCEV(LHS, RHS); - - // If only one of LHS and RHS are an AddRec of this loop, make sure it is LHS. - if (RHSA && LHSA == 0) { - // Safe because a-b === b-a for comparisons against zero. - std::swap(LHS, RHS); - std::swap(LHSA, RHSA); - } - - // Handle the case when only one is advancing in a non-overflowing way. - if (RHSA == 0) { - // If RHS is loop varying, then we can't predict when LHS will cross it. - if (!SE.isLoopInvariant(RHS, L)) - return SE.getMinusSCEV(LHS, RHS); - - // If LHS has a positive stride, then we compute RHS-LHS, because the loop - // is counting up until it crosses RHS (which must be larger than LHS). If - // it is negative, we compute LHS-RHS because we're counting down to RHS. 
- const ConstantInt *Stride = - cast(LHSA->getOperand(1))->getValue(); - if (Stride->getValue().isNegative()) - std::swap(LHS, RHS); - - return SE.getMinusSCEV(RHS, LHS, true /*HasNUW*/); - } - - // If both LHS and RHS are interesting, we have something like: - // a+i*4 != b+i*8. - const ConstantInt *LHSStride = - cast(LHSA->getOperand(1))->getValue(); - const ConstantInt *RHSStride = - cast(RHSA->getOperand(1))->getValue(); - - // If the strides are equal, then this is just a (complex) loop invariant - // comparison of a and b. - if (LHSStride == RHSStride) - return SE.getMinusSCEV(LHSA->getStart(), RHSA->getStart()); - - // If the signs of the strides differ, then the negative stride is counting - // down to the positive stride. - if (LHSStride->getValue().isNegative() != RHSStride->getValue().isNegative()){ - if (RHSStride->getValue().isNegative()) - std::swap(LHS, RHS); - } else { - // If LHS's stride is smaller than RHS's stride, then "b" must be less than - // "a" and "b" is RHS is counting up (catching up) to LHS. This is true - // whether the strides are positive or negative. - if (RHSStride->getValue().slt(LHSStride->getValue())) - std::swap(LHS, RHS); - } - - return SE.getMinusSCEV(LHS, RHS, true /*HasNUW*/); -} - /// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the /// backedge of the specified loop will execute if its exit condition /// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB. 
@@ -4180,8 +4176,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L, switch (Cond) { case ICmpInst::ICMP_NE: { // while (X != Y) // Convert to: while (X-Y != 0) - BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEVForExitTest(LHS, RHS, L, - *this), L); + BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L); if (BTI.hasAnyInfo()) return BTI; break; } @@ -4706,7 +4701,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { for (++i; i != e; ++i) NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L)); - AddRec = cast(getAddRecExpr(NewOps, AddRec->getLoop())); + const SCEV *FoldedRec = + getAddRecExpr(NewOps, AddRec->getLoop(), + AddRec->getNoWrapFlags(SCEV::FlagNW)); + AddRec = dyn_cast(FoldedRec); + // The addrec may be folded to a nonrecurrence, for example, if the + // induction variable is multiplied by zero after constant folding. Go + // ahead and return the folded value. + if (!AddRec) + return FoldedRec; break; } @@ -4871,6 +4874,11 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { /// HowFarToZero - Return the number of times a backedge comparing the specified /// value to zero will execute. If not computable, return CouldNotCompute. +/// +/// This is only used for loops with a "x != y" exit test. The exit condition is +/// now expressed as a single expression, V = x-y. So the exit test is +/// effectively V != 0. We know and take advantage of the fact that this +/// expression only being used in a comparison by zero context. ScalarEvolution::BackedgeTakenInfo ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { // If the value is a constant @@ -4903,7 +4911,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { R2->getValue()))) { if (CB->getZExtValue() == false) std::swap(R1, R2); // R1 is the minimum root now. - + // We can only use this value if the chrec ends up with an exact zero // value at this index. 
When solving for "X*X != 5", for example, we // should not accept a root of 2. @@ -4934,26 +4942,43 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) { const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop()); const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop()); - // If the AddRec is NUW, then (in an unsigned sense) it cannot be counting up - // to wrap to 0, it must be counting down to equal 0. Also, while counting - // down, it cannot "miss" 0 (which would cause it to wrap), regardless of what - // the stride is. As such, NUW addrec's will always become zero in - // "start / -stride" steps, and we know that the division is exact. - if (AddRec->hasNoUnsignedWrap()) - // FIXME: We really want an "isexact" bit for udiv. - return getUDivExpr(Start, getNegativeSCEV(Step)); - // For now we handle only constant steps. + // + // TODO: Handle a nonconstant Step given AddRec. If the + // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap + // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step. + // We have not yet seen any such cases. const SCEVConstant *StepC = dyn_cast(Step); if (StepC == 0) return getCouldNotCompute(); - // First, handle unitary steps. - if (StepC->getValue()->equalsInt(1)) // 1*N = -Start (mod 2^BW), so: - return getNegativeSCEV(Start); // N = -Start (as unsigned) - - if (StepC->getValue()->isAllOnesValue()) // -1*N = -Start (mod 2^BW), so: - return Start; // N = Start (as unsigned) + // For positive steps (counting up until unsigned overflow): + // N = -Start/Step (as unsigned) + // For negative steps (counting down to zero): + // N = Start/-Step + // First compute the unsigned distance from zero in the direction of Step. + bool CountDown = StepC->getValue()->getValue().isNegative(); + const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start); + + // Handle unitary steps, which cannot wraparound. 
+ // 1*N = -Start; -1*N = Start (mod 2^BW), so: + // N = Distance (as unsigned) + if (StepC->getValue()->equalsInt(1) || StepC->getValue()->isAllOnesValue()) + return Distance; + + // If the recurrence is known not to wraparound, unsigned divide computes the + // back edge count. We know that the value will either become zero (and thus + // the loop terminates), that the loop will terminate through some other exit + // condition first, or that the loop has undefined behavior. This means + // we can't "miss" the exit value, even with nonunit stride. + // + // FIXME: Prove that loops always exhibits *acceptable* undefined + // behavior. Loops must exhibit defined behavior until a wrapped value is + // actually used. So the trip count computed by udiv could be smaller than the + // number of well-defined iterations. + if (AddRec->getNoWrapFlags(SCEV::FlagNW)) + // FIXME: We really want an "isexact" bit for udiv. + return getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step); // Then, try to solve the above equation provided that Start is constant. 
if (const SCEVConstant *StartC = dyn_cast(Start)) @@ -5220,12 +5245,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_SLE: if (!getSignedRange(RHS).getSignedMax().isMaxSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; } else if (!getSignedRange(LHS).getSignedMin().isMinSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SLT; Changed = true; } @@ -5233,12 +5258,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_SGE: if (!getSignedRange(RHS).getSignedMin().isMinSignedValue()) { RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; } else if (!getSignedRange(LHS).getSignedMax().isMaxSignedValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, - /*HasNUW=*/false, /*HasNSW=*/true); + SCEV::FlagNSW); Pred = ICmpInst::ICMP_SGT; Changed = true; } @@ -5246,12 +5271,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_ULE: if (!getUnsignedRange(RHS).getUnsignedMax().isMaxValue()) { RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_ULT; Changed = true; } @@ -5259,12 +5284,12 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, case ICmpInst::ICMP_UGE: if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { RHS = 
getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_UGT; Changed = true; } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, - /*HasNUW=*/true, /*HasNSW=*/false); + SCEV::FlagNUW); Pred = ICmpInst::ICMP_UGT; Changed = true; } @@ -5646,6 +5671,13 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, "This code doesn't handle negative strides yet!"); const Type *Ty = Start->getType(); + + // When Start == End, we have an exact BECount == 0. Short-circuit this case + // here because SCEV may not be able to determine that the unsigned division + // after rounding is zero. + if (Start == End) + return getConstant(Ty, 0); + const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); const SCEV *Diff = getMinusSCEV(End, Start); const SCEV *RoundUp = getAddExpr(Step, NegOne); @@ -5683,8 +5715,8 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, return getCouldNotCompute(); // Check to see if we have a flag which makes analysis easy. - bool NoWrap = isSigned ? AddRec->hasNoSignedWrap() : - AddRec->hasNoUnsignedWrap(); + bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) : + AddRec->getNoWrapFlags(SCEV::FlagNUW); if (AddRec->isAffine()) { unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); @@ -5768,7 +5800,16 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, // The maximum backedge count is similar, except using the minimum start // value and the maximum end value. - const SCEV *MaxBECount = getBECount(MinStart, MaxEnd, Step, NoWrap); + // If we already have an exact constant BECount, use it instead. + const SCEV *MaxBECount = isa(BECount) ? BECount + : getBECount(MinStart, MaxEnd, Step, NoWrap); + + // If the stride is nonconstant, and NoWrap == true, then + // getBECount(MinStart, MaxEnd) may not compute. 
This would result in an + // exact BECount and invalid MaxBECount, which should be avoided to catch + // more optimization opportunities. + if (isa(MaxBECount)) + MaxBECount = BECount; return BackedgeTakenInfo(BECount, MaxBECount); } @@ -5791,7 +5832,8 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, if (!SC->getValue()->isZero()) { SmallVector Operands(op_begin(), op_end()); Operands[0] = SE.getConstant(SC->getType(), 0); - const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop()); + const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), + getNoWrapFlags(FlagNW)); if (const SCEVAddRecExpr *ShiftedAddRec = dyn_cast(Shifted)) return ShiftedAddRec->getNumIterationsInRange( @@ -5852,7 +5894,9 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, // Range.getUpper() is crossed. SmallVector NewOps(op_begin(), op_end()); NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); - const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop()); + const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), + // getNoWrapFlags(FlagNW) + FlagAnyWrap); // Next, solve the constructed addrec std::pair Roots = diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index b7c110f28cf9..8e5a40008d88 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -262,7 +262,8 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Start = A->getStart(); if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; - S = SE.getAddRecExpr(Start, Step, A->getLoop()); + // FIXME: can use A->getNoWrapFlags(FlagNW) + S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap); return true; } @@ -314,7 +315,9 @@ static void SplitAddRecs(SmallVectorImpl &Ops, const SCEV *Zero = SE.getConstant(Ty, 0); AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), - A->getLoop())); + A->getLoop(), + // FIXME: 
A->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); if (const SCEVAddExpr *Add = dyn_cast(Start)) { Ops[i] = Zero; Ops.append(Add->op_begin(), Add->op_end()); @@ -823,7 +826,9 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, Rest = SE.getAddExpr(Rest, SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), - A->getLoop())); + A->getLoop(), + // FIXME: A->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); } if (const SCEVAddExpr *A = dyn_cast(Base)) { Base = A->getOperand(A->getNumOperands()-1); @@ -858,7 +863,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, // loop already visited by LSR for example, but it wouldn't have // to be. do { - if (IncV->getNumOperands() == 0 || isa(IncV)) { + if (IncV->getNumOperands() == 0 || isa(IncV) || + (isa(IncV) && !isa(IncV))) { IncV = 0; break; } @@ -926,14 +932,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); // Create the PHI. - Builder.SetInsertPoint(L->getHeader(), L->getHeader()->begin()); - PHINode *PN = Builder.CreatePHI(ExpandTy, "lsr.iv"); + BasicBlock *Header = L->getHeader(); + Builder.SetInsertPoint(Header, Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv"); rememberInstruction(PN); // Create the step instructions and populate the PHI. - BasicBlock *Header = L->getHeader(); - for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); - HPI != HPE; ++HPI) { + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *Pred = *HPI; // Add a start value. 
@@ -1004,10 +1010,11 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { if (!SE.properlyDominates(Start, L->getHeader())) { PostLoopOffset = Start; Start = SE.getConstant(Normalized->getType(), 0); - Normalized = - cast(SE.getAddRecExpr(Start, - Normalized->getStepRecurrence(SE), - Normalized->getLoop())); + Normalized = cast( + SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), + Normalized->getLoop(), + // FIXME: Normalized->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); } // Strip off any non-loop-dominating component from the addrec step. @@ -1018,7 +1025,10 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Step = SE.getConstant(Normalized->getType(), 1); Normalized = cast(SE.getAddRecExpr(Start, Step, - Normalized->getLoop())); + Normalized->getLoop(), + // FIXME: Normalized + // ->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); } // Expand the core addrec. If we need post-loop scaling, force it to @@ -1081,7 +1091,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { SmallVector NewOps(S->getNumOperands()); for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); - Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); + Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), + // FIXME: S->getNoWrapFlags(FlagNW) + SCEV::FlagAnyWrap)); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); BasicBlock::iterator NewInsertPt = @@ -1098,7 +1110,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!S->getStart()->isZero()) { SmallVector NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getConstant(Ty, 0); - const SCEV *Rest = SE.getAddRecExpr(NewOps, L); + // FIXME: can use S->getNoWrapFlags() + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. 
See the // comments on expandAddToGEP for details. @@ -1128,12 +1141,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // Create and insert the PHI node for the induction variable in the // specified loop. BasicBlock *Header = L->getHeader(); - CanonicalIV = PHINode::Create(Ty, "indvar", Header->begin()); + pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); + CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", + Header->begin()); rememberInstruction(CanonicalIV); Constant *One = ConstantInt::get(Ty, 1); - for (pred_iterator HPI = pred_begin(Header), HPE = pred_end(Header); - HPI != HPE; ++HPI) { + for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; if (L->contains(HP)) { // Insert a unit add instruction right before the terminator @@ -1333,7 +1347,7 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedValues.insert(I); // If we just claimed an existing instruction and that instruction had - // been the insert point, adjust the insert point forward so that + // been the insert point, adjust the insert point forward so that // subsequently inserted code will be dominated. if (Builder.GetInsertPoint() == I) { BasicBlock::iterator It = cast(I); @@ -1361,8 +1375,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); // Build a SCEV for {0,+,1}. + // Conservatively use FlagAnyWrap for now. const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), - SE.getConstant(Ty, 1), L); + SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); // Emit code for it. 
BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp index ac36cef89ebb..60e630aaab88 100644 --- a/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -97,7 +97,8 @@ const SCEV *llvm::TransformForPostIncUse(TransformKind Kind, const SCEV *N = TransformForPostIncUse(Kind, O, LUser, 0, Loops, SE, DT); Operands.push_back(N); } - const SCEV *Result = SE.getAddRecExpr(Operands, L); + // Conservatively use AnyWrap until/unless we need FlagNW. + const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); switch (Kind) { default: llvm_unreachable("Unexpected transform name!"); case NormalizeAutodetect: diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 40e18ab2fbfa..0faf1398ec76 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -31,7 +31,7 @@ // // The second field identifies the type's parent node in the tree, or // is null or omitted for a root node. A type is considered to alias -// all of its decendents and all of its ancestors in the tree. Also, +// all of its descendants and all of its ancestors in the tree. Also, // a type is considered to alias all types in other trees, so that // bitcode produced from multiple front-ends is handled conservatively. 
// @@ -59,6 +59,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Constants.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" #include "llvm/Metadata.h" diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 1060bc5349e4..8f18dd278aa0 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -429,6 +429,29 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero |= LHSKnownZero & Mask; KnownOne |= LHSKnownOne & Mask; } + + // Are we still trying to solve for the sign bit? + if (Mask.isNegative() && !KnownZero.isNegative() && !KnownOne.isNegative()){ + OverflowingBinaryOperator *OBO = cast(I); + if (OBO->hasNoSignedWrap()) { + if (I->getOpcode() == Instruction::Add) { + // Adding two positive numbers can't wrap into negative + if (LHSKnownZero.isNegative() && KnownZero2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // and adding two negative numbers can't wrap into positive. + else if (LHSKnownOne.isNegative() && KnownOne2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } else { + // Subtracting a negative number from a positive one can't wrap + if (LHSKnownZero.isNegative() && KnownOne2.isNegative()) + KnownZero |= APInt::getSignBit(BitWidth); + // neither can subtracting a positive number from a negative one. + else if (LHSKnownOne.isNegative() && KnownZero2.isNegative()) + KnownOne |= APInt::getSignBit(BitWidth); + } + } + } + return; } case Instruction::SRem: @@ -460,6 +483,19 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. 
+ if (Mask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, TD, + Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } + break; case Instruction::URem: { if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { @@ -597,6 +633,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, // Otherwise take the unions of the known bit sets of the operands, // taking conservative care to avoid excessive recursion. if (Depth < MaxDepth - 1 && !KnownZero && !KnownOne) { + // Skip if every incoming value references to ourself. + if (P->hasConstantValue() == P) + break; + KnownZero = APInt::getAllOnesValue(BitWidth); KnownOne = APInt::getAllOnesValue(BitWidth); for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) { @@ -684,6 +724,16 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { return isPowerOfTwo(SI->getTrueValue(), TD, Depth) && isPowerOfTwo(SI->getFalseValue(), TD, Depth); + // An exact divide or right shift can only shift off zero bits, so the result + // is a power of two only if the first operand is a power of two and not + // copying a sign bit (sdiv int_min, 2). + if (match(V, m_LShr(m_Value(), m_Value())) || + match(V, m_UDiv(m_Value(), m_Value()))) { + PossiblyExactOperator *PEO = cast(V); + if (PEO->isExact()) + return isPowerOfTwo(PEO->getOperand(0), TD, Depth); + } + return false; } @@ -720,6 +770,11 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // shl X, Y != 0 if X is odd. Note that the value of the shift is undefined // if the lowest bit is shifted off the end. if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { + // shl nuw can't remove any non-zero bits. 
+ BinaryOperator *BO = cast(V); + if (BO->hasNoUnsignedWrap()) + return isKnownNonZero(X, TD, Depth); + APInt KnownZero(BitWidth, 0); APInt KnownOne(BitWidth, 0); ComputeMaskedBits(X, APInt(BitWidth, 1), KnownZero, KnownOne, TD, Depth); @@ -729,11 +784,22 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // shr X, Y != 0 if X is negative. Note that the value of the shift is not // defined if the sign bit is shifted off the end. else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { + // shr exact can only shift out zero bits. + BinaryOperator *BO = cast(V); + if (BO->isExact()) + return isKnownNonZero(X, TD, Depth); + bool XKnownNonNegative, XKnownNegative; ComputeSignBit(X, XKnownNonNegative, XKnownNegative, TD, Depth); if (XKnownNegative) return true; } + // div exact can only produce a zero if the dividend is zero. + else if (match(V, m_IDiv(m_Value(X), m_Value()))) { + BinaryOperator *BO = cast(V); + if (BO->isExact()) + return isKnownNonZero(X, TD, Depth); + } // X + Y. else if (match(V, m_Add(m_Value(X), m_Value(Y)))) { bool XKnownNonNegative, XKnownNegative; @@ -1262,7 +1328,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType, break; } } - // If we succesfully found a value for each of our subaggregates + // If we successfully found a value for each of our subaggregates if (To) return To; } @@ -1691,7 +1757,7 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) { } else { // See if InstructionSimplify knows any relevant tricks. if (Instruction *I = dyn_cast(V)) - // TODO: Aquire a DominatorTree and use it. + // TODO: Acquire a DominatorTree and use it. 
if (Value *Simplified = SimplifyInstruction(I, TD, 0)) { V = Simplified; continue; diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index c5ad5fc41cd1..8fcc7aa29cc8 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -18,7 +18,6 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Signals.h" #include "llvm/Support/system_error.h" #include @@ -28,7 +27,7 @@ using namespace llvm; // Write an integer using variable bit rate encoding. This saves a few bytes // per entry in the symbol table. -static inline void writeInteger(unsigned num, raw_ostream& ARFile) { +static inline void writeInteger(unsigned num, std::ofstream& ARFile) { while (1) { if (num < 0x80) { // done? ARFile << (unsigned char)num; @@ -202,14 +201,14 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where, bool Archive::writeMember( const ArchiveMember& member, - raw_ostream& ARFile, + std::ofstream& ARFile, bool CreateSymbolTable, bool TruncateNames, bool ShouldCompress, std::string* ErrMsg ) { - unsigned filepos = ARFile.tell(); + unsigned filepos = ARFile.tellp(); filepos -= 8; // Get the data and its size either from the @@ -282,7 +281,7 @@ Archive::writeMember( ARFile.write(data,fSize); // Make sure the member is an even length - if ((ARFile.tell() & 1) == 1) + if ((ARFile.tellp() & 1) == 1) ARFile << ARFILE_PAD; // Close the mapped file if it was opened @@ -292,7 +291,7 @@ Archive::writeMember( // Write out the LLVM symbol table as an archive member to the file. void -Archive::writeSymbolTable(raw_ostream& ARFile) { +Archive::writeSymbolTable(std::ofstream& ARFile) { // Construct the symbol table's header ArchiveMemberHeader Hdr; @@ -316,7 +315,7 @@ Archive::writeSymbolTable(raw_ostream& ARFile) { #ifndef NDEBUG // Save the starting position of the symbol tables data content. 
- unsigned startpos = ARFile.tell(); + unsigned startpos = ARFile.tellp(); #endif // Write out the symbols sequentially @@ -333,7 +332,7 @@ Archive::writeSymbolTable(raw_ostream& ARFile) { #ifndef NDEBUG // Now that we're done with the symbol table, get the ending file position - unsigned endpos = ARFile.tell(); + unsigned endpos = ARFile.tellp(); #endif // Make sure that the amount we wrote is what we pre-computed. This is @@ -362,20 +361,25 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, } // Create a temporary file to store the archive in - SmallString<128> TempArchivePath; - int ArchFD; - if (error_code ec = - sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()), - ArchFD, TempArchivePath)) { - if (ErrMsg) *ErrMsg = ec.message(); + sys::Path TmpArchive = archPath; + if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) return true; - } // Make sure the temporary gets removed if we crash - sys::RemoveFileOnSignal(sys::Path(TempArchivePath.str())); + sys::RemoveFileOnSignal(TmpArchive); // Create archive file for output. - raw_fd_ostream ArchiveFile(ArchFD, true); + std::ios::openmode io_mode = std::ios::out | std::ios::trunc | + std::ios::binary; + std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); + + // Check for errors opening or creating archive file. 
+ if (!ArchiveFile.is_open() || ArchiveFile.bad()) { + TmpArchive.eraseFromDisk(); + if (ErrMsg) + *ErrMsg = "Error opening archive file: " + archPath.str(); + return true; + } // If we're creating a symbol table, reset it now if (CreateSymbolTable) { @@ -391,9 +395,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { if (writeMember(*I, ArchiveFile, CreateSymbolTable, TruncateNames, Compress, ErrMsg)) { + TmpArchive.eraseFromDisk(); ArchiveFile.close(); - bool existed; - sys::fs::remove(TempArchivePath.str(), existed); return true; } } @@ -408,12 +411,12 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // ensure compatibility with other archivers we need to put the symbol // table first in the file. Unfortunately, this means mapping the file // we just wrote back in and copying it to the destination file. - SmallString<128> TempArchiveWithSymbolTablePath; + sys::Path FinalFilePath = archPath; // Map in the archive we just wrote. 
{ OwningPtr arch; - if (error_code ec = MemoryBuffer::getFile(TempArchivePath.c_str(), arch)) { + if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) { if (ErrMsg) *ErrMsg = ec.message(); return true; @@ -422,15 +425,17 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // Open another temporary file in order to avoid invalidating the // mmapped data - if (error_code ec = - sys::fs::unique_file("%%-%%-%%-%%-" + sys::path::filename(archPath.str()), - ArchFD, TempArchiveWithSymbolTablePath)) { - if (ErrMsg) *ErrMsg = ec.message(); + if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) + return true; + sys::RemoveFileOnSignal(FinalFilePath); + + std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); + if (!FinalFile.is_open() || FinalFile.bad()) { + TmpArchive.eraseFromDisk(); + if (ErrMsg) + *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); return true; } - sys::RemoveFileOnSignal(sys::Path(TempArchiveWithSymbolTablePath.str())); - - raw_fd_ostream FinalFile(ArchFD, true); // Write the file magic number FinalFile << ARFILE_MAGIC; @@ -443,8 +448,7 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, if (foreignST) { if (writeMember(*foreignST, FinalFile, false, false, false, ErrMsg)) { FinalFile.close(); - bool existed; - sys::fs::remove(TempArchiveWithSymbolTablePath.str(), existed); + TmpArchive.eraseFromDisk(); return true; } } @@ -462,11 +466,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, } // free arch. 
// Move the final file over top of TmpArchive - if (error_code ec = sys::fs::rename(TempArchiveWithSymbolTablePath.str(), - TempArchivePath.str())) { - if (ErrMsg) *ErrMsg = ec.message(); + if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) return true; - } } // Before we replace the actual archive, we need to forget all the @@ -474,11 +475,8 @@ Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, bool Compress, // this because we cannot replace an open file on Windows. cleanUpMemory(); - if (error_code ec = sys::fs::rename(TempArchivePath.str(), - archPath.str())) { - if (ErrMsg) *ErrMsg = ec.message(); + if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) return true; - } // Set correct read and write permissions after temporary file is moved // to final destination path. diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index cdfacbebbfc3..a2c53bef364f 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -249,11 +249,7 @@ bool LLParser::ParseModuleAsm() { if (ParseToken(lltok::kw_asm, "expected 'module asm'") || ParseStringConstant(AsmStr)) return true; - const std::string &AsmSoFar = M->getModuleInlineAsm(); - if (AsmSoFar.empty()) - M->setModuleInlineAsm(AsmStr); - else - M->setModuleInlineAsm(AsmSoFar+"\n"+AsmStr); + M->appendModuleInlineAsm(AsmStr); return false; } @@ -518,7 +514,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) { if (Result) return false; // Otherwise, create MDNode forward reference. 
- MDNode *FwdNode = MDNode::getTemporary(Context, 0, 0); + MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef()); ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc()); if (NumberedMetadata.size() <= MID) @@ -576,7 +572,7 @@ bool LLParser::ParseStandaloneMetadata() { ParseToken(lltok::rbrace, "expected end of metadata node")) return true; - MDNode *Init = MDNode::get(Context, Elts.data(), Elts.size()); + MDNode *Init = MDNode::get(Context, Elts); // See if this was forward referenced, if so, handle it. std::map, LocTy> >::iterator @@ -2502,7 +2498,7 @@ bool LLParser::ParseMetadataListValue(ValID &ID, PerFunctionState *PFS) { ParseToken(lltok::rbrace, "expected end of metadata node")) return true; - ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size()); + ID.MDNodeVal = MDNode::get(Context, Elts); ID.Kind = ValID::t_MDNode; return false; } @@ -3638,8 +3634,7 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) { if (!Ty->isFirstClassType()) return Error(TypeLoc, "phi node must have first class type"); - PHINode *PN = PHINode::Create(Ty); - PN->reserveOperandSpace(PHIVals.size()); + PHINode *PN = PHINode::Create(Ty, PHIVals.size()); for (unsigned i = 0, e = PHIVals.size(); i != e; ++i) PN->addIncoming(PHIVals[i].first, PHIVals[i].second); Inst = PN; diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index dbf8da027996..19f57cf6907b 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -301,8 +301,7 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { NewC = ConstantVector::get(NewOps); } else { assert(isa(UserC) && "Must be a ConstantExpr."); - NewC = cast(UserC)->getWithOperands(&NewOps[0], - NewOps.size()); + NewC = cast(UserC)->getWithOperands(NewOps); } UserC->replaceAllUsesWith(NewC); @@ -350,7 +349,7 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) { } // Create and return a placeholder, which will later be RAUW'd. 
- Value *V = MDNode::getTemporary(Context, 0, 0); + Value *V = MDNode::getTemporary(Context, ArrayRef()); MDValuePtrs[Idx] = V; return V; } @@ -844,9 +843,7 @@ bool BitcodeReader::ParseMetadata() { else Elts.push_back(NULL); } - Value *V = MDNode::getWhenValsUnresolved(Context, - Elts.data(), Elts.size(), - IsFunctionLocal); + Value *V = MDNode::getWhenValsUnresolved(Context, Elts, IsFunctionLocal); IsFunctionLocal = false; MDValueList.AssignValue(V, NextMDValueNo++); break; @@ -2288,9 +2285,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { const Type *Ty = getTypeByID(Record[0]); if (!Ty) return Error("Invalid PHI record"); - PHINode *PN = PHINode::Create(Ty); + PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); InstructionList.push_back(PN); - PN->reserveOperandSpace((Record.size()-1)/2); for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { Value *V = getFnValueByID(Record[1+i], Ty); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index f8ef8c668c47..e34137f6155a 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -197,7 +197,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { // Loop over all of the types, emitting each in turn. 
for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { - const Type *T = TypeList[i].first; + const Type *T = TypeList[i]; int AbbrevToUse = 0; unsigned Code = 0; diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 2f02262c36af..21f004a7dc53 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "ValueEnumerator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" @@ -21,22 +23,10 @@ #include using namespace llvm; -static bool isSingleValueType(const std::pair &P) { - return P.first->isSingleValueType(); -} - static bool isIntegerValue(const std::pair &V) { return V.first->getType()->isIntegerTy(); } -static bool CompareByFrequency(const std::pair &P1, - const std::pair &P2) { - return P1.second > P2.second; -} - /// ValueEnumerator - Enumerate module-level information. ValueEnumerator::ValueEnumerator(const Module *M) { // Enumerate the global variables. @@ -120,18 +110,72 @@ ValueEnumerator::ValueEnumerator(const Module *M) { // Optimize constant ordering. OptimizeConstants(FirstConstant, Values.size()); - // Sort the type table by frequency so that most commonly used types are early - // in the table (have low bit-width). - std::stable_sort(Types.begin(), Types.end(), CompareByFrequency); - - // Partition the Type ID's so that the single-value types occur before the - // aggregate types. This allows the aggregate types to be dropped from the - // type table after parsing the global variable initializers. - std::partition(Types.begin(), Types.end(), isSingleValueType); + OptimizeTypes(); // Now that we rearranged the type table, rebuild TypeMap. 
for (unsigned i = 0, e = Types.size(); i != e; ++i) - TypeMap[Types[i].first] = i+1; + TypeMap[Types[i]] = i+1; +} + +struct TypeAndDeps { + const Type *Ty; + unsigned NumDeps; +}; + +static int CompareByDeps(const void *a, const void *b) { + const TypeAndDeps &ta = *(const TypeAndDeps*) a; + const TypeAndDeps &tb = *(const TypeAndDeps*) b; + return ta.NumDeps - tb.NumDeps; +} + +static void VisitType(const Type *Ty, SmallPtrSet &Visited, + std::vector &Out) { + if (Visited.count(Ty)) + return; + + Visited.insert(Ty); + + for (Type::subtype_iterator I2 = Ty->subtype_begin(), + E2 = Ty->subtype_end(); I2 != E2; ++I2) { + const Type *InnerType = I2->get(); + VisitType(InnerType, Visited, Out); + } + + Out.push_back(Ty); +} + +void ValueEnumerator::OptimizeTypes(void) { + // If the types form a DAG, this will compute a topological sort and + // no forward references will be needed when reading them in. + // If there are cycles, this is a simple but reasonable heuristic for + // the minimum feedback arc set problem. 
+ const unsigned NumTypes = Types.size(); + std::vector TypeDeps; + TypeDeps.resize(NumTypes); + + for (unsigned I = 0; I < NumTypes; ++I) { + const Type *Ty = Types[I]; + TypeDeps[I].Ty = Ty; + TypeDeps[I].NumDeps = 0; + } + + for (unsigned I = 0; I < NumTypes; ++I) { + const Type *Ty = TypeDeps[I].Ty; + for (Type::subtype_iterator I2 = Ty->subtype_begin(), + E2 = Ty->subtype_end(); I2 != E2; ++I2) { + const Type *InnerType = I2->get(); + unsigned InnerIndex = TypeMap.lookup(InnerType) - 1; + TypeDeps[InnerIndex].NumDeps++; + } + } + array_pod_sort(TypeDeps.begin(), TypeDeps.end(), CompareByDeps); + + SmallPtrSet Visited; + Types.clear(); + Types.reserve(NumTypes); + for (unsigned I = 0; I < NumTypes; ++I) { + VisitType(TypeDeps[I].Ty, Visited, Types); + } } unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { @@ -319,7 +363,7 @@ void ValueEnumerator::EnumerateValue(const Value *V) { // Initializers for globals are handled explicitly elsewhere. } else if (isa(C) && cast(C)->isString()) { // Do not enumerate the initializers for an array of simple characters. - // The initializers just polute the value table, and we emit the strings + // The initializers just pollute the value table, and we emit the strings // specially. } else if (C->getNumOperands()) { // If a constant has operands, enumerate them. This makes sure that if a @@ -352,14 +396,12 @@ void ValueEnumerator::EnumerateValue(const Value *V) { void ValueEnumerator::EnumerateType(const Type *Ty) { unsigned &TypeID = TypeMap[Ty]; - if (TypeID) { - // If we've already seen this type, just increase its occurrence count. - Types[TypeID-1].second++; + // We've already seen this type. + if (TypeID) return; - } // First time we saw this type, add it. - Types.push_back(std::make_pair(Ty, 1U)); + Types.push_back(Ty); TypeID = Types.size(); // Enumerate subtypes. 
@@ -381,7 +423,7 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { // This constant may have operands, make sure to enumerate the types in // them. for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { - const User *Op = C->getOperand(i); + const Value *Op = C->getOperand(i); // Don't enumerate basic blocks here, this happens as operands to // blockaddress. diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index cd1d2371b701..1e42a2667669 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -36,8 +36,7 @@ class MDSymbolTable; class ValueEnumerator { public: - // For each type, we remember its Type* and occurrence frequency. - typedef std::vector > TypeList; + typedef std::vector TypeList; // For each value, we remember its Value* and occurrence frequency. typedef std::vector > ValueList; @@ -136,6 +135,7 @@ class ValueEnumerator { private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + void OptimizeTypes(); void EnumerateMDNodeOperands(const MDNode *N); void EnumerateMetadata(const Value *MD); diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index e2838c373a39..80118f081913 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(Transforms) add_subdirectory(Linker) add_subdirectory(Analysis) add_subdirectory(MC) +add_subdirectory(CompilerDriver) add_subdirectory(Object) add_subdirectory(ExecutionEngine) add_subdirectory(Target) diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index b520d8fcedc0..5c809f7fd668 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -357,7 +357,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, RegRefs = State->GetRegRefs(); // Handle dead defs by simulating a last-use of the register just - // after the def. 
A dead def can occur because the def is truely + // after the def. A dead def can occur because the def is truly // dead, or because only a subregister is live at the def. If we // don't do this the dead def will be incorrectly merged into the // previous def. diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 36638c36de67..125e64196f15 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -211,7 +211,6 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); const ReturnInst *Ret = dyn_cast(Term); - const Function *F = ExitBB->getParent(); // The block must end in a return statement or unreachable. // @@ -250,6 +249,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. + const Function *F = ExitBB->getParent(); unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) return false; diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp new file mode 100644 index 000000000000..0db28a636ad8 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -0,0 +1,87 @@ +//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing DWARF exception info into asm files. 
+// +//===----------------------------------------------------------------------===// + +#include "DwarfException.h" +#include "llvm/Module.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLocation.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +using namespace llvm; + +ARMException::ARMException(AsmPrinter *A) + : DwarfException(A), + shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + {} + +ARMException::~ARMException() {} + +void ARMException::EndModule() { +} + +/// BeginFunction - Gather pre-function exception information. Assumes it's +/// being emitted immediately after the function entry point. +void ARMException::BeginFunction(const MachineFunction *MF) { + Asm->OutStreamer.EmitFnStart(); + if (!Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); +} + +/// EndFunction - Gather and emit post-function exception information. 
+/// +void ARMException::EndFunction() { + if (Asm->MF->getFunction()->doesNotThrow() && !UnwindTablesMandatory) + Asm->OutStreamer.EmitCantUnwind(); + else { + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", + Asm->getFunctionNumber())); + + // Emit references to personality. + if (const Function * Personality = + MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); + Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + Asm->OutStreamer.EmitPersonality(PerSym); + } + + // Map all labels and get rid of any dead landing pads. + MMI->TidyLandingPads(); + + Asm->OutStreamer.EmitHandlerData(); + + // Emit actual exception table + EmitExceptionTable(); + } + + Asm->OutStreamer.EmitFnEnd(); +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 9cb882e6a1bb..8116f8d5925f 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -33,10 +33,12 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Assembly/Writer.h" #include "llvm/ADT/SmallString.h" @@ -70,17 +72,17 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const TargetData &TD, unsigned NumBits = 0; if (const GlobalVariable *GVar = dyn_cast(GV)) NumBits = TD.getPreferredAlignmentLog(GVar); - + // If InBits is specified, round it to it. if (InBits > NumBits) NumBits = InBits; - + // If the GV has a specified alignment, take it into account. 
if (GV->getAlignment() == 0) return NumBits; - + unsigned GVAlign = Log2_32(GV->getAlignment()); - + // If the GVAlign is larger than NumBits, or if we are required to obey // NumBits because the GV has an assigned section, obey it. if (GVAlign > NumBits || GV->hasSection()) @@ -104,16 +106,16 @@ AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) AsmPrinter::~AsmPrinter() { assert(DD == 0 && DE == 0 && "Debug/EH info didn't get finalized"); - + if (GCMetadataPrinters != 0) { gcp_map_type &GCMap = getGCMap(GCMetadataPrinters); - + for (gcp_map_type::iterator I = GCMap.begin(), E = GCMap.end(); I != E; ++I) delete I->second; delete &GCMap; GCMetadataPrinters = 0; } - + delete &OutStreamer; } @@ -156,9 +158,9 @@ bool AsmPrinter::doInitialization(Module &M) { // Initialize TargetLoweringObjectFile. const_cast(getObjFileLowering()) .Initialize(OutContext, TM); - + Mang = new Mangler(OutContext, *TM.getTargetData()); - + // Allow the target to emit any magic that it wants at the start of the file. EmitStartOfAsmFile(M); @@ -196,6 +198,9 @@ bool AsmPrinter::doInitialization(Module &M) { case ExceptionHandling::DwarfCFI: DE = new DwarfCFIException(this); break; + case ExceptionHandling::ARM: + DE = new ARMException(this); + break; } return false; @@ -250,56 +255,52 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { - if (!GV->hasInitializer()) // External globals require no code. - return; - - // Check to see if this is a special global used by LLVM, if so, emit it. - if (EmitSpecialLLVMGlobal(GV)) - return; + if (GV->hasInitializer()) { + // Check to see if this is a special global used by LLVM, if so, emit it. 
+ if (EmitSpecialLLVMGlobal(GV)) + return; - if (isVerbose()) { - WriteAsOperand(OutStreamer.GetCommentOS(), GV, - /*PrintType=*/false, GV->getParent()); - OutStreamer.GetCommentOS() << '\n'; - } - - MCSymbol *GVSym = Mang->getSymbol(GV); - EmitVisibility(GVSym, GV->getVisibility()); - - if (MAI->hasDotTypeDotSizeDirective()) - OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); - - SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - - const TargetData *TD = TM.getTargetData(); - uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); - - // If the alignment is specified, we *must* obey it. Overaligning a global - // with a specified alignment is a prompt way to break globals emitted to - // sections and expected to be contiguous (e.g. ObjC metadata). - unsigned AlignLog = getGVAlignmentLog2(GV, *TD); - - // Handle common and BSS local symbols (.lcomm). - if (GVKind.isCommon() || GVKind.isBSSLocal()) { - if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. - if (isVerbose()) { WriteAsOperand(OutStreamer.GetCommentOS(), GV, /*PrintType=*/false, GV->getParent()); OutStreamer.GetCommentOS() << '\n'; } - + } + + MCSymbol *GVSym = Mang->getSymbol(GV); + EmitVisibility(GVSym, GV->getVisibility()); + + if (!GV->hasInitializer()) // External globals require no extra code. + return; + + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject); + + SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); + + const TargetData *TD = TM.getTargetData(); + uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + + // If the alignment is specified, we *must* obey it. Overaligning a global + // with a specified alignment is a prompt way to break globals emitted to + // sections and expected to be contiguous (e.g. ObjC metadata). + unsigned AlignLog = getGVAlignmentLog2(GV, *TD); + + // Handle common and BSS local symbols (.lcomm). 
+ if (GVKind.isCommon() || GVKind.isBSSLocal()) { + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + // Handle common symbols. if (GVKind.isCommon()) { unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; - + // .comm _foo, 42, 4 OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } - + // Handle local BSS symbols. if (MAI->hasMachoZeroFillDirective()) { const MCSection *TheSection = @@ -308,7 +309,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } - + if (MAI->hasLCOMMDirective()) { // .lcomm _foo, 42 OutStreamer.EmitLocalCommonSymbol(GVSym, Size); @@ -318,14 +319,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { unsigned Align = 1 << AlignLog; if (!getObjFileLowering().getCommDirectiveSupportsAlignment()) Align = 0; - + // .local _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local); // .comm _foo, 42, 4 OutStreamer.EmitCommonSymbol(GVSym, Size, Align); return; } - + const MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, Mang, TM); @@ -333,14 +334,14 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // emission. if (GVKind.isBSSExtern() && MAI->hasMachoZeroFillDirective()) { if (Size == 0) Size = 1; // zerofill of 0 bytes is undefined. - + // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); // .zerofill __DATA, __common, _foo, 400, 5 OutStreamer.EmitZerofill(TheSection, GVSym, Size, 1 << AlignLog); return; } - + // Handle thread local data for mach-o which requires us to output an // additional structure of data and mangle the original symbol so that we // can reference it later. @@ -353,31 +354,31 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // specific code. 
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) { // Emit the .tbss symbol - MCSymbol *MangSym = + MCSymbol *MangSym = OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); - + if (GVKind.isThreadBSS()) OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); - EmitAlignment(AlignLog, GV); + EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(MangSym); - + EmitGlobalConstant(GV->getInitializer()); } - + OutStreamer.AddBlankLine(); - + // Emit the variable struct for the runtime. - const MCSection *TLVSect + const MCSection *TLVSect = getObjFileLowering().getTLSExtraDataSection(); - + OutStreamer.SwitchSection(TLVSect); // Emit the linkage here. EmitLinkage(GV->getLinkage(), GVSym); OutStreamer.EmitLabel(GVSym); - + // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime @@ -387,7 +388,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); - + OutStreamer.AddBlankLine(); return; } @@ -404,7 +405,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { if (MAI->hasDotTypeDotSizeDirective()) // .size foo, 42 OutStreamer.EmitELFSize(GVSym, MCConstantExpr::Create(Size, OutContext)); - + OutStreamer.AddBlankLine(); } @@ -413,7 +414,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { void AsmPrinter::EmitFunctionHeader() { // Print out constants referenced by the function EmitConstantPool(); - + // Print the 'header' of function. const Function *F = MF->getFunction(); @@ -435,7 +436,7 @@ void AsmPrinter::EmitFunctionHeader() { // Emit the CurrentFnSym. This is a virtual function to allow targets to // do their wild and crazy things as required. 
EmitFunctionEntryLabel(); - + // If the function had address-taken blocks that got deleted, then we have // references to the dangling symbols. Emit them at the start of the function // so that we don't get references to undefined symbols. @@ -445,17 +446,17 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.AddComment("Address taken block that was later removed"); OutStreamer.EmitLabel(DeadBlockSyms[i]); } - + // Add some workaround for linkonce linkage on Cygwin\MinGW. if (MAI->getLinkOnceDirective() != 0 && (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { // FIXME: What is this? - MCSymbol *FakeStub = + MCSymbol *FakeStub = OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ CurrentFnSym->getName()); OutStreamer.EmitLabel(FakeStub); } - + // Emit pre-function debug and/or EH information. if (DE) { NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -480,44 +481,16 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, - raw_ostream &CommentOS) { - const LLVMContext &Ctx = MF->getFunction()->getContext(); - if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(Ctx)); - // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) - CommentOS << Scope.getFilename(); - else - CommentOS << ""; - CommentOS << ':' << DL.getLine(); - if (DL.getCol() != 0) - CommentOS << ':' << DL.getCol(); - DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); - if (!InlinedAtDL.isUnknown()) { - CommentOS << "[ "; - EmitDebugLoc(InlinedAtDL, MF, CommentOS); - CommentOS << " ]"; - } - } -} - /// EmitComments - Pretty-print comments for instructions. static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { const MachineFunction *MF = MI.getParent()->getParent(); const TargetMachine &TM = MF->getTarget(); - - DebugLoc DL = MI.getDebugLoc(); - if (!DL.isUnknown()) { // Print source line info. 
- EmitDebugLoc(DL, MF, CommentOS); - CommentOS << '\n'; - } - + // Check for spills and reloads int FI; - + const MachineFrameInfo *FrameInfo = MF->getFrameInfo(); - + // We assume a single instruction only has a spill or reload, not // both. const MachineMemOperand *MMO; @@ -538,7 +511,7 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { if (FrameInfo->isSpillSlotObjectIndex(FI)) CommentOS << MMO->getSize() << "-byte Folded Spill\n"; } - + // Check for spill-induced copies if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) CommentOS << " Reload Reuse\n"; @@ -612,21 +585,61 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); } - + OS << '+' << MI->getOperand(1).getImm(); // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; } +bool AsmPrinter::needsCFIMoves() { + if (UnwindTablesMandatory) + return true; + + if (MMI->hasDebugInfo() && !MAI->doesDwarfRequireFrameSection()) + return true; + + if (MF->getFunction()->doesNotThrow()) + return false; + + return true; +} + +void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { + MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + + if (MAI->doesDwarfRequireFrameSection() || + MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + OutStreamer.EmitLabel(Label); + + if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) + return; + + if (!needsCFIMoves()) + return; + + MachineModuleInfo &MMI = MF->getMMI(); + std::vector &Moves = MMI.getFrameMoves(); + bool FoundOne = false; + (void)FoundOne; + for (std::vector::iterator I = Moves.begin(), + E = Moves.end(); I != E; ++I) { + if (I->getLabel() == Label) { + EmitCFIFrameMove(*I); + FoundOne = true; + } + } + assert(FoundOne); +} + /// EmitFunctionBody - This method emits the body and trailer for a /// function. 
void AsmPrinter::EmitFunctionBody() { // Emit target-specific gunk before the function body. EmitFunctionBodyStart(); - + bool ShouldPrintDebugScopes = DD && MMI->hasDebugInfo(); - + // Print out code for the function. bool HasAnyRealCode = false; const MachineInstr *LastMI = 0; @@ -649,12 +662,15 @@ void AsmPrinter::EmitFunctionBody() { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->beginInstruction(II); } - + if (isVerbose()) EmitComments(*II, OutStreamer.GetCommentOS()); switch (II->getOpcode()) { case TargetOpcode::PROLOG_LABEL: + emitPrologLabel(*II); + break; + case TargetOpcode::EH_LABEL: case TargetOpcode::GC_LABEL: OutStreamer.EmitLabel(II->getOperand(0).getMCSymbol()); @@ -675,10 +691,13 @@ void AsmPrinter::EmitFunctionBody() { if (isVerbose()) EmitKill(II, *this); break; default: + if (!TM.hasMCUseLoc()) + MCLineEntry::Make(&OutStreamer, getCurrentSection()); + EmitInstruction(II); break; } - + if (ShouldPrintDebugScopes) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->endInstruction(II); @@ -705,10 +724,10 @@ void AsmPrinter::EmitFunctionBody() { } else // Target not mc-ized yet. OutStreamer.EmitRawText(StringRef("\tnop\n")); } - + // Emit target-specific gunk after the function body. EmitFunctionBodyEnd(); - + // If the target wants a .size directive for the size of the function, emit // it. if (MAI->hasDotTypeDotSizeDirective()) { @@ -716,14 +735,14 @@ void AsmPrinter::EmitFunctionBody() { // difference between the function label and the temp label. MCSymbol *FnEndLabel = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(FnEndLabel); - + const MCExpr *SizeExp = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(FnEndLabel, OutContext), MCSymbolRefExpr::Create(CurrentFnSym, OutContext), OutContext); OutStreamer.EmitELFSize(CurrentFnSym, SizeExp); } - + // Emit post-function debug information. 
if (DD) { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -734,20 +753,71 @@ void AsmPrinter::EmitFunctionBody() { DE->EndFunction(); } MMI->EndFunction(); - + // Print out jump tables referenced by the function. EmitJumpTableInfo(); - + OutStreamer.AddBlankLine(); } /// getDebugValueLocation - Get location information encoded by DBG_VALUE /// operands. -MachineLocation AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { +MachineLocation AsmPrinter:: +getDebugValueLocation(const MachineInstr *MI) const { // Target specific DBG_VALUE instructions are handled by each target. return MachineLocation(); } +/// getDwarfRegOpSize - get size required to emit given machine location using +/// dwarf encoding. +unsigned AsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(MLoc.getReg(), false); + if (int Offset = MLoc.getOffset()) { + // If the value is at a certain offset from frame register then + // use DW_OP_breg. + if (DWReg < 32) + return 1 + MCAsmInfo::getSLEB128Size(Offset); + else + return 1 + MCAsmInfo::getULEB128Size(MLoc.getReg()) + + MCAsmInfo::getSLEB128Size(Offset); + } + if (DWReg < 32) + return 1; + + return 1 + MCAsmInfo::getULEB128Size(DWReg); +} + +/// EmitDwarfRegOp - Emit dwarf register operation. 
+void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + unsigned Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); + if (int Offset = MLoc.getOffset()) { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); + EmitInt8(dwarf::DW_OP_breg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_bregx"); + EmitInt8(dwarf::DW_OP_bregx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + EmitSLEB128(Offset); + } else { + if (Reg < 32) { + OutStreamer.AddComment( + dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); + EmitInt8(dwarf::DW_OP_reg0 + Reg); + } else { + OutStreamer.AddComment("DW_OP_regx"); + EmitInt8(dwarf::DW_OP_regx); + OutStreamer.AddComment(Twine(Reg)); + EmitULEB128(Reg); + } + } +} + bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); @@ -782,7 +852,7 @@ bool AsmPrinter::doFinalization(Module &M) { } delete DD; DD = 0; } - + // If the target wants to know about weak references, print them all. 
if (MAI->getWeakRefDirective()) { // FIXME: This is not lazy, it would be nice to only print weak references @@ -796,7 +866,7 @@ bool AsmPrinter::doFinalization(Module &M) { if (!I->hasExternalWeakLinkage()) continue; OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); } - + for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); @@ -822,7 +892,7 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, I->getVisibility()); // Emit the directives as assignments aka .set: - OutStreamer.EmitAssignment(Name, + OutStreamer.EmitAssignment(Name, MCSymbolRefExpr::Create(Target, OutContext)); } } @@ -839,14 +909,14 @@ bool AsmPrinter::doFinalization(Module &M) { if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty()) if (const MCSection *S = MAI->getNonexecutableStackSection(OutContext)) OutStreamer.SwitchSection(S); - + // Allow the target to emit any magic that it wants at the end of the file, // after everything else has gone out. EmitEndOfAsmFile(M); - + delete Mang; Mang = 0; MMI = 0; - + OutStreamer.Finish(); return false; } @@ -886,7 +956,7 @@ void AsmPrinter::EmitConstantPool() { for (unsigned i = 0, e = CP.size(); i != e; ++i) { const MachineConstantPoolEntry &CPE = CP[i]; unsigned Align = CPE.getAlignment(); - + SectionKind Kind; switch (CPE.getRelocationInfo()) { default: llvm_unreachable("Unknown section kind"); @@ -904,7 +974,7 @@ void AsmPrinter::EmitConstantPool() { } const MCSection *S = getObjFileLowering().getSectionForConstant(Kind); - + // The number of sections are small, just do a linear search from the // last section to the first. bool Found = false; @@ -953,7 +1023,7 @@ void AsmPrinter::EmitConstantPool() { } /// EmitJumpTableInfo - Print assembly representations of the jump tables used -/// by the current function to the current output stream. 
+/// by the current function to the current output stream. /// void AsmPrinter::EmitJumpTableInfo() { const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo(); @@ -962,7 +1032,7 @@ void AsmPrinter::EmitJumpTableInfo() { const std::vector &JT = MJTI->getJumpTables(); if (JT.empty()) return; - // Pick the directive to use to print the jump table entries, and switch to + // Pick the directive to use to print the jump table entries, and switch to // the appropriate section. const Function *F = MF->getFunction(); bool JTInDiffSection = false; @@ -978,18 +1048,18 @@ void AsmPrinter::EmitJumpTableInfo() { OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F,Mang,TM)); } else { // Otherwise, drop it in the readonly section. - const MCSection *ReadOnlySection = + const MCSection *ReadOnlySection = getObjFileLowering().getSectionForConstant(SectionKind::getReadOnly()); OutStreamer.SwitchSection(ReadOnlySection); JTInDiffSection = true; } EmitAlignment(Log2_32(MJTI->getEntryAlignment(*TM.getTargetData()))); - + for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) { const std::vector &JTBBs = JT[JTI].MBBs; - - // If this jump table was deleted, ignore it. + + // If this jump table was deleted, ignore it. if (JTBBs.empty()) continue; // For the EK_LabelDifference32 entry, if the target supports .set, emit a @@ -1003,15 +1073,15 @@ void AsmPrinter::EmitJumpTableInfo() { for (unsigned ii = 0, ee = JTBBs.size(); ii != ee; ++ii) { const MachineBasicBlock *MBB = JTBBs[ii]; if (!EmittedSets.insert(MBB)) continue; - + // .set LJTSet, LBB32-base const MCExpr *LHS = MCSymbolRefExpr::Create(MBB->getSymbol(), OutContext); OutStreamer.EmitAssignment(GetJTSetSymbol(JTI, MBB->getNumber()), MCBinaryExpr::CreateSub(LHS, Base, OutContext)); } - } - + } + // On some targets (e.g. Darwin) we want to emit two consecutive labels // before each jump table. The first label is never referenced, but tells // the assembler and linker the extents of the jump table object. 
The @@ -1064,8 +1134,8 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, // If the .set directive is supported, this is emitted as: // .set L4_5_set_123, LBB123 - LJTI1_2 // .word L4_5_set_123 - - // If we have emitted set directives for the jump table entries, print + + // If we have emitted set directives for the jump table entries, print // them rather than the entries themselves. If we're emitting PIC, then // emit the table entries as differences between two text section labels. if (MAI->hasSetDirective()) { @@ -1081,9 +1151,9 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, break; } } - + assert(Value && "Unknown entry kind!"); - + unsigned EntrySize = MJTI->getEntrySize(*TM.getTargetData()); OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); } @@ -1103,18 +1173,18 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { if (GV->getSection() == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; - + if (!GV->hasAppendingLinkage()) return false; assert(GV->hasInitializer() && "Not a special LLVM global!"); - + const TargetData *TD = TM.getTargetData(); unsigned Align = Log2_32(TD->getPointerPrefAlignment()); if (GV->getName() == "llvm.global_ctors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticCtorSection()); EmitAlignment(Align); EmitXXStructorList(GV->getInitializer()); - + if (TM.getRelocationModel() == Reloc::Static && MAI->hasStaticCtorDtorReferenceInStaticMode()) { StringRef Sym(".constructors_used"); @@ -1122,8 +1192,8 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { MCSA_Reference); } return true; - } - + } + if (GV->getName() == "llvm.global_dtors") { OutStreamer.SwitchSection(getObjFileLowering().getStaticDtorSection()); EmitAlignment(Align); @@ -1137,7 +1207,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } return true; } - + return false; } @@ -1148,7 +1218,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant 
*List) { // Should be an array of 'i8*'. ConstantArray *InitList = dyn_cast(List); if (InitList == 0) return; - + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { const GlobalValue *GV = dyn_cast(InitList->getOperand(i)->stripPointerCasts()); @@ -1157,7 +1227,7 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) { } } -/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the +/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the /// function pointers, ignoring the init priority. void AsmPrinter::EmitXXStructorList(Constant *List) { // Should be an array of '{ int, void ()* }' structs. The first value is the @@ -1203,11 +1273,11 @@ void AsmPrinter::EmitInt32(int Value) const { void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, unsigned Size) const { // Get the Hi-Lo expression. - const MCExpr *Diff = + const MCExpr *Diff = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(Hi, OutContext), MCSymbolRefExpr::Create(Lo, OutContext), OutContext); - + if (!MAI->hasSetDirective()) { OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/); return; @@ -1219,27 +1289,27 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); } -/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" +/// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" /// where the size in bytes of the directive is specified by Size and Hi/Lo /// specify the labels. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, - const MCSymbol *Lo, unsigned Size) + const MCSymbol *Lo, unsigned Size) const { - + // Emit Hi+Offset - Lo // Get the Hi+Offset expression. 
const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Hi, OutContext), MCConstantExpr::Create(Offset, OutContext), OutContext); - + // Get the Hi+Offset-Lo expression. - const MCExpr *Diff = + const MCExpr *Diff = MCBinaryExpr::CreateSub(Plus, MCSymbolRefExpr::Create(Lo, OutContext), OutContext); - - if (!MAI->hasSetDirective()) + + if (!MAI->hasSetDirective()) OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); else { // Otherwise, emit with .set (aka assignment). @@ -1249,22 +1319,22 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, } } -/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" +/// EmitLabelPlusOffset - Emit something like ".long Label+Offset" /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size) + unsigned Size) const { - + // Emit Label+Offset const MCExpr *Plus = - MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), + MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(Label, OutContext), MCConstantExpr::Create(Offset, OutContext), OutContext); - + OutStreamer.EmitValue(Plus, 4, 0/*AddrSpace*/); } - + //===----------------------------------------------------------------------===// @@ -1276,9 +1346,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, // void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { if (GV) NumBits = getGVAlignmentLog2(GV, *TM.getTargetData(), NumBits); - + if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment. 
- + if (getCurrentSection()->getKind().isText()) OutStreamer.EmitCodeAlignment(1 << NumBits); else @@ -1293,25 +1363,25 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV) const { /// static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { MCContext &Ctx = AP.OutContext; - + if (CV->isNullValue() || isa(CV)) return MCConstantExpr::Create(0, Ctx); if (const ConstantInt *CI = dyn_cast(CV)) return MCConstantExpr::Create(CI->getZExtValue(), Ctx); - + if (const GlobalValue *GV = dyn_cast(CV)) return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast(CV)) return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); - + const ConstantExpr *CE = dyn_cast(CV); if (CE == 0) { llvm_unreachable("Unknown constant value to lower!"); return MCConstantExpr::Create(0, Ctx); } - + switch (CE->getOpcode()) { default: // If the code isn't optimized, there may be outstanding folding @@ -1339,21 +1409,21 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { SmallVector IdxVec(CE->op_begin()+1, CE->op_end()); int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), &IdxVec[0], IdxVec.size()); - + const MCExpr *Base = LowerConstant(CE->getOperand(0), AP); if (Offset == 0) return Base; - + // Truncate/sext the offset to the pointer size. if (TD.getPointerSizeInBits() != 64) { int SExtAmount = 64-TD.getPointerSizeInBits(); Offset = (Offset << SExtAmount) >> SExtAmount; } - + return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); } - + case Instruction::Trunc: // We emit the value and depend on the assembler to truncate the generated // expression properly. 
This is important for differences between @@ -1372,7 +1442,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { false/*ZExt*/); return LowerConstant(Op, AP); } - + case Instruction::PtrToInt: { const TargetData &TD = *AP.TM.getTargetData(); // Support only foldable casts to/from pointers that can be eliminated by @@ -1394,7 +1464,7 @@ static const MCExpr *LowerConstant(const Constant *CV, AsmPrinter &AP) { const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } - + // The MC library also has a right-shift operator, but it isn't consistently // signed or unsigned between different targets. case Instruction::Add: @@ -1435,7 +1505,7 @@ static void EmitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, EmitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); return; } - + // Otherwise, it can be emitted as .ascii. SmallVector TmpVec; TmpVec.reserve(CA->getNumOperands()); @@ -1493,7 +1563,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); return; } - + if (CFP->getType()->isFloatTy()) { if (AP.isVerbose()) { float Val = CFP->getValueAPF().convertToFloat(); @@ -1503,7 +1573,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); return; } - + if (CFP->getType()->isX86_FP80Ty()) { // all long double variants are printed as hex // API needed to prevent premature destruction @@ -1518,7 +1588,7 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= " << DoubleVal.convertToDouble() << '\n'; } - + if (AP.TM.getTargetData()->isBigEndian()) { AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); @@ -1526,14 +1596,14 @@ static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, 
AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); } - + // Emit the tail padding for the long double. const TargetData &TD = *AP.TM.getTargetData(); AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - TD.getTypeStoreSize(CFP->getType()), AddrSpace); return; } - + assert(CFP->getType()->isPPC_FP128Ty() && "Floating point constant type not handled"); // All long double variants are printed as hex @@ -1588,10 +1658,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return; } } - + if (const ConstantArray *CVA = dyn_cast(CV)) return EmitGlobalConstantArray(CVA, AddrSpace, AP); - + if (const ConstantStruct *CVS = dyn_cast(CV)) return EmitGlobalConstantStruct(CVS, AddrSpace, AP); @@ -1603,10 +1673,10 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); return; } - + if (const ConstantVector *V = dyn_cast(CV)) return EmitGlobalConstantVector(V, AddrSpace, AP); - + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. AP.OutStreamer.EmitValue(LowerConstant(CV, AP), @@ -1703,7 +1773,7 @@ MCSymbol *AsmPrinter::GetExternalSymbolSymbol(StringRef Sym) const { SmallString<60> NameStr; Mang->getNameWithPrefix(NameStr, Sym); return OutContext.GetOrCreateSymbol(NameStr.str()); -} +} @@ -1740,10 +1810,10 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, // Add loop depth information const MachineLoop *Loop = LI->getLoopFor(&MBB); if (Loop == 0) return; - + MachineBasicBlock *Header = Loop->getHeader(); assert(Header && "No header for loop"); - + // If this block is not a loop header, just print out what is the loop header // and return. if (Header != &MBB) { @@ -1753,21 +1823,21 @@ static void EmitBasicBlockLoopComments(const MachineBasicBlock &MBB, " Depth="+Twine(Loop->getLoopDepth())); return; } - + // Otherwise, it is a loop header. 
Print out information about child and // parent loops. raw_ostream &OS = AP.OutStreamer.GetCommentOS(); - - PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); - + + PrintParentLoopComment(OS, Loop->getParentLoop(), AP.getFunctionNumber()); + OS << "=>"; OS.indent(Loop->getLoopDepth()*2-2); - + OS << "This "; if (Loop->empty()) OS << "Inner "; OS << "Loop Header: Depth=" + Twine(Loop->getLoopDepth()) << '\n'; - + PrintChildLoopComment(OS, Loop, AP.getFunctionNumber()); } @@ -1788,7 +1858,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { const BasicBlock *BB = MBB->getBasicBlock(); if (isVerbose()) OutStreamer.AddComment("Block address taken"); - + std::vector Syms = MMI->getAddrLabelSymbolToEmit(BB); for (unsigned i = 0, e = Syms.size(); i != e; ++i) @@ -1801,9 +1871,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { if (const BasicBlock *BB = MBB->getBasicBlock()) if (BB->hasName()) OutStreamer.AddComment("%" + BB->getName()); - + EmitBasicBlockLoopComments(*MBB, LI, *this); - + // NOTE: Want this comment at start of line, don't emit with AddComment. OutStreamer.EmitRawText(Twine(MAI->getCommentString()) + " BB#" + Twine(MBB->getNumber()) + ":"); @@ -1823,7 +1893,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const { void AsmPrinter::EmitVisibility(MCSymbol *Sym, unsigned Visibility, bool IsDefinition) const { MCSymbolAttr Attr = MCSA_Invalid; - + switch (Visibility) { default: break; case GlobalValue::HiddenVisibility: @@ -1850,23 +1920,23 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // then nothing falls through to it. if (MBB->isLandingPad() || MBB->pred_empty()) return false; - + // If there isn't exactly one predecessor, it can't be a fall through. 
MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; ++PI2; if (PI2 != MBB->pred_end()) return false; - + // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *PI; - + if (!Pred->isLayoutSuccessor(MBB)) return false; - + // If the block is completely empty, then it definitely does fall through. if (Pred->empty()) return true; - + // Otherwise, check the last instruction. const MachineInstr &LastInst = Pred->back(); return !LastInst.getDesc().isBarrier(); @@ -1882,9 +1952,9 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { gcp_map_type::iterator GCPI = GCMap.find(S); if (GCPI != GCMap.end()) return GCPI->second; - + const char *Name = S->getName().c_str(); - + for (GCMetadataPrinterRegistry::iterator I = GCMetadataPrinterRegistry::begin(), E = GCMetadataPrinterRegistry::end(); I != E; ++I) @@ -1894,7 +1964,7 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { GCMap.insert(std::make_pair(S, GMP)); return GMP; } - + report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); return 0; } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 98a1bf2f1ce4..6d1708a2ce80 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -155,7 +155,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { const TargetLoweringObjectFile &TLOF = getObjFileLowering(); const MCExpr *Exp = - TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer); + TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer); OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); } @@ -277,42 +277,42 @@ void AsmPrinter::EmitFrameMoves(const std::vector &Moves, } } +/// EmitFrameMoves - Emit a frame instruction. 
+void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + + const MachineLocation &Dst = Move.getDestination(); + const MachineLocation &Src = Move.getSource(); + + // If advancing cfa. + if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { + assert(!Src.isReg() && "Machine move not supported yet."); + + if (Src.getReg() == MachineLocation::VirtualFP) { + OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); + } else { + assert("Machine move not supported yet"); + // Reg + Offset + } + } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { + assert(Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); + } else { + assert(!Dst.isReg() && "Machine move not supported yet."); + OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), + Dst.getOffset()); + } +} + /// EmitFrameMoves - Emit frame instructions to describe the layout of the /// frame. void AsmPrinter::EmitCFIFrameMoves(const std::vector &Moves) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - int stackGrowth = TM.getTargetData()->getPointerSize(); - if (TM.getFrameLowering()->getStackGrowthDirection() != - TargetFrameLowering::StackGrowsUp) - stackGrowth *= -1; - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { const MachineMove &Move = Moves[i]; MCSymbol *Label = Move.getLabel(); // Throw out move if the label is invalid. if (Label && !Label->isDefined()) continue; // Not emitted, in dead code. - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa. 
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - assert(!Src.isReg() && "Machine move not supported yet."); - - if (Src.getReg() == MachineLocation::VirtualFP) { - OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset()); - } else { - assert("Machine move not supported yet"); - // Reg + Offset - } - } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) { - assert(Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - assert(!Dst.isReg() && "Machine move not supported yet."); - OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true), - Dst.getOffset()); - } + EmitCFIFrameMove(Move); } } diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 306efade7d92..d2be5525d7d0 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -1,9 +1,11 @@ add_llvm_library(LLVMAsmPrinter + ARMException.cpp AsmPrinter.cpp AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp DwarfCFIException.cpp + DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp DwarfTableException.cpp diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index d56c0947795e..7d61f1edff4a 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -70,7 +70,6 @@ namespace llvm { public: DIEAbbrev(unsigned T, unsigned C) : Tag(T), ChildrenFlag(C), Data() {} - virtual ~DIEAbbrev() {} // Accessors. 
unsigned getTag() const { return Tag; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 68be2eed8f0e..dbd52c4981b3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -40,7 +41,7 @@ using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) : DwarfException(A), - shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false) + shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) {} DwarfCFIException::~DwarfCFIException() {} @@ -51,81 +52,85 @@ void DwarfCFIException::EndModule() { if (!Asm->MAI->isExceptionHandlingDwarf()) return; - if (!shouldEmitTableModule) - return; - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); - // Begin eh frame section. - Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); + if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + return; // Emit references to all used personality functions + bool AtLeastOne = false; const std::vector &Personalities = MMI->getPersonalities(); for (size_t i = 0, e = Personalities.size(); i != e; ++i) { - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i)); - Asm->EmitReference(Personalities[i], PerEncoding); + if (!Personalities[i]) + continue; + MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); + AtLeastOne = true; + } + + if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { + // This is a temporary hack to keep sections in the same order they + // were before. 
This lets us produce bit identical outputs while + // transitioning to CFI. + Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); } } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void DwarfCFIException::BeginFunction(const MachineFunction *MF) { - shouldEmitTable = shouldEmitMoves = false; + shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false; // If any landing pads survive, we need an EH table. - shouldEmitTable = !MMI->getLandingPads().empty(); + bool hasLandingPads = !MMI->getLandingPads().empty(); // See if we need frame move info. - shouldEmitMoves = - !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory; - - if (shouldEmitMoves || shouldEmitTable) - // Assumes in correct section after the entry point. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); - - shouldEmitTableModule |= shouldEmitTable; - - if (shouldEmitMoves) { - const TargetFrameLowering *TFL = Asm->TM.getFrameLowering(); - Asm->OutStreamer.EmitCFIStartProc(); - - // Indicate locations of general callee saved registers in frame. - std::vector Moves; - TFL->getInitialFrameState(Moves); - Asm->EmitCFIFrameMoves(Moves); - Asm->EmitCFIFrameMoves(MMI->getFrameMoves()); - } - - if (!shouldEmitTable) - return; + shouldEmitMoves = Asm->needsCFIMoves(); const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned PerEncoding = TLOF.getPersonalityEncoding(); + const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; + + shouldEmitPersonality = hasLandingPads && + PerEncoding != dwarf::DW_EH_PE_omit && Per; - // Provide LSDA information. 
unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - if (LSDAEncoding != dwarf::DW_EH_PE_omit) - Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", - Asm->getFunctionNumber()), - LSDAEncoding); + shouldEmitLSDA = shouldEmitPersonality && + LSDAEncoding != dwarf::DW_EH_PE_omit; + + if (!shouldEmitPersonality && !shouldEmitMoves) + return; + + Asm->OutStreamer.EmitCFIStartProc(); // Indicate personality routine, if any. - unsigned PerEncoding = TLOF.getPersonalityEncoding(); - if (PerEncoding != dwarf::DW_EH_PE_omit && - MMI->getPersonalities()[MMI->getPersonalityIndex()]) - Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality", - MMI->getPersonalityIndex()), - PerEncoding); + if (!shouldEmitPersonality) + return; + + const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); + Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); + + // Provide LSDA information. + if (!shouldEmitLSDA) + return; + + Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception", + Asm->getFunctionNumber()), + LSDAEncoding); } /// EndFunction - Gather and emit post-function exception information. /// void DwarfCFIException::EndFunction() { - if (!shouldEmitMoves && !shouldEmitTable) return; + if (!shouldEmitPersonality && !shouldEmitMoves) + return; - if (shouldEmitMoves) - Asm->OutStreamer.EmitCFIEndProc(); + Asm->OutStreamer.EmitCFIEndProc(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -133,6 +138,6 @@ void DwarfCFIException::EndFunction() { // Map all labels and get rid of any dead landing pads. 
MMI->TidyLandingPads(); - if (shouldEmitTable) + if (shouldEmitPersonality) EmitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp new file mode 100644 index 000000000000..7ce0cfe8e79e --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -0,0 +1,983 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Unit ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "dwarfdebug" + +#include "DwarfCompileUnit.h" +#include "DwarfDebug.h" +#include "llvm/Constants.h" +#include "llvm/Analysis/DIBuilder.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +/// CompileUnit - Compile unit constructor. +CompileUnit::CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW) + : ID(I), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); +} + +/// ~CompileUnit - Destructor for compile unit. +CompileUnit::~CompileUnit() { + for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) + DIEBlocks[j]->~DIEBlock(); +} + +/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug +/// information entry. +DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { + DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); + return Value; +} + +/// addUInt - Add an unsigned integer attribute data and value. 
+/// +void CompileUnit::addUInt(DIE *Die, unsigned Attribute, + unsigned Form, uint64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? + DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addSInt - Add a signed integer attribute data and value. A zero Form selects DIEInteger::BestForm(true, Integer). +/// +void CompileUnit::addSInt(DIE *Die, unsigned Attribute, + unsigned Form, int64_t Integer) { + if (!Form) Form = DIEInteger::BestForm(true, Integer); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + Die->addValue(Attribute, Form, Value); +} + +/// addString - Add a string attribute data and value. DIEString only +/// keeps string reference. +void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, + StringRef String) { + DIEValue *Value = new (DIEValueAllocator) DIEString(String); + Die->addValue(Attribute, Form, Value); +} + +/// addLabel - Add a Dwarf label attribute data and value. +/// +void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, Form, Value); +} + +/// addDelta - Add a label delta attribute data and value, built as DIEDelta(Hi, Lo). +/// +void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo) { + DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); + Die->addValue(Attribute, Form, Value); +} + +/// addDIEEntry - Add a DIE attribute data and value; Entry is wrapped in a DIEEntry proxy via createDIEEntry. +/// +void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, + DIE *Entry) { + Die->addValue(Attribute, Form, createDIEEntry(Entry)); +} + + +/// addBlock - Add block data. The Form argument is not used; the value is added with Block->BestForm(). +/// +void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, + DIEBlock *Block) { + Block->ComputeSize(Asm); + DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
+ Die->addValue(Attribute, Block->BestForm(), Block); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { + // Verify variable. + if (!V.Verify()) + return; + + unsigned Line = V.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { + // Verify global variable. + if (!G.Verify()) + return; + + unsigned Line = G.getLineNumber(); + if (Line == 0) + return; + unsigned FileID = DD->GetOrCreateSourceID(G.getContext().getFilename(), + G.getContext().getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { + // Verify subprogram. + if (!SP.Verify()) + return; + // If the line number is 0, don't add it. + if (SP.getLineNumber() == 0) + return; + + unsigned Line = SP.getLineNumber(); + if (!SP.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { + // Verify type. 
+ if (!Ty.Verify()) + return; + + unsigned Line = Ty.getLineNumber(); + if (Line == 0 || !Ty.getContext().Verify()) + return; + unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addSourceLine - Add location information to specified debug information +/// entry. +void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { + // Verify namespace. + if (!NS.Verify()) + return; + + unsigned Line = NS.getLineNumber(); + if (Line == 0) + return; + StringRef FN = NS.getFilename(); + + unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory()); + assert(FileID && "Invalid file id"); + addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); +} + +/// addVariableAddress - Add DW_AT_location attribute for a +/// DbgVariable based on provided MachineLocation. +void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, + MachineLocation Location) { + if (DV->variableHasComplexAddress()) + addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); + else if (DV->isBlockByrefVariable()) + addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); + else + addAddress(Die, dwarf::DW_AT_location, Location); +} + +/// addRegisterOp - Add register operand. +void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } +} + +/// addRegisterOffset - Add register offset. 
+void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, + int64_t Offset) { + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned DWReg = RI->getDwarfRegNum(Reg, false); + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + if (Reg == TRI->getFrameRegister(*Asm->MF)) + // If variable offset is based in frame register then use fbreg. + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + else if (DWReg < 32) + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + else { + addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + } + addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); +} + +/// addAddress - Add an address attribute to a die based on the location +/// provided. +void CompileUnit::addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) + addRegisterOp(Block, Location.getReg()); + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addComplexAddress - Start with the address based on the location provided, +/// and generate the DWARF information necessary to find the actual variable +/// given the extra address information encoded in the DIVariable, starting from +/// the starting location. Add the DWARF information to the die. +/// +void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + unsigned N = DV->getNumAddrElements(); + unsigned i = 0; + if (Location.isReg()) { + if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. 
+ addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + i = 2; + } else + addRegisterOp(Block, Location.getReg()); + } + else + addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + + for (;i < N; ++i) { + uint64_t Element = DV->getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else llvm_unreachable("unknown DIBuilder Opcode"); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/* Byref variables, in Blocks, are declared by the programmer as "SomeType + VarName;", but the compiler creates a __Block_byref_x_VarName struct, and + gives the variable VarName either the struct, or a pointer to the struct, as + its type. This is necessary for various behind-the-scenes things the + compiler needs to do with by-reference variables in Blocks. + + However, as far as the original *programmer* is concerned, the variable + should still have type 'SomeType', as originally declared. + + The function getBlockByrefType dives into the __Block_byref_x_VarName + struct to find the original type of the variable, which is then assigned to + the variable's Debug Information Entry as its real type. So far, so good. + However now the debugger will expect the variable VarName to have the type + SomeType. So we need the location attribute for the variable to be an + expression that explains to the debugger how to navigate through the + pointers and struct to find the actual variable of type SomeType. + + The following function does just that. We start by getting + the "normal" location for the variable. This will be the location + of either the struct __Block_byref_x_VarName or the pointer to the + struct __Block_byref_x_VarName. 
+ + The struct will look something like: + + struct __Block_byref_x_VarName { + ... + struct __Block_byref_x_VarName *forwarding; + ... + SomeType VarName; + ... + }; + + If we are given the struct directly (as our starting point) we + need to tell the debugger to: + + 1). Add the offset of the forwarding field. + + 2). Follow that pointer to get the real __Block_byref_x_VarName + struct to use (the real one may have been copied onto the heap). + + 3). Add the offset for the field VarName, to find the actual variable. + + If we started with a pointer to the struct, then we need to + dereference that pointer first, before the other steps. + Translating this into DWARF ops, we will need to append the following + to the current location description for the variable: + + DW_OP_deref -- optional, if we start with a pointer + DW_OP_plus_uconst + DW_OP_deref + DW_OP_plus_uconst + + That is what this function does. */ + +/// addBlockByrefAddress - Start with the address based on the location +/// provided, and generate the DWARF information necessary to find the +/// actual Block variable (navigating the Block struct) based on the +/// starting location. Add the DWARF information to the die. For +/// more information, read large comment just above here. +/// +void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, + unsigned Attribute, + const MachineLocation &Location) { + DIType Ty = DV->getType(); + DIType TmpTy = Ty; + unsigned Tag = Ty.getTag(); + bool isPointer = false; + + StringRef varName = DV->getName(); + + if (Tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + TmpTy = DTy.getTypeDerivedFrom(); + isPointer = true; + } + + DICompositeType blockStruct = DICompositeType(TmpTy); + + // Find the __forwarding field and the variable field in the __Block_byref + // struct. 
+ DIArray Fields = blockStruct.getTypeArray(); + DIDescriptor varField = DIDescriptor(); + DIDescriptor forwardingField = DIDescriptor(); + + for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { + DIDescriptor Element = Fields.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + StringRef fieldName = DT.getName(); + if (fieldName == "__forwarding") + forwardingField = Element; + else if (fieldName == varName) + varField = Element; + } + + // Get the offsets for the forwarding field and the variable field. + unsigned forwardingFieldOffset = + DIDerivedType(forwardingField).getOffsetInBits() >> 3; + unsigned varFieldOffset = + DIDerivedType(varField).getOffsetInBits() >> 3; + + // Decode the original location, and use that as the start of the byref + // variable's location. + const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); + unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + if (Location.isReg()) { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + } else { + if (Reg < 32) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); + else { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); + } + + addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); + } + + // If we started with a pointer to the __Block_byref... struct, then + // the first thing we need to do is dereference the pointer (DW_OP_deref). + if (isPointer) + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Next add the offset for the '__forwarding' field: + // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in + // adding the offset if it's 0. 
+ if (forwardingFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + } + + // Now dereference the __forwarding field to get to the real __Block_byref + // struct: DW_OP_deref. + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + + // Now that we've got the real __Block_byref... struct, add the offset + // for the variable's field to get to the location of the actual variable: + // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. + if (varFieldOffset > 0) { + addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + } + + // Now attach the location information to the DIE. + addBlock(Die, Attribute, 0, Block); +} + +/// addConstantValue - Add constant value entry in variable DIE. MO must be an immediate operand (asserted); its value is held in a 64-bit temporary so it is not narrowed before being added as DW_FORM_udata inside a DW_AT_const_value block. Always returns true. +bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + uint64_t Imm = MO.getImm(); + addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantFPValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert (MO.isFPImm() && "Invalid machine operand!"); + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + APFloat FPImm = MO.getFPImm()->getValueAPF(); + + // Get the raw data form of the floating point. + const APInt FltVal = FPImm.bitcastToAPInt(); + const char *FltPtr = (const char*)FltVal.getRawData(); + + int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & FltPtr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI, + bool Unsigned) { + if (CI->getBitWidth() <= 64) { + if (Unsigned) + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + CI->getZExtValue()); + else + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + CI->getSExtValue()); + return true; + } + + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + + // Get the raw data form of the large APInt. + const APInt Val = CI->getValue(); + const char *Ptr = (const char*)Val.getRawData(); + + int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. + bool LittleEndian = Asm->getTargetData().isLittleEndian(); + int Incr = (LittleEndian ? 1 : -1); + int Start = (LittleEndian ? 0 : NumBytes - 1); + int Stop = (LittleEndian ? NumBytes : -1); + + // Output the constant to DWARF one byte at a time. + for (; Start != Stop; Start += Incr) + addUInt(Block, 0, dwarf::DW_FORM_data1, + (unsigned char)0xFF & Ptr[Start]); + + addBlock(Die, dwarf::DW_AT_const_value, 0, Block); + return true; +} + +/// addTemplateParams - Add template parameters in buffer. +void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { + // Add template parameters. + for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { + DIDescriptor Element = TParams.getElement(i); + if (Element.isTemplateTypeParameter()) + Buffer.addChild(getOrCreateTemplateTypeParameterDIE( + DITemplateTypeParameter(Element))); + else if (Element.isTemplateValueParameter()) + Buffer.addChild(getOrCreateTemplateValueParameterDIE( + DITemplateValueParameter(Element))); + } + +} +/// addToContextOwner - Add Die into the list of its context owner's children. 
+void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { + if (Context.isType()) { + DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); + ContextDIE->addChild(Die); + } else if (Context.isNameSpace()) { + DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); + ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = DD->createSubprogramDIE(DISubprogram(Context)); + ContextDIE->addChild(Die); + } else if (DIE *ContextDIE = getDIE(Context)) + ContextDIE->addChild(Die); + else + addDie(Die); +} + +/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the +/// given DIType. +DIE *CompileUnit::getOrCreateTypeDIE(DIType Ty) { + DIE *TyDIE = getDIE(Ty); + if (TyDIE) + return TyDIE; + + // Create new type. + TyDIE = new DIE(dwarf::DW_TAG_base_type); + insertDIE(Ty, TyDIE); + if (Ty.isBasicType()) + constructTypeDIE(*TyDIE, DIBasicType(Ty)); + else if (Ty.isCompositeType()) + constructTypeDIE(*TyDIE, DICompositeType(Ty)); + else { + assert(Ty.isDerivedType() && "Unknown kind of DIType"); + constructTypeDIE(*TyDIE, DIDerivedType(Ty)); + } + + addToContextOwner(TyDIE, Ty.getContext()); + return TyDIE; +} + +/// addType - Add a new type attribute to the specified entity. +void CompileUnit::addType(DIE *Entity, DIType Ty) { + if (!Ty.Verify()) + return; + + // Check for pre-existence. + DIEEntry *Entry = getDIEEntry(Ty); + // If it exists then use the existing value. + if (Entry) { + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); + return; + } + + // Construct type. + DIE *Buffer = getOrCreateTypeDIE(Ty); + + // Set up proxy. + Entry = createDIEEntry(Buffer); + insertDIEEntry(Ty, Entry); + + Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); +} + +/// constructTypeDIE - Construct basic type die from DIBasicType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { + // Get core information. 
+ StringRef Name = BTy.getName(); + Buffer.setTag(dwarf::DW_TAG_base_type); + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + BTy.getEncoding()); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + uint64_t Size = BTy.getSizeInBits() >> 3; + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); +} + +/// constructTypeDIE - Construct derived type die from DIDerivedType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { + // Get core information. + StringRef Name = DTy.getName(); + uint64_t Size = DTy.getSizeInBits() >> 3; + unsigned Tag = DTy.getTag(); + + // FIXME - Workaround for templates. + if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; + + Buffer.setTag(Tag); + + // Map to main type, void will not have a type. + DIType FromTy = DTy.getTypeDerivedFrom(); + addType(&Buffer, FromTy); + + // Add name if not anonymous or intermediate type. + if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + + // Add source line info if available and TyDesc is not a forward declaration. + if (!DTy.isForwardDecl()) + addSourceLine(&Buffer, DTy); +} + +/// constructTypeDIE - Construct type DIE from DICompositeType. +void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { + // Get core information. + StringRef Name = CTy.getName(); + + uint64_t Size = CTy.getSizeInBits() >> 3; + unsigned Tag = CTy.getTag(); + Buffer.setTag(Tag); + + switch (Tag) { + case dwarf::DW_TAG_vector_type: + case dwarf::DW_TAG_array_type: + constructArrayTypeDIE(Buffer, &CTy); + break; + case dwarf::DW_TAG_enumeration_type: { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. 
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIE *ElemDie = NULL; + DIDescriptor Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); + Buffer.addChild(ElemDie); + } + } + } + break; + case dwarf::DW_TAG_subroutine_type: { + // Add return type. + DIArray Elements = CTy.getTypeArray(); + DIDescriptor RTy = Elements.getElement(0); + addType(&Buffer, DIType(RTy)); + + bool isPrototyped = true; + // Add arguments. + for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Ty = Elements.getElement(i); + if (Ty.isUnspecifiedParameter()) { + DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); + Buffer.addChild(Arg); + isPrototyped = false; + } else { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + addType(Arg, DIType(Ty)); + Buffer.addChild(Arg); + } + } + // Add prototype flag. + if (isPrototyped) + addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + } + break; + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_class_type: { + // Add elements to structure type. + DIArray Elements = CTy.getTypeArray(); + + // A forward struct declared type may not have elements available. + unsigned N = Elements.getNumElements(); + if (N == 0) + break; + + // Add elements to structure type. 
+ for (unsigned i = 0; i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIE *ElemDie = NULL; + if (Element.isSubprogram()) { + DISubprogram SP(Element); + ElemDie = DD->createSubprogramDIE(DISubprogram(Element)); + if (SP.isProtected()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (SP.isPrivate()) + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + else + addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (SP.isExplicit()) + addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); + } + else if (Element.isVariable()) { + DIVariable DV(Element); + ElemDie = new DIE(dwarf::DW_TAG_variable); + addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + DV.getName()); + addType(ElemDie, DV.getType()); + addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + addSourceLine(ElemDie, DV); + } else if (Element.isDerivedType()) + ElemDie = createMemberDIE(DIDerivedType(Element)); + else + continue; + Buffer.addChild(ElemDie); + } + + if (CTy.isAppleBlockExtension()) + addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); + + unsigned RLang = CTy.getRunTimeLang(); + if (RLang) + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, + dwarf::DW_FORM_data1, RLang); + + DICompositeType ContainingType = CTy.getContainingType(); + if (DIDescriptor(ContainingType).isCompositeType()) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } + + if (Tag == dwarf::DW_TAG_class_type) + addTemplateParams(Buffer, CTy.getTemplateParams()); + + break; + } + default: + break; + } + + // Add name if not anonymous or intermediate type. 
+ if (!Name.empty()) + addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { + // Add size if non-zero (derived types might be zero-sized.) + if (Size) + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + else { + // Add zero size if it is not a forward declaration. + if (CTy.isForwardDecl()) + addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + else + addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + } + + // Add source line info if available. + if (!CTy.isForwardDecl()) + addSourceLine(&Buffer, CTy); + } +} + +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateTypeParameter. +DIE * +CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { + DIE *ParamDIE = getDIE(TP); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); + addType(ParamDIE, TP.getType()); + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + return ParamDIE; +} + +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// for the given DITemplateValueParameter. +DIE * +CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { + DIE *ParamDIE = getDIE(TPV); + if (ParamDIE) + return ParamDIE; + + ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); + addType(ParamDIE, TPV.getType()); + if (!TPV.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + TPV.getValue()); + return ParamDIE; +} + +/// constructSubrangeDIE - Construct subrange DIE from DISubrange. 
+void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ + DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + int64_t L = SR.getLo(); + int64_t H = SR.getHi(); + + // The L value defines the lower bounds which is typically zero for C/C++. The + // H value is the upper bounds. Values are 64 bit. H - L + 1 is the size + // of the array. If L > H then do not emit DW_AT_lower_bound and + // DW_AT_upper_bound attributes. If L is zero and H is also zero then the + // array has one element and in such case do not emit lower bound. + + if (L > H) { + Buffer.addChild(DW_Subrange); + return; + } + if (L) + addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); + addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); + Buffer.addChild(DW_Subrange); +} + +/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. +void CompileUnit::constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy) { + Buffer.setTag(dwarf::DW_TAG_array_type); + if (CTy->getTag() == dwarf::DW_TAG_vector_type) + addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); + + // Emit derived type. + addType(&Buffer, CTy->getTypeDerivedFrom()); + DIArray Elements = CTy->getTypeArray(); + + // Get an anonymous type for index type. + DIE *IdxTy = getIndexTyDie(); + if (!IdxTy) { + // Construct an anonymous type for index type. + IdxTy = new DIE(dwarf::DW_TAG_base_type); + addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + dwarf::DW_ATE_signed); + addDie(IdxTy); + setIndexTyDie(IdxTy); + } + + // Add subranges to array type. 
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + if (Element.getTag() == dwarf::DW_TAG_subrange_type) + constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); + } +} + +/// constructEnumTypeDIE - Construct a DW_TAG_enumerator DIE from DIEnumerator, carrying its name and a signed DW_AT_const_value. +DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { + DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); + StringRef Name = ETy.getName(); + addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + int64_t Value = ETy.getEnumValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + return Enumerator; +} + +/// createMemberDIE - Create new member DIE for DT, tagged with DT.getTag(). +DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { + DIE *MemberDie = new DIE(DT.getTag()); + StringRef Name = DT.getName(); + if (!Name.empty()) + addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + + addType(MemberDie, DT.getTypeDerivedFrom()); + + addSourceLine(MemberDie, DT); + + DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); + addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = DT.getOriginalTypeSize(); + + if (Size != FieldSize) { + // Handle bitfield. + addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); + addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getTargetData().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); + + // Here DW_AT_data_member_location points to the anonymous + // field that includes this bit field.
+ addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); + + } else + // This is not a bitfield. + addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); + + if (DT.getTag() == dwarf::DW_TAG_inheritance + && DT.isVirtual()) { + + // For C++, virtual base classes are not at fixed offset. Use following + // expression to extract appropriate offset from vtable. + // BaseAddr = ObAddr + *((*ObAddr) - Offset) + + DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, + VBaseLocationDie); + } else + addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + + if (DT.isProtected()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_protected); + else if (DT.isPrivate()) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_private); + // Otherwise C++ member and base classes are considered public. + else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) + addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, + dwarf::DW_ACCESS_public); + if (DT.isVirtual()) + addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, + dwarf::DW_VIRTUALITY_virtual); + + // Objective-C properties. 
+ StringRef PropertyName = DT.getObjCPropertyName(); + if (!PropertyName.empty()) { + addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string, + PropertyName); + StringRef GetterName = DT.getObjCPropertyGetterName(); + if (!GetterName.empty()) + addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, + dwarf::DW_FORM_string, GetterName); + StringRef SetterName = DT.getObjCPropertySetterName(); + if (!SetterName.empty()) + addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, + dwarf::DW_FORM_string, SetterName); + unsigned PropertyAttributes = 0; + if (DT.isReadOnlyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; + if (DT.isReadWriteObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; + if (DT.isAssignObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; + if (DT.isRetainObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; + if (DT.isCopyObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; + if (DT.isNonAtomicObjCProperty()) + PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; + if (PropertyAttributes) + addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0, + PropertyAttributes); + } + return MemberDie; +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h new file mode 100644 index 000000000000..f4f6fb8b0df4 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -0,0 +1,282 @@ +//===-- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf compile unit. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H +#define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H + +#include "DIE.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/OwningPtr.h" + +namespace llvm { + +class DwarfDebug; +class MachineLocation; +class MachineOperand; +class ConstantInt; +class DbgVariable; + +//===----------------------------------------------------------------------===// +/// CompileUnit - This dwarf writer support class manages information associated +/// with a source file. +class CompileUnit { + /// ID - File identifier for source. + /// + unsigned ID; + + /// CUDie - Compile unit debug information entry. + /// + const OwningPtr CUDie; + + /// Asm - Target of Dwarf emission. + AsmPrinter *Asm; + + DwarfDebug *DD; + + /// IndexTyDie - An anonymous type for index type. Owned by CUDie. + DIE *IndexTyDie; + + /// MDNodeToDieMap - Tracks the mapping of unit level debug information + /// variables to debug information entries. + DenseMap MDNodeToDieMap; + + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information + /// descriptors to debug information entries using a DIEEntry proxy. + DenseMap MDNodeToDIEEntryMap; + + /// Globals - A map of globally visible named entities for this unit. + /// + StringMap Globals; + + /// GlobalTypes - A map of globally visible types for this unit. + /// + StringMap GlobalTypes; + + /// DIEBlocks - A list of all the DIEBlocks in use. + std::vector DIEBlocks; + +public: + CompileUnit(unsigned I, DIE *D, AsmPrinter *A, DwarfDebug *DW); + ~CompileUnit(); + + // Accessors. 
+ unsigned getID() const { return ID; } + DIE* getCUDie() const { return CUDie.get(); } + const StringMap &getGlobals() const { return Globals; } + const StringMap &getGlobalTypes() const { return GlobalTypes; } + + /// hasContent - Return true if this compile unit has something to write out. + /// + bool hasContent() const { return !CUDie->getChildren().empty(); } + + /// addGlobal - Add a new global entity to the compile unit. + /// + void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } + + /// addGlobalType - Add a new global type to the compile unit. + /// + void addGlobalType(StringRef Name, DIE *Die) { + GlobalTypes[Name] = Die; + } + + /// getDIE - Returns the debug information entry map slot for the + /// specified debug variable. + DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } + + DIEBlock *getDIEBlock() { + return new (DIEValueAllocator) DIEBlock(); + } + + /// insertDIE - Insert DIE into the map. + void insertDIE(const MDNode *N, DIE *D) { + MDNodeToDieMap.insert(std::make_pair(N, D)); + } + + /// getDIEEntry - Returns the debug information entry for the specified + /// debug variable. + DIEEntry *getDIEEntry(const MDNode *N) { + DenseMap::iterator I = + MDNodeToDIEEntryMap.find(N); + if (I == MDNodeToDIEEntryMap.end()) + return NULL; + return I->second; + } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(const MDNode *N, DIEEntry *E) { + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); + } + + /// addDie - Adds or interns the DIE to the compile unit. + /// + void addDie(DIE *Buffer) { + this->CUDie->addChild(Buffer); + } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { + return IndexTyDie; + } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { + IndexTyDie = D; + } +public: + + /// addUInt - Add an unsigned integer attribute data and value. 
+ /// + void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + + /// addSInt - Add a signed integer attribute data and value. + /// + void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + + /// addString - Add a string attribute data and value. + /// + void addString(DIE *Die, unsigned Attribute, unsigned Form, + const StringRef Str); + + /// addLabel - Add a Dwarf label attribute data and value. + /// + void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Label); + + /// addDelta - Add a label delta attribute data and value. + /// + void addDelta(DIE *Die, unsigned Attribute, unsigned Form, + const MCSymbol *Hi, const MCSymbol *Lo); + + /// addDIEEntry - Add a DIE attribute data and value. + /// + void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + + /// addBlock - Add block data. + /// + void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + + /// addSourceLine - Add location information to specified debug information + /// entry. + void addSourceLine(DIE *Die, DIVariable V); + void addSourceLine(DIE *Die, DIGlobalVariable G); + void addSourceLine(DIE *Die, DISubprogram SP); + void addSourceLine(DIE *Die, DIType Ty); + void addSourceLine(DIE *Die, DINameSpace NS); + + /// addAddress - Add an address attribute to a die based on the location + /// provided. + void addAddress(DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addConstantValue - Add constant value entry in variable DIE. + bool addConstantValue(DIE *Die, const MachineOperand &MO); + bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); + + /// addConstantFPValue - Add constant value entry in variable DIE. + bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + + /// addTemplateParams - Add template parameters in buffer. + void addTemplateParams(DIE &Buffer, DIArray TParams); + + /// addRegisterOp - Add register operand. 
+ void addRegisterOp(DIE *TheDie, unsigned Reg); + + /// addRegisterOffset - Add register offset. + void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + + /// addComplexAddress - Start with the address based on the location provided, + /// and generate the DWARF information necessary to find the actual variable + /// (navigating the extra location information encoded in the type) based on + /// the starting location. Add the DWARF information to the die. + /// + void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + // FIXME: Should be reformulated in terms of addComplexAddress. + /// addBlockByrefAddress - Start with the address based on the location + /// provided, and generate the DWARF information necessary to find the + /// actual Block variable (navigating the Block struct) based on the + /// starting location. Add the DWARF information to the die. Obsolete, + /// please use addComplexAddress instead. + /// + void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + const MachineLocation &Location); + + /// addVariableAddress - Add DW_AT_location attribute for a + /// DbgVariable based on provided MachineLocation. + void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); + + /// addToContextOwner - Add Die into the list of its context owner's children. + void addToContextOwner(DIE *Die, DIDescriptor Context); + + /// addType - Add a new type attribute to the specified entity. + void addType(DIE *Entity, DIType Ty); + + /// getOrCreateNameSpace - Create a DIE for DINameSpace. + DIE *getOrCreateNameSpace(DINameSpace NS); + + /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the + /// given DIType. + DIE *getOrCreateTypeDIE(DIType Ty); + + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateTypeParameter. 
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + + /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE + /// for the given DITemplateValueParameter. + DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + void addPubTypes(DISubprogram SP); + + /// constructTypeDIE - Construct basic type die from DIBasicType. + void constructTypeDIE(DIE &Buffer, + DIBasicType BTy); + + /// constructTypeDIE - Construct derived type die from DIDerivedType. + void constructTypeDIE(DIE &Buffer, + DIDerivedType DTy); + + /// constructTypeDIE - Construct type DIE from DICompositeType. + void constructTypeDIE(DIE &Buffer, + DICompositeType CTy); + + /// constructSubrangeDIE - Construct subrange DIE from DISubrange. + void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); + + /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. + void constructArrayTypeDIE(DIE &Buffer, + DICompositeType *CTy); + + /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. + DIE *constructEnumTypeDIE(DIEnumerator ETy); + + /// createMemberDIE - Create new member DIE. + DIE *createMemberDIE(DIDerivedType DT); + +private: + + // DIEValueAllocator - All DIEValues are allocated through this allocator. 
+ BumpPtrAllocator DIEValueAllocator; + DIEInteger *DIEIntegerOne; +}; + +} // end llvm namespace +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 780fa405ef51..26da8006b30e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Instructions.h" @@ -52,7 +53,7 @@ static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", cl::desc("Disable debug info printing")); static cl::opt UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absense of debug location information explicit."), + cl::desc("Make an absence of debug location information explicit."), cl::init(false)); #ifndef NDEBUG @@ -72,189 +73,56 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { -//===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate -/// with a source file. -class CompileUnit { - /// ID - File identifier for source. - /// - unsigned ID; - - /// Die - Compile unit debug information entry. - /// - const OwningPtr CUDie; - - /// IndexTyDie - An anonymous type for index type. Owned by CUDie. - DIE *IndexTyDie; - - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton - /// variables to debug information entries. - DenseMap MDNodeToDieMap; - - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton - /// descriptors to debug information entries using a DIEEntry proxy. - DenseMap MDNodeToDIEEntryMap; - - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap Globals; - - /// GlobalTypes - A map of globally visible types for this unit. 
- /// - StringMap GlobalTypes; - -public: - CompileUnit(unsigned I, DIE *D) - : ID(I), CUDie(D), IndexTyDie(0) {} - - // Accessors. - unsigned getID() const { return ID; } - DIE* getCUDie() const { return CUDie.get(); } - const StringMap &getGlobals() const { return Globals; } - const StringMap &getGlobalTypes() const { return GlobalTypes; } - - /// hasContent - Return true if this compile unit has something to write out. - /// - bool hasContent() const { return !CUDie->getChildren().empty(); } - - /// addGlobal - Add a new global entity to the compile unit. - /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - - /// addGlobalType - Add a new global type to the compile unit. - /// - void addGlobalType(StringRef Name, DIE *Die) { - GlobalTypes[Name] = Die; - } - - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } - - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } - - /// getDIEEntry - Returns the debug information entry for the speciefied - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } - - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } - - /// addDie - Adds or interns the DIE to the compile unit. - /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. 
- void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } - -}; - -//===----------------------------------------------------------------------===// -/// DbgVariable - This class is used to track local variable information. -/// -class DbgVariable { - DIVariable Var; // Variable Descriptor. - DIE *TheDIE; // Variable DIE. - unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. -public: - // AbsVar may be NULL. - DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} - - // Accessors. - DIVariable getVariable() const { return Var; } - void setDIE(DIE *D) { TheDIE = D; } - DIE *getDIE() const { return TheDIE; } - void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } - unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } - StringRef getName() const { return Var.getName(); } - unsigned getTag() const { return Var.getTag(); } - bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.hasComplexAddress(); - } - bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.isBlockByrefVariable(); - } - unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); - return Var.getNumAddrElements(); - } - uint64_t getAddrElement(unsigned i) const { - return Var.getAddrElement(i); - } - DIType getType() const { - DIType Ty = Var.getType(); - // FIXME: isBlockByrefVariable should be reformulated in terms of complex - // addresses instead. - if (Var.isBlockByrefVariable()) { - /* Byref variables, in Blocks, are declared by the programmer as - "SomeType VarName;", but the compiler creates a - __Block_byref_x_VarName struct, and gives the variable VarName - either the struct, or a pointer to the struct, as its type. This - is necessary for various behind-the-scenes things the compiler - needs to do with by-reference variables in blocks. 
- - However, as far as the original *programmer* is concerned, the - variable should still have type 'SomeType', as originally declared. - - The following function dives into the __Block_byref_x_VarName - struct to find the original type of the variable. This will be - passed back to the code generating the type for the Debug - Information Entry for the variable 'VarName'. 'VarName' will then - have the original type 'SomeType' in its debug information. - - The original type 'SomeType' will be the type of the field named - 'VarName' inside the __Block_byref_x_VarName struct. - - NOTE: In order for this to not completely fail on the debugger - side, the Debug Information Entry for the variable VarName needs to - have a DW_AT_location that tells the debugger how to unwind through - the pointers and __Block_byref_x_VarName struct to find the actual - value of the variable. The function addBlockByrefType does this. */ - DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } - - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); - - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); - } - return Ty; +DIType DbgVariable::getType() const { + DIType Ty = Var.getType(); + // FIXME: isBlockByrefVariable should be reformulated in terms of complex + // addresses instead. + if (Var.isBlockByrefVariable()) { + /* Byref variables, in Blocks, are declared by the programmer as + "SomeType VarName;", but the compiler creates a + __Block_byref_x_VarName struct, and gives the variable VarName + either the struct, or a pointer to the struct, as its type. 
This + is necessary for various behind-the-scenes things the compiler + needs to do with by-reference variables in blocks. + + However, as far as the original *programmer* is concerned, the + variable should still have type 'SomeType', as originally declared. + + The following function dives into the __Block_byref_x_VarName + struct to find the original type of the variable. This will be + passed back to the code generating the type for the Debug + Information Entry for the variable 'VarName'. 'VarName' will then + have the original type 'SomeType' in its debug information. + + The original type 'SomeType' will be the type of the field named + 'VarName' inside the __Block_byref_x_VarName struct. + + NOTE: In order for this to not completely fail on the debugger + side, the Debug Information Entry for the variable VarName needs to + have a DW_AT_location that tells the debugger how to unwind through + the pointers and __Block_byref_x_VarName struct to find the actual + value of the variable. The function addBlockByrefType does this. 
*/ + DIType subType = Ty; + unsigned tag = Ty.getTag(); + + if (tag == dwarf::DW_TAG_pointer_type) { + DIDerivedType DTy = DIDerivedType(Ty); + subType = DTy.getTypeDerivedFrom(); + } + + DICompositeType blockStruct = DICompositeType(subType); + DIArray Elements = blockStruct.getTypeArray(); + + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIDescriptor Element = Elements.getElement(i); + DIDerivedType DT = DIDerivedType(Element); + if (getName() == DT.getName()) + return (DT.getTypeDerivedFrom()); } return Ty; } -}; + return Ty; +} //===----------------------------------------------------------------------===// /// DbgRange - This is used to track range of instructions with identical @@ -396,15 +264,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(M); } } DwarfDebug::~DwarfDebug() { - for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j) - DIEBlocks[j]->~DIEBlock(); } MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { @@ -439,852 +304,6 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { } } -/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug -/// information entry. -DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { - DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry); - return Value; -} - -/// addUInt - Add an unsigned integer attribute data and value. -/// -void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addSInt - Add an signed integer attribute data and value. 
-/// -void DwarfDebug::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); -} - -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. -void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); -} - -/// addLabel - Add a Dwarf label attribute data and value. -/// -void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { - DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); - Die->addValue(Attribute, Form, Value); -} - -/// addDelta - Add a label delta attribute data and value. -/// -void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { - DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); -} - -/// addDIEEntry - Add a DIE attribute data and value. -/// -void DwarfDebug::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, - DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); -} - - -/// addBlock - Add block data. -/// -void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form, - DIEBlock *Block) { - Block->ComputeSize(Asm); - DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. - Die->addValue(Attribute, Block->BestForm(), Block); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) { - // Verify variable. 
- if (!V.Verify()) - return; - - unsigned Line = V.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) { - // Verify global variable. - if (!G.Verify()) - return; - - unsigned Line = G.getLineNumber(); - if (Line == 0) - return; - unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) { - // Verify subprogram. - if (!SP.Verify()) - return; - // If the line number is 0, don't add it. - if (SP.getLineNumber() == 0) - return; - - unsigned Line = SP.getLineNumber(); - if (!SP.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(SP.getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. -void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) { - // Verify type. - if (!Ty.Verify()) - return; - - unsigned Line = Ty.getLineNumber(); - if (Line == 0 || !Ty.getContext().Verify()) - return; - unsigned FileID = GetOrCreateSourceID(Ty.getFilename()); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addSourceLine - Add location information to specified debug information -/// entry. 
-void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) { - // Verify namespace. - if (!NS.Verify()) - return; - - unsigned Line = NS.getLineNumber(); - if (Line == 0) - return; - StringRef FN = NS.getFilename(); - - unsigned FileID = GetOrCreateSourceID(FN); - assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); -} - -/// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based -/// on provided frame index. -void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) { - MachineLocation Location; - unsigned FrameReg; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); - Location.set(FrameReg, Offset); - - if (DV->variableHasComplexAddress()) - addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) - addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); - else - addAddress(Die, dwarf::DW_AT_location, Location); -} - -/// addComplexAddress - Start with the address based on the location provided, -/// and generate the DWARF information necessary to find the actual variable -/// given the extra address information encoded in the DIVariable, starting from -/// the starting location. Add the DWARF information to the die. -/// -void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - - // Decode the original location, and use that as the start of the byref - // variable's location. 
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - for (unsigned i = 0, N = DV->getNumAddrElements(); i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); - - if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); - } else if (Element == DIBuilder::OpDeref) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); - } - - // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); -} - -/* Byref variables, in Blocks, are declared by the programmer as "SomeType - VarName;", but the compiler creates a __Block_byref_x_VarName struct, and - gives the variable VarName either the struct, or a pointer to the struct, as - its type. This is necessary for various behind-the-scenes things the - compiler needs to do with by-reference variables in Blocks. - - However, as far as the original *programmer* is concerned, the variable - should still have type 'SomeType', as originally declared. - - The function getBlockByrefType dives into the __Block_byref_x_VarName - struct to find the original type of the variable, which is then assigned to - the variable's Debug Information Entry as its real type. So far, so good. 
- However now the debugger will expect the variable VarName to have the type - SomeType. So we need the location attribute for the variable to be an - expression that explains to the debugger how to navigate through the - pointers and struct to find the actual variable of type SomeType. - - The following function does just that. We start by getting - the "normal" location for the variable. This will be the location - of either the struct __Block_byref_x_VarName or the pointer to the - struct __Block_byref_x_VarName. - - The struct will look something like: - - struct __Block_byref_x_VarName { - ... - struct __Block_byref_x_VarName *forwarding; - ... - SomeType VarName; - ... - }; - - If we are given the struct directly (as our starting point) we - need to tell the debugger to: - - 1). Add the offset of the forwarding field. - - 2). Follow that pointer to get the real __Block_byref_x_VarName - struct to use (the real one may have been copied onto the heap). - - 3). Add the offset for the field VarName, to find the actual variable. - - If we started with a pointer to the struct, then we need to - dereference that pointer first, before the other steps. - Translating this into DWARF ops, we will need to append the following - to the current location description for the variable: - - DW_OP_deref -- optional, if we start with a pointer - DW_OP_plus_uconst - DW_OP_deref - DW_OP_plus_uconst - - That is what this function does. */ - -/// addBlockByrefAddress - Start with the address based on the location -/// provided, and generate the DWARF information necessary to find the -/// actual Block variable (navigating the Block struct) based on the -/// starting location. Add the DWARF information to the die. For -/// more information, read large comment just above here. 
-/// -void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, - const MachineLocation &Location) { - DIType Ty = DV->getType(); - DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); - bool isPointer = false; - - StringRef varName = DV->getName(); - - if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); - isPointer = true; - } - - DICompositeType blockStruct = DICompositeType(TmpTy); - - // Find the __forwarding field and the variable field in the __Block_byref - // struct. - DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); - - for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); - StringRef fieldName = DT.getName(); - if (fieldName == "__forwarding") - forwardingField = Element; - else if (fieldName == varName) - varField = Element; - } - - // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; - - // Decode the original location, and use that as the start of the byref - // variable's location. 
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (Location.isReg()) { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - // If we started with a pointer to the __Block_byref... struct, then - // the first thing we need to do is dereference the pointer (DW_OP_deref). - if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Next add the offset for the '__forwarding' field: - // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in - // adding the offset if it's 0. - if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); - } - - // Now dereference the __forwarding field to get to the real __Block_byref - // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - - // Now that we've got the real __Block_byref... struct, add the offset - // for the variable's field to get to the location of the actual variable: - // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. - if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); - } - - // Now attach the location information to the DIE. 
- addBlock(Die, Attribute, 0, Block); -} - -/// addAddress - Add an address attribute to a die based on the location -/// provided. -void DwarfDebug::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - if (RI->getFrameRegister(*Asm->MF) == Location.getReg() - && Location.getOffset()) { - // If variable offset is based in frame register then use fbreg. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); - addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - addBlock(Die, Attribute, 0, Block); - return; - } - - if (Location.isReg()) { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - } else { - if (Reg < 32) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg); - } else { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(Block, 0, dwarf::DW_FORM_udata, Reg); - } - - addUInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset()); - } - - addBlock(Die, Attribute, 0, Block); -} - -/// addRegisterAddress - Add register location entry in variable DIE. -bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) { - assert (MO.isReg() && "Invalid machine operand!"); - if (!MO.getReg()) - return false; - MachineLocation Location; - Location.set(MO.getReg()); - addAddress(Die, dwarf::DW_AT_location, Location); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. 
-bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned Imm = MO.getImm(); - addUInt(Block, 0, dwarf::DW_FORM_udata, Imm); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantFPValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - APFloat FPImm = MO.getFPImm()->getValueAPF(); - - // Get the raw data form of the floating point. - const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); - - int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. - bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addConstantValue - Add constant value entry in variable DIE. -bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI, - bool Unsigned) { - if (CI->getBitWidth() <= 64) { - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - CI->getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, - CI->getSExtValue()); - return true; - } - - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - - // Get the raw data form of the large APInt. - const APInt Val = CI->getValue(); - const char *Ptr = (const char*)Val.getRawData(); - - int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. 
- bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); - - // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & Ptr[Start]); - - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; -} - -/// addToContextOwner - Add Die into the list of its context owner's children. -void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); - ContextDIE->addChild(Die); - } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); - ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context)); - ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) - ContextDIE->addChild(Die); - else - getCompileUnit(Context)->addDie(Die); -} - -/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the -/// given DIType. -DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) { - CompileUnit *TypeCU = getCompileUnit(Ty); - DIE *TyDIE = TypeCU->getDIE(Ty); - if (TyDIE) - return TyDIE; - - // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - TypeCU->insertDIE(Ty, TyDIE); - if (Ty.isBasicType()) - constructTypeDIE(*TyDIE, DIBasicType(Ty)); - else if (Ty.isCompositeType()) - constructTypeDIE(*TyDIE, DICompositeType(Ty)); - else { - assert(Ty.isDerivedType() && "Unknown kind of DIType"); - constructTypeDIE(*TyDIE, DIDerivedType(Ty)); - } - - addToContextOwner(TyDIE, Ty.getContext()); - return TyDIE; -} - -/// addType - Add a new type attribute to the specified entity. -void DwarfDebug::addType(DIE *Entity, DIType Ty) { - if (!Ty.Verify()) - return; - - // Check for pre-existence. 
- CompileUnit *TypeCU = getCompileUnit(Ty); - DIEEntry *Entry = TypeCU->getDIEEntry(Ty); - // If it exists then use the existing value. - if (Entry) { - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); - return; - } - - // Construct type. - DIE *Buffer = getOrCreateTypeDIE(Ty); - - // Set up proxy. - Entry = createDIEEntry(Buffer); - TypeCU->insertDIEEntry(Ty, Entry); - - Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry); -} - -/// constructTypeDIE - Construct basic type die from DIBasicType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { - // Get core information. - StringRef Name = BTy.getName(); - Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - BTy.getEncoding()); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); -} - -/// constructTypeDIE - Construct derived type die from DIDerivedType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { - // Get core information. - StringRef Name = DTy.getName(); - uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates. - if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); - - // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - - // Add source line info if available and TyDesc is not a forward declaration. 
- if (!DTy.isForwardDecl()) - addSourceLine(&Buffer, DTy); -} - -/// constructTypeDIE - Construct type DIE from DICompositeType. -void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { - // Get core information. - StringRef Name = CTy.getName(); - - uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); - - switch (Tag) { - case dwarf::DW_TAG_vector_type: - case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); - break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - } - break; - case dwarf::DW_TAG_subroutine_type: { - // Add return type. - DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); - - bool isPrototyped = true; - // Add arguments. - for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Ty = Elements.getElement(i); - if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); - isPrototyped = false; - } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - addType(Arg, DIType(Ty)); - Buffer.addChild(Arg); - } - } - // Add prototype flag. - if (isPrototyped) - addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); - } - break; - case dwarf::DW_TAG_structure_type: - case dwarf::DW_TAG_union_type: - case dwarf::DW_TAG_class_type: { - // Add elements to structure type. - DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. 
- for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIE *ElemDie = NULL; - if (Element.isSubprogram()) { - DISubprogram SP(Element); - ElemDie = createSubprogramDIE(DISubprogram(Element)); - if (SP.isProtected()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (SP.isPrivate()) - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - else - addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (SP.isExplicit()) - addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1); - } - else if (Element.isVariable()) { - DIVariable DV(Element); - ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); - addType(ElemDie, DV.getType()); - addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - addSourceLine(ElemDie, DV); - } else if (Element.isDerivedType()) - ElemDie = createMemberDIE(DIDerivedType(Element)); - else - continue; - Buffer.addChild(ElemDie); - } - - if (CTy.isAppleBlockExtension()) - addUInt(&Buffer, dwarf::DW_AT_APPLE_block, dwarf::DW_FORM_flag, 1); - - unsigned RLang = CTy.getRunTimeLang(); - if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); - - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } - - if (Tag == dwarf::DW_TAG_class_type) { - DIArray TParams = CTy.getTemplateParams(); - unsigned N = TParams.getNumElements(); - // Add template parameters. 
- for (unsigned i = 0; i < N; ++i) { - DIDescriptor Element = TParams.getElement(i); - if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); - else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); - } - } - break; - } - default: - break; - } - - // Add name if not anonymous or intermediate type. - if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type - || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { - // Add size if non-zero (derived types might be zero-sized.) - if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); - else { - // Add zero size if it is not a forward declaration. - if (CTy.isForwardDecl()) - addUInt(&Buffer, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - else - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); - } - - // Add source line info if available. - if (!CTy.isForwardDecl()) - addSourceLine(&Buffer, CTy); - } -} - -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - CompileUnit *TypeCU = getCompileUnit(TP); - DIE *ParamDIE = TypeCU->getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); - return ParamDIE; -} - -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. 
-DIE * -DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) { - CompileUnit *TVCU = getCompileUnit(TPV); - DIE *ParamDIE = TVCU->getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; -} - -/// constructSubrangeDIE - Construct subrange DIE from DISubrange. -void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){ - int64_t L = SR.getLo(); - int64_t H = SR.getHi(); - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - if (L) - addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); - - Buffer.addChild(DW_Subrange); -} - -/// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void DwarfDebug::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->getTag() == dwarf::DW_TAG_vector_type) - addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1); - - // Emit derived type. - addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); - - // Get an anonymous type for index type. - CompileUnit *TheCU = getCompileUnit(*CTy); - DIE *IdxTy = TheCU->getIndexTyDie(); - if (!IdxTy) { - // Construct an anonymous type for index type. - IdxTy = new DIE(dwarf::DW_TAG_base_type); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); - addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, - dwarf::DW_ATE_signed); - TheCU->addDie(IdxTy); - TheCU->setIndexTyDie(IdxTy); - } - - // Add subranges to array type. 
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - if (Element.getTag() == dwarf::DW_TAG_subrange_type) - constructSubrangeDIE(Buffer, DISubrange(Element), IdxTy); - } -} - -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; -} - /// getRealLinkageName - If special LLVM prefix that is used to inform the asm /// printer to not emit usual symbol prefix before the symbol name is used then /// return linkage name after skipping this special LLVM prefix. @@ -1295,84 +314,6 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } -/// createMemberDIE - Create new member DIE. -DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); - StringRef Name = DT.getName(); - if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - - addType(MemberDie, DT.getTypeDerivedFrom()); - - addSourceLine(MemberDie, DT); - - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. 
- addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getTargetData().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { - - // For C++, virtual base classes are not at fixed offset. Use following - // expression to extract appropriate offset from vtable. 
- // BaseAddr = ObAddr + *((*ObAddr) - Offset) - - DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); - - if (DT.isProtected()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_protected); - else if (DT.isPrivate()) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_private); - // Otherwise C++ member and base classes are considered public. - else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus) - addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag, - dwarf::DW_ACCESS_public); - if (DT.isVirtual()) - addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, - dwarf::DW_VIRTUALITY_virtual); - return MemberDie; -} - /// createSubprogramDIE - Create new DIE using SP. DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { CompileUnit *SPCU = getCompileUnit(SP); @@ -1381,19 +322,35 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { return SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); + + // DW_TAG_inlined_subroutine may refer to this DIE. + SPCU->insertDIE(SP, SPDie); + + // Add to context owner. + SPCU->addToContextOwner(SPDie, SP.getContext()); + + // Add function template parameters. 
+ SPCU->addTemplateParams(*SPDie, SP.getTemplateParams()); + + // If this DIE is going to refer declaration info using AT_specification + // then there is no need to add other attributes. + if (SP.getFunctionDeclaration().isSubprogram()) + return SPDie; + // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + SPCU->addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + SP.getName()); StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + SPCU->addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); - addSourceLine(SPDie, SP); + SPCU->addSourceLine(SPDie, SP); if (SP.isPrototyped()) - addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1); // Add Return Type. 
DICompositeType SPTy = SP.getType(); @@ -1401,24 +358,24 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { unsigned SPTag = SPTy.getTag(); if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); + SPCU->addType(SPDie, SPTy); else - addType(SPDie, DIType(Args.getElement(0))); + SPCU->addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { - addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); + SPCU->addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK); + DIEBlock *Block = SPCU->getDIEBlock(); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SPCU->addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + SPCU->addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); ContainingTypeMap.insert(std::make_pair(SPDie, SP.getContainingType())); } if (!SP.isDefinition()) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. 
DICompositeType SPTy = SP.getType(); @@ -1429,32 +386,26 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) { for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); + SPCU->addType(Arg, ATy); if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); SPDie->addChild(Arg); } } if (SP.isArtificial()) - addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); if (!SP.isLocalToUnit()) - addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); if (SP.isOptimized()) - addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); if (unsigned isa = Asm->getISAEncoding()) { - addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); + SPCU->addUInt(SPDie, dwarf::DW_AT_APPLE_isa, dwarf::DW_FORM_flag, isa); } - // DW_TAG_inlined_subroutine may refer to this DIE. - SPCU->insertDIE(SP, SPDie); - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); - return SPDie; } @@ -1509,51 +460,57 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); - // There is not any need to generate specification DIE for a function - // defined at compile unit level. If a function is defined inside another - // function then gdb prefers the definition at top level and but does not - // expect specification DIE in parent function. So avoid creating - // specification DIE for a function defined inside a function. 
- if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { - addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); - - // Add arguments. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(DIType(Args.getElement(i))); - addType(Arg, ATy); - if (ATy.isArtificial()) - addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); - SPDie->addChild(Arg); - } - DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); - SPCU->addDie(SPDie); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Refer function declaration directly. + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + createSubprogramDIE(SPDecl)); + else { + // There is not any need to generate specification DIE for a function + // defined at compile unit level. If a function is defined inside another + // function then gdb prefers the definition at top level and but does not + // expect specification DIE in parent function. So avoid creating + // specification DIE for a function defined inside a function. + if (SP.isDefinition() && !SP.getContext().isCompileUnit() && + !SP.getContext().isFile() && + !isSubprogramContext(SP.getContext())) { + SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + + // Add arguments. 
+ DICompositeType SPTy = SP.getType(); + DIArray Args = SPTy.getTypeArray(); + unsigned SPTag = SPTy.getTag(); + if (SPTag == dwarf::DW_TAG_subroutine_type) + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIType ATy = DIType(DIType(Args.getElement(i))); + SPCU->addType(Arg, ATy); + if (ATy.isArtificial()) + SPCU->addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + SPDie->addChild(Arg); + } + DIE *SPDeclDie = SPDie; + SPDie = new DIE(dwarf::DW_TAG_subprogram); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + SPDeclDie); + SPCU->addDie(SPDie); + } } - // Pick up abstract subprogram DIE. if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { SPDie = new DIE(dwarf::DW_TAG_subprogram); - addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsSPDIE); SPCU->addDie(SPDie); } - addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); - addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); + SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); - addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); return SPDie; } @@ -1570,13 +527,14 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { if (Ranges.empty()) return 0; + CompileUnit *TheCU = getCompileUnit(Scope->getScopeNode()); SmallVector::const_iterator RI = Ranges.begin(); if (Ranges.size() > 
1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, + DebugRangeSymbols.size() * Asm->getTargetData().getPointerSize()); for (SmallVector::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); @@ -1595,8 +553,8 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) { assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); return ScopeDIE; } @@ -1636,11 +594,11 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { CompileUnit *TheCU = getCompileUnit(InlinedSP); DIE *OriginDIE = TheCU->getDIE(InlinedSP); assert(OriginDIE && "Unable to find Origin DIE!"); - addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, OriginDIE); - addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); - addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, StartLabel); + TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, EndLabel); InlinedSubprogramDIEs.insert(OriginDIE); @@ -1656,8 +614,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { 
I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); DILocation DL(Scope->getInlinedAt()); - addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); - addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, TheCU->getID()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); return ScopeDIE; } @@ -1686,7 +644,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // Define variable debug information entry. DIE *VariableDie = new DIE(Tag); - + CompileUnit *VariableCU = getCompileUnit(DV->getVariable()); DIE *AbsDIE = NULL; DenseMap::iterator V2AVI = VarToAbstractVarMap.find(DV); @@ -1694,20 +652,23 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { AbsDIE = V2AVI->second->getDIE(); if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + VariableCU->addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - addSourceLine(VariableDie, DV->getVariable()); + VariableCU->addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, + Name); + VariableCU->addSourceLine(VariableDie, DV->getVariable()); // Add variable type. 
- addType(VariableDie, DV->getType()); + VariableCU->addType(VariableDie, DV->getType()); } if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); else if (DIVariable(DV->getVariable()).isArtificial()) - addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); + VariableCU->addUInt(VariableDie, dwarf::DW_AT_artificial, + dwarf::DW_FORM_flag, 1); if (Scope->isAbstractScope()) { DV->setDIE(VariableDie); @@ -1718,7 +679,7 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { unsigned Offset = DV->getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, + VariableCU->addLabel(VariableDie, dwarf::DW_AT_location, dwarf::DW_FORM_data4, Asm->GetTempSymbol("debug_loc", Offset)); DV->setDIE(VariableDie); UseDotDebugLocEntry.insert(VariableDie); @@ -1738,22 +699,30 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (DVInsn->getOperand(1).isImm() && TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm()); - updated = true; - } else - updated = addRegisterAddress(VariableDie, RegOp); - } - else if (DVInsn->getOperand(0).isImm()) - updated = addConstantValue(VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isFPImm()) - updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); - } else { - MachineLocation Location = Asm->getDebugValueLocation(DVInsn); - if (Location.getReg()) { - addAddress(VariableDie, dwarf::DW_AT_location, Location); + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, + DVInsn->getOperand(1).getImm(), + FrameReg); + 
MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + + } else if (RegOp.getReg()) + VariableCU->addVariableAddress(DV, VariableDie, + MachineLocation(RegOp.getReg())); updated = true; } + else if (DVInsn->getOperand(0).isImm()) + updated = VariableCU->addConstantValue(VariableDie, + DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isFPImm()) + updated = + VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + } else { + VariableCU->addVariableAddress(DV, VariableDie, + Asm->getDebugValueLocation(DVInsn)); + updated = true; } if (!updated) { // If variableDie is not updated then DBG_VALUE instruction does not @@ -1767,15 +736,21 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // .. else use frame index, if available. int FI = 0; - if (findVariableFrameIndex(DV, &FI)) - addVariableAddress(DV, VariableDie, FI); - + if (findVariableFrameIndex(DV, &FI)) { + unsigned FrameReg = 0; + const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); + int Offset = + TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + MachineLocation Location(FrameReg, Offset); + VariableCU->addVariableAddress(DV, VariableDie, Location); + } + DV->setDIE(VariableDie); return VariableDie; } -void DwarfDebug::addPubTypes(DISubprogram SP) { +void CompileUnit::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); unsigned SPTag = SPTy.getTag(); if (SPTag != dwarf::DW_TAG_subroutine_type) @@ -1789,9 +764,8 @@ void DwarfDebug::addPubTypes(DISubprogram SP) { DICompositeType CATy = getDICompositeType(ATy); if (DIDescriptor(CATy).Verify() && !CATy.getName().empty() && !CATy.isForwardDecl()) { - CompileUnit *TheCU = getCompileUnit(CATy); - if (DIEEntry *Entry = TheCU->getDIEEntry(CATy)) - TheCU->addGlobalType(CATy.getName(), Entry->getEntry()); + if (DIEEntry *Entry = getDIEEntry(CATy)) + addGlobalType(CATy.getName(), Entry->getEntry()); } } } @@ -1802,6 +776,14 @@ DIE 
*DwarfDebug::constructScopeDIE(DbgScope *Scope) { return NULL; SmallVector Children; + + // Collect arguments for current function. + if (Scope == CurrentFnDbgScope) + for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) + if (DbgVariable *ArgDV = CurrentFnArguments[i]) + if (DIE *Arg = constructVariableDIE(ArgDV, Scope)) + Children.push_back(Arg); + // Collect lexical scope childrens first. const SmallVector &Variables = Scope->getDbgVariables(); for (unsigned i = 0, N = Variables.size(); i < N; ++i) @@ -1841,7 +823,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - addPubTypes(DISubprogram(DS)); + getCompileUnit(DS)->addPubTypes(DISubprogram(DS)); return ScopeDIE; } @@ -1851,10 +833,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ +unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, + StringRef DirName) { // If FE did not provide a file name, then assume stdin. if (FileName.empty()) - return GetOrCreateSourceID(""); + return GetOrCreateSourceID("", StringRef()); + + // MCStream expects full path name as filename. + if (!DirName.empty() && !FileName.startswith("/")) { + std::string FullPathName(DirName.data()); + if (!DirName.endswith("/")) + FullPathName += "/"; + FullPathName += FileName.data(); + // Here FullPathName will be copied into StringMap by GetOrCreateSourceID. + return GetOrCreateSourceID(StringRef(FullPathName), StringRef()); + } StringMapEntry &Entry = SourceIdMap.GetOrCreateValue(FileName); if (Entry.getValue()) @@ -1864,19 +857,18 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){ Entry.setValue(SrcId); // Print out a .file directive to specify files for .loc directives. 
- Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey()); return SrcId; } /// getOrCreateNameSpace - Create a DIE for DINameSpace. -DIE *DwarfDebug::getOrCreateNameSpace(DINameSpace NS) { - CompileUnit *TheCU = getCompileUnit(NS); - DIE *NDie = TheCU->getDIE(NS); +DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + DIE *NDie = getDIE(NS); if (NDie) return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); - TheCU->insertDIE(NS, NDie); + insertDIE(NS, NDie); if (!NS.getName().empty()) addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); addSourceLine(NDie, NS); @@ -1890,40 +882,40 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN); + unsigned ID = GetOrCreateSourceID(FN, Dir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); - addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, - DIUnit.getLanguage()); - addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); + NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, + DIUnit.getProducer()); + NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, + DIUnit.getLanguage()); + NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. - addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); + NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. 
if (Asm->MAI->doesDwarfUsesAbsoluteLabelForStmtList()) - addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("section_line")); + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_addr, + Asm->GetTempSymbol("section_line")); else - addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) - addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); if (DIUnit.isOptimized()) - addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); - + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, Flags); + unsigned RVer = DIUnit.getRunTimeVersion(); if (RVer) - addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, + NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); - CompileUnit *NewCU = new CompileUnit(ID, Die); if (!FirstCU) FirstCU = NewCU; CUMap.insert(std::make_pair(N, NewCU)); @@ -2019,14 +1011,15 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { bool isGlobalVariable = GV.getGlobal() != NULL; // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + TheCU->addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, + GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + TheCU->addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); // Add type. 
- addType(VariableDIE, GTy); + TheCU->addType(VariableDIE, GTy); if (GTy.isCompositeType() && !GTy.getName().empty() && !GTy.isForwardDecl()) { DIEEntry *Entry = TheCU->getDIEEntry(GTy); @@ -2035,22 +1028,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { } // Add scoping info. if (!GV.isLocalToUnit()) { - addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); // Expose as global. TheCU->addGlobal(GV.getName(), VariableDIE); } // Add line number info. - addSourceLine(VariableDIE, GV); + TheCU->addSourceLine(VariableDIE, GV); // Add to map. TheCU->insertDIE(N, VariableDIE); // Add to context owner. DIDescriptor GVContext = GV.getContext(); - addToContextOwner(VariableDIE, GVContext); + TheCU->addToContextOwner(VariableDIE, GVContext); // Add location. if (isGlobalVariable) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, Asm->Mang->getSymbol(GV.getGlobal())); // Do not create specification DIE if context is either compile unit // or a subprogram. @@ -2058,28 +1051,28 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) { !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. 
DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, + TheCU->addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + TheCU->addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(VariableDIE, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); TheCU->addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } } else if (ConstantInt *CI = dyn_cast_or_null(GV.getConstant())) - addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); + TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy)); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast(CE->getOperand(0)))); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + TheCU->addLabel(Block, 0, dwarf::DW_FORM_udata, + Asm->Mang->getSymbol(cast(CE->getOperand(0)))); ConstantInt *CII = cast(CE->getOperand(2)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue()); + TheCU->addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } return; @@ -2105,7 +1098,7 @@ void DwarfDebug::constructSubprogramDIE(const MDNode *N) { 
TheCU->insertDIE(N, SubprogramDie); // Add to context owner. - addToContextOwner(SubprogramDie, SP.getContext()); + TheCU->addToContextOwner(SubprogramDie, SP.getContext()); // Expose as global. TheCU->addGlobal(SP.getName(), SubprogramDie); @@ -2160,12 +1153,16 @@ void DwarfDebug::beginModule(Module *M) { //getOrCreateTypeDIE if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) - getOrCreateTypeDIE(DIType(NMD->getOperand(i))); + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIType Ty(NMD->getOperand(i)); + getCompileUnit(Ty)->getOrCreateTypeDIE(Ty); + } // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); @@ -2216,7 +1213,7 @@ void DwarfDebug::endModule() { for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } for (DenseMap::iterator CI = ContainingTypeMap.begin(), @@ -2226,7 +1223,8 @@ void DwarfDebug::endModule() { if (!N) continue; DIE *NDie = getCompileUnit(N)->getDIE(N); if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + getCompileUnit(N)->addDIEEntry(SPDie, dwarf::DW_AT_containing_type, + dwarf::DW_FORM_ref4, NDie); } // Standard sections final addresses. 
@@ -2309,6 +1307,30 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, return AbsDbgVariable; } +/// addCurrentFnArgument - If Var is an current function argument that add +/// it in CurrentFnArguments list. +bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, + DbgVariable *Var, DbgScope *Scope) { + if (Scope != CurrentFnDbgScope) + return false; + DIVariable DV = Var->getVariable(); + if (DV.getTag() != dwarf::DW_TAG_arg_variable) + return false; + unsigned ArgNo = DV.getArgNumber(); + if (ArgNo == 0) + return false; + + size_t Size = CurrentFnArguments.size(); + if (Size == 0) + CurrentFnArguments.resize(MF->getFunction()->arg_size()); + // llvm::Function argument size is not good indicator of how many + // arguments does the function have at source level. + if (ArgNo > Size) + CurrentFnArguments.resize(ArgNo * 2); + CurrentFnArguments[ArgNo - 1] = Var; + return true; +} + /// collectVariableInfoFromMMITable - Collect variable information from /// side table maintained by MMI. void @@ -2337,7 +1359,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); DbgVariable *RegVar = new DbgVariable(DV); recordVariableFrameIndex(RegVar, VP.first); - Scope->addVariable(RegVar); + if (!addCurrentFnArgument(MF, RegVar, Scope)) + Scope->addVariable(RegVar); if (AbsDbgVariable) { recordVariableFrameIndex(AbsDbgVariable, VP.first); VarToAbstractVarMap[RegVar] = AbsDbgVariable; @@ -2349,9 +1372,9 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF, /// DBG_VALUE instruction, is in a defined reg. 
static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); - if (MI->getOperand(0).isReg() && MI->getOperand(0).getReg()) - return true; - return false; + return MI->getNumOperands() == 3 && + MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && + MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; } /// collectVariableInfo - Populate DbgScope entries with variables' info. @@ -2362,41 +1385,21 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, /// collection info from MMI table. collectVariableInfoFromMMITable(MF, Processed); - SmallVector DbgValues; - // Collect variable information from DBG_VALUE machine instructions; - for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end(); - I != E; ++I) - for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); - II != IE; ++II) { - const MachineInstr *MInsn = II; - if (!MInsn->isDebugValue()) - continue; - DbgValues.push_back(MInsn); - } - - // This is a collection of DBV_VALUE instructions describing same variable. 
- SmallVector MultipleValues; - for(SmallVector::iterator I = DbgValues.begin(), - E = DbgValues.end(); I != E; ++I) { - const MachineInstr *MInsn = *I; - MultipleValues.clear(); - if (isDbgValueInDefinedReg(MInsn)) - MultipleValues.push_back(MInsn); - DIVariable DV(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata()); - if (Processed.count(DV) != 0) + for (SmallVectorImpl::const_iterator + UVI = UserVariables.begin(), UVE = UserVariables.end(); UVI != UVE; + ++UVI) { + const MDNode *Var = *UVI; + if (Processed.count(Var)) continue; - const MachineInstr *PrevMI = MInsn; - for (SmallVector::iterator MI = I+1, - ME = DbgValues.end(); MI != ME; ++MI) { - const MDNode *Var = - (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && - !PrevMI->isIdenticalTo(*MI)) - MultipleValues.push_back(*MI); - PrevMI = *MI; - } + // History contains relevant DBG_VALUE instructions for Var and instructions + // clobbering it. + SmallVectorImpl &History = DbgValues[Var]; + if (History.empty()) + continue; + const MachineInstr *MInsn = History.front(); + DIVariable DV(Var); DbgScope *Scope = NULL; if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) @@ -2408,32 +1411,29 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, continue; Processed.insert(DV); + assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *RegVar = new DbgVariable(DV); - Scope->addVariable(RegVar); + if (!addCurrentFnArgument(MF, RegVar, Scope)) + Scope->addVariable(RegVar); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; VarToAbstractVarMap[RegVar] = AbsVar; } - if (MultipleValues.size() <= 1) { + + // Simple ranges that are fully coalesced. 
+ if (History.size() <= 1 || (History.size() == 2 && + MInsn->isIdenticalTo(History.back()))) { DbgVariableToDbgInstMap[RegVar] = MInsn; continue; } // handle multiple DBG_VALUE instructions describing one variable. - if (DotDebugLocEntries.empty()) - RegVar->setDotDebugLocOffset(0); - else - RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); - const MachineInstr *Begin = NULL; - const MachineInstr *End = NULL; - for (SmallVector::iterator - MVI = MultipleValues.begin(), MVE = MultipleValues.end(); - MVI != MVE; ++MVI) { - if (!Begin) { - Begin = *MVI; - continue; - } - End = *MVI; + RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); + + for (SmallVectorImpl::const_iterator + HI = History.begin(), HE = History.end(); HI != HE; ++HI) { + const MachineInstr *Begin = *HI; + assert(Begin->isDebugValue() && "Invalid History entry"); MachineLocation MLoc; if (Begin->getNumOperands() == 3) { if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) @@ -2441,25 +1441,32 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } else MLoc = Asm->getDebugValueLocation(Begin); - const MCSymbol *FLabel = getLabelBeforeInsn(Begin); - const MCSymbol *SLabel = getLabelBeforeInsn(End); - if (MLoc.getReg()) - DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc)); + // FIXME: emitDebugLoc only understands registers. + if (!MLoc.getReg()) + continue; - Begin = End; - if (MVI + 1 == MVE) { - // If End is the last instruction then its value is valid - // until the end of the funtion. - MachineLocation EMLoc; - if (End->getNumOperands() == 3) { - if (End->getOperand(0).isReg() && Begin->getOperand(1).isImm()) - EMLoc.set(Begin->getOperand(0).getReg(), Begin->getOperand(1).getImm()); - } else - EMLoc = Asm->getDebugValueLocation(End); - if (EMLoc.getReg()) - DotDebugLocEntries. - push_back(DotDebugLocEntry(SLabel, FunctionEndSym, EMLoc)); + // Compute the range for a register location. 
+ const MCSymbol *FLabel = getLabelBeforeInsn(Begin); + const MCSymbol *SLabel = 0; + + if (HI + 1 == HE) + // If Begin is the last instruction in History then its value is valid + // until the end of the function. + SLabel = FunctionEndSym; + else { + const MachineInstr *End = HI[1]; + if (End->isDebugValue()) + SLabel = getLabelBeforeInsn(End); + else { + // End is a normal instruction clobbering the range. + SLabel = getLabelAfterInsn(End); + assert(SLabel && "Forgot label after clobber instruction"); + ++HI; + } } + + // The value is valid until the next DBG_VALUE or clobber. + DotDebugLocEntries.push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var)); } DotDebugLocEntries.push_back(DotDebugLocEntry()); } @@ -2480,66 +1487,74 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, /// getLabelBeforeInsn - Return Label preceding the instruction. const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { - DenseMap::iterator I = - LabelsBeforeInsn.find(MI); - if (I == LabelsBeforeInsn.end()) - // FunctionBeginSym always preceeds all the instruction in current function. - return FunctionBeginSym; - return I->second; + MCSymbol *Label = LabelsBeforeInsn.lookup(MI); + assert(Label && "Didn't insert label before instruction"); + return Label; } /// getLabelAfterInsn - Return Label immediately following the instruction. const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { - DenseMap::iterator I = - LabelsAfterInsn.find(MI); - if (I == LabelsAfterInsn.end()) - return NULL; - return I->second; + return LabelsAfterInsn.lookup(MI); } /// beginInstruction - Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { - if (InsnNeedsLabel.count(MI) == 0) { - LabelsBeforeInsn[MI] = PrevLabel; - return; + // Check if source location changes, but ignore DBG_VALUE locations. 
+ if (!MI->isDebugValue()) { + DebugLoc DL = MI->getDebugLoc(); + if (DL != PrevInstLoc && (!DL.isUnknown() || UnknownLocations)) { + PrevInstLoc = DL; + if (!DL.isUnknown()) { + const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); + recordSourceLine(DL.getLine(), DL.getCol(), Scope); + } else + recordSourceLine(0, 0, 0); + } } - // Check location. - DebugLoc DL = MI->getDebugLoc(); - if (!DL.isUnknown()) { - const MDNode *Scope = DL.getScope(Asm->MF->getFunction()->getContext()); - PrevLabel = recordSourceLine(DL.getLine(), DL.getCol(), Scope); - PrevInstLoc = DL; - LabelsBeforeInsn[MI] = PrevLabel; - return; - } + // Insert labels where requested. + DenseMap::iterator I = + LabelsBeforeInsn.find(MI); - // If location is unknown then use temp label for this DBG_VALUE - // instruction. - if (MI->isDebugValue()) { + // No label needed. + if (I == LabelsBeforeInsn.end()) + return; + + // Label already assigned. + if (I->second) + return; + + if (!PrevLabel) { PrevLabel = MMI->getContext().CreateTempSymbol(); Asm->OutStreamer.EmitLabel(PrevLabel); - LabelsBeforeInsn[MI] = PrevLabel; - return; } - - if (UnknownLocations) { - PrevLabel = recordSourceLine(0, 0, 0); - LabelsBeforeInsn[MI] = PrevLabel; - return; - } - - assert (0 && "Instruction is not processed!"); + I->second = PrevLabel; } /// endInstruction - Process end of an instruction. void DwarfDebug::endInstruction(const MachineInstr *MI) { - if (InsnsEndScopeSet.count(MI) != 0) { - // Emit a label if this instruction ends a scope. - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - LabelsAfterInsn[MI] = Label; + // Don't create a new label after DBG_VALUE instructions. + // They don't generate code. + if (!MI->isDebugValue()) + PrevLabel = 0; + + DenseMap::iterator I = + LabelsAfterInsn.find(MI); + + // No label needed. + if (I == LabelsAfterInsn.end()) + return; + + // Label already assigned. 
+ if (I->second) + return; + + // We need a label after this instruction. + if (!PrevLabel) { + PrevLabel = MMI->getContext().CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(PrevLabel); } + I->second = PrevLabel; } /// getOrCreateDbgScope - Create DbgScope for the scope. @@ -2799,7 +1814,8 @@ void DwarfDebug::identifyScopeMarkers() { RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "DbgRange does not have first instruction!"); assert(RI->second && "DbgRange does not have second instruction!"); - InsnsEndScopeSet.insert(RI->second); + requestLabelBeforeInsn(RI->first); + requestLabelAfterInsn(RI->second); } } } @@ -2877,45 +1893,145 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { recordSourceLine(Line, Col, TheScope); + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + /// ProcessedArgs - Collection of arguments already processed. SmallPtrSet ProcessedArgs; - DebugLoc PrevLoc; + const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); + + /// LiveUserVar - Map physreg numbers to the MDNode they contain. + std::vector LiveUserVar(TRI->getNumRegs()); + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) + I != E; ++I) { + bool AtBlockEntry = true; for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { const MachineInstr *MI = II; - DebugLoc DL = MI->getDebugLoc(); + if (MI->isDebugValue()) { assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); - DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); - if (!DV.Verify()) continue; - // If DBG_VALUE is for a local variable then it needs a label. - if (DV.getTag() != dwarf::DW_TAG_arg_variable) - InsnNeedsLabel.insert(MI); - // DBG_VALUE for inlined functions argument needs a label. - else if (!DISubprogram(getDISubprogram(DV.getContext())). - describes(MF->getFunction())) - InsnNeedsLabel.insert(MI); - // DBG_VALUE indicating argument location change needs a label. 
- else if (!ProcessedArgs.insert(DV)) - InsnNeedsLabel.insert(MI); + + // Keep track of user variables. + const MDNode *Var = + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + + // Variable is in a register, we need to check for clobbers. + if (isDbgValueInDefinedReg(MI)) + LiveUserVar[MI->getOperand(0).getReg()] = Var; + + // Check the history of this variable. + SmallVectorImpl &History = DbgValues[Var]; + if (History.empty()) { + UserVariables.push_back(Var); + // The first mention of a function argument gets the FunctionBeginSym + // label, so arguments are visible when breaking at function entry. + DIVariable DV(Var); + if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && + DISubprogram(getDISubprogram(DV.getContext())) + .describes(MF->getFunction())) + LabelsBeforeInsn[MI] = FunctionBeginSym; + } else { + // We have seen this variable before. Try to coalesce DBG_VALUEs. + const MachineInstr *Prev = History.back(); + if (Prev->isDebugValue()) { + // Coalesce identical entries at the end of History. + if (History.size() >= 2 && + Prev->isIdenticalTo(History[History.size() - 2])) + History.pop_back(); + + // Terminate old register assignments that don't reach MI; + MachineFunction::const_iterator PrevMBB = Prev->getParent(); + if (PrevMBB != I && (!AtBlockEntry || llvm::next(PrevMBB) != I) && + isDbgValueInDefinedReg(Prev)) { + // Previous register assignment needs to terminate at the end of + // its basic block. + MachineBasicBlock::const_iterator LastMI = + PrevMBB->getLastNonDebugInstr(); + if (LastMI == PrevMBB->end()) + // Drop DBG_VALUE for empty range. + History.pop_back(); + else { + // Terminate after LastMI. + History.push_back(LastMI); + } + } + } + } + History.push_back(MI); } else { - // If location is unknown then instruction needs a location only if - // UnknownLocations flag is set. 
- if (DL.isUnknown()) { - if (UnknownLocations && !PrevLoc.isUnknown()) - InsnNeedsLabel.insert(MI); - } else if (DL != PrevLoc) - // Otherwise, instruction needs a location only if it is new location. - InsnNeedsLabel.insert(MI); + // Not a DBG_VALUE instruction. + if (!MI->isLabel()) + AtBlockEntry = false; + + // Check if the instruction clobbers any registers with debug vars. + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) + continue; + for (const unsigned *AI = TRI->getOverlaps(MOI->getReg()); + unsigned Reg = *AI; ++AI) { + const MDNode *Var = LiveUserVar[Reg]; + if (!Var) + continue; + // Reg is now clobbered. + LiveUserVar[Reg] = 0; + + // Was MD last defined by a DBG_VALUE referring to Reg? + DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); + if (HistI == DbgValues.end()) + continue; + SmallVectorImpl &History = HistI->second; + if (History.empty()) + continue; + const MachineInstr *Prev = History.back(); + // Sanity-check: Register assignments are terminated at the end of + // their block. + if (!Prev->isDebugValue() || Prev->getParent() != MI->getParent()) + continue; + // Is the variable still in Reg? + if (!isDbgValueInDefinedReg(Prev) || + Prev->getOperand(0).getReg() != Reg) + continue; + // Var is clobbered. Make sure the next instruction gets a label. + History.push_back(MI); + } + } } - - if (!DL.isUnknown() || UnknownLocations) - PrevLoc = DL; } + } + for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); + I != E; ++I) { + SmallVectorImpl &History = I->second; + if (History.empty()) + continue; + + // Make sure the final register assignments are terminated. 
+ const MachineInstr *Prev = History.back(); + if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { + const MachineBasicBlock *PrevMBB = Prev->getParent(); + MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr(); + if (LastMI == PrevMBB->end()) + // Drop DBG_VALUE for empty range. + History.pop_back(); + else { + // Terminate after LastMI. + History.push_back(LastMI); + } + } + // Request labels for the full history. + for (unsigned i = 0, e = History.size(); i != e; ++i) { + const MachineInstr *MI = History[i]; + if (MI->isDebugValue()) + requestLabelBeforeInsn(MI); + else + requestLabelAfterInsn(MI); + } + } + + PrevInstLoc = DebugLoc(); PrevLabel = FunctionBeginSym; } @@ -2963,8 +2079,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) - addUInt(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr, - dwarf::DW_FORM_flag, 1); + getCompileUnit(CurrentFnDbgScope->getScopeNode())->addUInt(CurFnDIE, + dwarf::DW_AT_APPLE_omit_frame_ptr, + dwarf::DW_FORM_flag, 1); DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), @@ -2973,12 +2090,13 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; - InsnNeedsLabel.clear(); + DeleteContainerPointers(CurrentFnArguments); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); DeleteContainerSeconds(DbgScopeMap); - InsnsEndScopeSet.clear(); + UserVariables.clear(); + DbgValues.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); AbstractScopesList.clear(); @@ -3029,10 +2147,9 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. 
-MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, - const MDNode *S) { +void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S){ StringRef Fn; - + StringRef Dir; unsigned Src = 1; if (S) { DIDescriptor Scope(S); @@ -3040,27 +2157,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, if (Scope.isCompileUnit()) { DICompileUnit CU(S); Fn = CU.getFilename(); + Dir = CU.getDirectory(); } else if (Scope.isFile()) { DIFile F(S); Fn = F.getFilename(); + Dir = F.getDirectory(); } else if (Scope.isSubprogram()) { DISubprogram SP(S); Fn = SP.getFilename(); + Dir = SP.getDirectory(); } else if (Scope.isLexicalBlock()) { DILexicalBlock DB(S); Fn = DB.getFilename(); + Dir = DB.getDirectory(); } else assert(0 && "Unexpected scope info"); - Src = GetOrCreateSourceID(Fn); + Src = GetOrCreateSourceID(Fn, Dir); } - Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT, - 0, 0); - - MCSymbol *Label = MMI->getContext().CreateTempSymbol(); - Asm->OutStreamer.EmitLabel(Label); - return Label; + 0, 0, Fn); } //===----------------------------------------------------------------------===// @@ -3118,17 +2234,15 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. /// void DwarfDebug::computeSizeAndOffsets() { - unsigned PrevOffset = 0; for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { // Compute size of compile unit header. - static unsigned Offset = PrevOffset + + unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. 
Section sizeof(int8_t); // Pointer Size (in bytes) computeSizeAndOffset(I->second->getCUDie(), Offset, true); - PrevOffset = Offset; } } @@ -3289,8 +2403,7 @@ void DwarfDebug::emitDebugInfo() { unsigned ContentSize = Die->getSize() + sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t) + // Pointer Size (in bytes) - sizeof(int32_t); // FIXME - extra pad for gdb bug. + sizeof(int8_t); // Pointer Size (in bytes) Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); Asm->EmitInt32(ContentSize); @@ -3303,12 +2416,6 @@ void DwarfDebug::emitDebugInfo() { Asm->EmitInt8(Asm->getTargetData().getPointerSize()); emitDIE(Die); - // FIXME - extra padding for gdb bug. - Asm->OutStreamer.AddComment("4 extra padding bytes for GDB"); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); - Asm->EmitInt8(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); } } @@ -3614,32 +2721,38 @@ void DwarfDebug::emitDebugLoc() { } else { Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); - const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); - unsigned Reg = RI->getDwarfRegNum(Entry.Loc.getReg(), false); - if (int Offset = Entry.Loc.getOffset()) { - // If the value is at a certain offset from frame register then - // use DW_OP_fbreg. - unsigned OffsetSize = Offset ? 
MCAsmInfo::getSLEB128Size(Offset) : 1; + DIVariable DV(Entry.Variable); + if (DV.hasComplexAddress()) { + unsigned N = DV.getNumAddrElements(); + unsigned i = 0; Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + OffsetSize); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_fbreg)); - Asm->EmitInt8(dwarf::DW_OP_fbreg); - Asm->OutStreamer.AddComment("Offset"); - Asm->EmitSLEB128(Offset); - } else { - if (Reg < 32) { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1); - Asm->OutStreamer.AddComment( - dwarf::OperationEncodingString(dwarf::DW_OP_reg0 + Reg)); - Asm->EmitInt8(dwarf::DW_OP_reg0 + Reg); + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { + // If first address element is OpPlus then emit + // DW_OP_breg + Offset instead of DW_OP_reg + Offset. + MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Loc) + N - 2); + Asm->EmitDwarfRegOp(Loc); +// Asm->EmitULEB128(DV.getAddrElement(1)); + i = 2; } else { - Asm->OutStreamer.AddComment("Loc expr size"); - Asm->EmitInt16(1 + MCAsmInfo::getULEB128Size(Reg)); - Asm->EmitInt8(dwarf::DW_OP_regx); - Asm->EmitULEB128(Reg); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc) + N); + Asm->EmitDwarfRegOp(Entry.Loc); } + + // Emit remaining complex address elements. 
+ for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); + if (Element == DIBuilder::OpPlus) { + Asm->EmitInt8(dwarf::DW_OP_plus_uconst); + Asm->EmitULEB128(DV.getAddrElement(++i)); + } else if (Element == DIBuilder::OpDeref) + Asm->EmitInt8(dwarf::DW_OP_deref); + else llvm_unreachable("unknown Opcode found in complex address"); + } + } else { + Asm->OutStreamer.AddComment("Loc expr size"); + Asm->EmitInt16(Asm->getDwarfRegOpSize(Entry.Loc)); + Asm->EmitDwarfRegOp(Entry.Loc); } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 7df0510fbfba..25f2675d40f0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -16,6 +16,7 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineLocation.h" +#include "llvm/Analysis/DebugInfo.h" #include "DIE.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" @@ -40,21 +41,6 @@ class DIE; class DIEBlock; class DIEEntry; -class DIEnumerator; -class DIDescriptor; -class DIVariable; -class DIGlobal; -class DIGlobalVariable; -class DISubprogram; -class DIBasicType; -class DIDerivedType; -class DIType; -class DINameSpace; -class DISubrange; -class DICompositeType; -class DITemplateTypeParameter; -class DITemplateValueParameter; - //===----------------------------------------------------------------------===// /// SrcLineInfo - This class is used to record source line correspondence. 
/// @@ -80,10 +66,12 @@ typedef struct DotDebugLocEntry { const MCSymbol *Begin; const MCSymbol *End; MachineLocation Loc; + const MDNode *Variable; bool Merged; - DotDebugLocEntry() : Begin(0), End(0), Merged(false) {} - DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L) - : Begin(B), End(E), Loc(L), Merged(false) {} + DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) {} + DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, + const MDNode *V) + : Begin(B), End(E), Loc(L), Variable(V), Merged(false) {} /// Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. bool isEmpty() { return Begin == 0 && End == 0; } @@ -96,6 +84,43 @@ typedef struct DotDebugLocEntry { } } DotDebugLocEntry; +//===----------------------------------------------------------------------===// +/// DbgVariable - This class is used to track local variable information. +/// +class DbgVariable { + DIVariable Var; // Variable Descriptor. + DIE *TheDIE; // Variable DIE. + unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. +public: + // AbsVar may be NULL. + DbgVariable(DIVariable V) : Var(V), TheDIE(0), DotDebugLocOffset(~0U) {} + + // Accessors. 
+ DIVariable getVariable() const { return Var; } + void setDIE(DIE *D) { TheDIE = D; } + DIE *getDIE() const { return TheDIE; } + void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } + unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } + StringRef getName() const { return Var.getName(); } + unsigned getTag() const { return Var.getTag(); } + bool variableHasComplexAddress() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.hasComplexAddress(); + } + bool isBlockByrefVariable() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.isBlockByrefVariable(); + } + unsigned getNumAddrElements() const { + assert(Var.Verify() && "Invalid complex DbgVariable!"); + return Var.getNumAddrElements(); + } + uint64_t getAddrElement(unsigned i) const { + return Var.getAddrElement(i); + } + DIType getType() const; +}; + class DwarfDebug { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; @@ -122,12 +147,6 @@ class DwarfDebug { /// id mapped to a unique id. StringMap SourceIdMap; - /// DIEBlocks - A list of all the DIEBlocks in use. - std::vector DIEBlocks; - - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - /// StringPool - A String->Symbol mapping of strings used by indirect /// references. StringMap > StringPool; @@ -139,10 +158,13 @@ class DwarfDebug { /// UniqueVector SectionMap; - // CurrentFnDbgScope - Top level scope for the current function. - // + /// CurrentFnDbgScope - Top level scope for the current function. + /// DbgScope *CurrentFnDbgScope; + /// CurrentFnArguments - List of Arguments (DbgValues) for current function. + SmallVector CurrentFnArguments; + /// DbgScopeMap - Tracks the scopes in the current function. Owns the /// contained DbgScope*s. /// @@ -195,10 +217,6 @@ class DwarfDebug { /// corresponds to the MDNode mapped with the subprogram DIE. 
DenseMap ContainingTypeMap; - typedef SmallVector ScopeVector; - - SmallPtrSet InsnsEndScopeSet; - /// InlineInfo - Keep track of inlined functions and their location. This /// information is used to populate debug_inlined section. typedef std::pair InlineInfoLabels; @@ -217,9 +235,16 @@ class DwarfDebug { /// instruction. DenseMap LabelsAfterInsn; - /// insnNeedsLabel - Collection of instructions that need a label to mark - /// a debuggging information entity. - SmallPtrSet InsnNeedsLabel; + /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction + /// in order of appearance. + SmallVector UserVariables; + + /// DbgValues - For each user variable, keep a list of DBG_VALUE + /// instructions in order. The list can also contain normal instructions that + /// clobber the previous DBG_VALUE. + typedef DenseMap > + DbgValueHistoryMap; + DbgValueHistoryMap DbgValues; SmallVector DebugRangeSymbols; @@ -238,6 +263,9 @@ class DwarfDebug { std::vector DebugFrames; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; + // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -246,150 +274,12 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; - DIEInteger *DIEIntegerOne; private: - /// getNumSourceIds - Return the number of unique source ids. - unsigned getNumSourceIds() const { - return SourceIdMap.size(); - } - /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void assignAbbrevNumber(DIEAbbrev &Abbrev); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); - - /// addUInt - Add an unsigned integer attribute data and value. 
- /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); - - /// addSInt - Add an signed integer attribute data and value. - /// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); - - /// addString - Add a string attribute data and value. - /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); - - /// addLabel - Add a Dwarf label attribute data and value. - /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label); - - /// addDelta - Add a label delta attribute data and value. - /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); - - /// addDIEEntry - Add a DIE attribute data and value. - /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); - - /// addBlock - Add block data. - /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); - - /// addSourceLine - Add location information to specified debug information - /// entry. - void addSourceLine(DIE *Die, DIVariable V); - void addSourceLine(DIE *Die, DIGlobalVariable G); - void addSourceLine(DIE *Die, DISubprogram SP); - void addSourceLine(DIE *Die, DIType Ty); - void addSourceLine(DIE *Die, DINameSpace NS); - - /// addAddress - Add an address attribute to a die based on the location - /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addRegisterAddress - Add register location entry in variable DIE. - bool addRegisterAddress(DIE *Die, const MachineOperand &MO); - - /// addConstantValue - Add constant value entry in variable DIE. - bool addConstantValue(DIE *Die, const MachineOperand &MO); - bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned); - - /// addConstantFPValue - Add constant value entry in variable DIE. 
- bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - - /// addComplexAddress - Start with the address based on the location provided, - /// and generate the DWARF information necessary to find the actual variable - /// (navigating the extra location information encoded in the type) based on - /// the starting location. Add the DWARF information to the die. - /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - // FIXME: Should be reformulated in terms of addComplexAddress. - /// addBlockByrefAddress - Start with the address based on the location - /// provided, and generate the DWARF information necessary to find the - /// actual Block variable (navigating the Block struct) based on the - /// starting location. Add the DWARF information to the die. Obsolete, - /// please use addComplexAddress instead. - /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, - const MachineLocation &Location); - - /// addVariableAddress - Add DW_AT_location attribute for a DbgVariable based - /// on provided frame index. - void addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); - - /// addType - Add a new type attribute to the specified entity. - void addType(DIE *Entity, DIType Ty); - - - /// getOrCreateNameSpace - Create a DIE for DINameSpace. - DIE *getOrCreateNameSpace(DINameSpace NS); - - /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the - /// given DIType. - DIE *getOrCreateTypeDIE(DIType Ty); - - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. 
- DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); - - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateValueParameter. - DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); - - void addPubTypes(DISubprogram SP); - - /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, - DIBasicType BTy); - - /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, - DIDerivedType DTy); - - /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, - DICompositeType CTy); - - /// constructSubrangeDIE - Construct subrange DIE from DISubrange. - void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); - - /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy); - - /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(DIEnumerator ETy); - - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(DIDerivedType DT); - - /// createSubprogramDIE - Create new DIE using SP. - DIE *createSubprogramDIE(DISubprogram SP); - /// getOrCreateDbgScope - Create DbgScope for the scope. DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt); @@ -504,11 +394,6 @@ class DwarfDebug { /// inlining instance. void emitDebugInlineInfo(); - /// GetOrCreateSourceID - Look up the source id with the given directory and - /// source file names. If none currently exists, create a new id and insert it - /// in the SourceIds map. - unsigned GetOrCreateSourceID(StringRef FullName); - /// constructCompileUnit - Create new CompileUnit for the given /// metadata node with tag DW_TAG_compile_unit. 
void constructCompileUnit(const MDNode *N); @@ -525,7 +410,7 @@ class DwarfDebug { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. - MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); + void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope); /// recordVariableFrameIndex - Record a variable's index. void recordVariableFrameIndex(const DbgVariable *V, int Index); @@ -546,6 +431,11 @@ class DwarfDebug { /// and collect DbgScopes. Return true, if atleast one scope was found. bool extractScopeInformation(); + /// addCurrentFnArgument - If Var is an current function argument that add + /// it in CurrentFnArguments list. + bool addCurrentFnArgument(const MachineFunction *MF, + DbgVariable *Var, DbgScope *Scope); + /// collectVariableInfo - Populate DbgScope entries with variables' info. void collectVariableInfo(const MachineFunction *, SmallPtrSet &ProcessedVars); @@ -554,6 +444,23 @@ class DwarfDebug { /// side table maintained by MMI. void collectVariableInfoFromMMITable(const MachineFunction * MF, SmallPtrSet &P); + + /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI. + void requestLabelBeforeInsn(const MachineInstr *MI) { + LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + } + + /// getLabelBeforeInsn - Return Label preceding the instruction. + const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + + /// requestLabelAfterInsn - Ensure that a label will be emitted after MI. + void requestLabelAfterInsn(const MachineInstr *MI) { + LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0)); + } + + /// getLabelAfterInsn - Return Label immediately following the instruction. + const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + public: //===--------------------------------------------------------------------===// // Main entry points. 
@@ -577,17 +484,19 @@ class DwarfDebug { /// void endFunction(const MachineFunction *MF); - /// getLabelBeforeInsn - Return Label preceding the instruction. - const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - - /// getLabelAfterInsn - Return Label immediately following the instruction. - const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); - /// beginInstruction - Process beginning of an instruction. void beginInstruction(const MachineInstr *MI); /// endInstruction - Prcess end of an instruction. void endInstruction(const MachineInstr *MI); + + /// GetOrCreateSourceID - Look up the source id with the given directory and + /// source file names. If none currently exists, create a new id and insert it + /// in the SourceIds map. + unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName); + + /// createSubprogramDIE - Create new DIE using SP. + DIE *createSubprogramDIE(DISubprogram SP); }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index a172e53f8ac7..f11164122cc4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -140,17 +140,18 @@ class DwarfException { }; class DwarfCFIException : public DwarfException { - /// shouldEmitTable - Per-function flag to indicate if EH tables should - /// be emitted. - bool shouldEmitTable; + /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality + /// should be emitted. + bool shouldEmitPersonality; + + /// shouldEmitLSDA - Per-function flag to indicate if .cfi_lsda + /// should be emitted. + bool shouldEmitLSDA; /// shouldEmitMoves - Per-function flag to indicate if frame moves info /// should be emitted. bool shouldEmitMoves; - /// shouldEmitTableModule - Per-module flag to indicate if EH tables - /// should be emitted. - bool shouldEmitTableModule; public: //===--------------------------------------------------------------------===// // Main entry points. 
@@ -237,6 +238,38 @@ class DwarfTableException : public DwarfException { virtual void EndFunction(); }; + +class ARMException : public DwarfException { + /// shouldEmitTable - Per-function flag to indicate if EH tables should + /// be emitted. + bool shouldEmitTable; + + /// shouldEmitMoves - Per-function flag to indicate if frame moves info + /// should be emitted. + bool shouldEmitMoves; + + /// shouldEmitTableModule - Per-module flag to indicate if EH tables + /// should be emitted. + bool shouldEmitTableModule; +public: + //===--------------------------------------------------------------------===// + // Main entry points. + // + ARMException(AsmPrinter *A); + virtual ~ARMException(); + + /// EndModule - Emit all exception information that should come after the + /// content. + virtual void EndModule(); + + /// BeginFunction - Gather pre-function exception information. Assumes being + /// emitted immediately after the function entry point. + virtual void BeginFunction(const MachineFunction *MF); + + /// EndFunction - Gather and emit post-function exception information. 
+ virtual void EndFunction(); +}; + } // End of namespace llvm #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp index 751901183cd0..b50d8bd3cecc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp @@ -92,7 +92,7 @@ void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) // personality function reference: unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(false); unsigned PerEncoding = TLOF.getPersonalityEncoding(); char Augmentation[6] = { 0 }; @@ -168,7 +168,7 @@ void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); unsigned LSDAEncoding = TLOF.getLSDAEncoding(); - unsigned FDEEncoding = TLOF.getFDEEncoding(); + unsigned FDEEncoding = TLOF.getFDEEncoding(false); Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection()); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 78a87431feaa..77043406bc85 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1048,7 +1048,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // AnalyzeBranch. 
if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 && PrevBB.succ_size() == 1 && - !MBB->hasAddressTaken()) { + !MBB->hasAddressTaken() && !MBB->isLandingPad()) { DEBUG(dbgs() << "\nMerging into block: " << PrevBB << "From MBB: " << *MBB); PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end()); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index d7d0e1b3812b..2ca3859caf04 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -19,6 +19,7 @@ add_llvm_library(LLVMCodeGen GCStrategy.cpp IfConversion.cpp InlineSpiller.cpp + InterferenceCache.cpp IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index 76bb3d148b0b..e5894b8cca9d 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -87,8 +87,8 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg, } void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo &tri = *mf_.getTarget().getRegisterInfo(); + MachineRegisterInfo &mri = MF.getRegInfo(); + const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; unsigned loopDepth = 0; @@ -103,6 +103,9 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // Don't recompute a target specific hint. bool noHint = mri.getRegAllocationHint(li.reg).first != 0; + // Don't recompute spill weight for an unspillable register. + bool Spillable = li.isSpillable(); + for (MachineRegisterInfo::reg_iterator I = mri.reg_begin(li.reg); MachineInstr *mi = I.skipInstruction();) { if (mi->isIdentityCopy() || mi->isImplicitDef() || mi->isDebugValue()) @@ -110,34 +113,37 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (!visited.insert(mi)) continue; - // Get loop info for mi. 
- if (mi->getParent() != mbb) { - mbb = mi->getParent(); - loop = loops_.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; - isExiting = loop ? loop->isLoopExiting(mbb) : false; + float weight = 1.0f; + if (Spillable) { + // Get loop info for mi. + if (mi->getParent() != mbb) { + mbb = mi->getParent(); + loop = Loops.getLoopFor(mbb); + loopDepth = loop ? loop->getLoopDepth() : 0; + isExiting = loop ? loop->isLoopExiting(mbb) : false; + } + + // Calculate instr weight. + bool reads, writes; + tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); + weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + + // Give extra weight to what looks like a loop induction variable update. + if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) + weight *= 3; + + totalWeight += weight; } - // Calculate instr weight. - bool reads, writes; - tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - float weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); - - // Give extra weight to what looks like a loop induction variable update. - if (writes && isExiting && lis_.isLiveOutOfMBB(li, mbb)) - weight *= 3; - - totalWeight += weight; - // Get allocation hints from copies. if (noHint || !mi->isCopy()) continue; unsigned hint = copyHint(mi, li.reg, tri, mri); if (!hint) continue; - float hweight = hint_[hint] += weight; + float hweight = Hint[hint] += weight; if (TargetRegisterInfo::isPhysicalRegister(hint)) { - if (hweight > bestPhys && lis_.isAllocatable(hint)) + if (hweight > bestPhys && LIS.isAllocatable(hint)) bestPhys = hweight, hintPhys = hint; } else { if (hweight > bestVirt) @@ -145,15 +151,19 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } } - hint_.clear(); + Hint.clear(); // Always prefer the physreg hint. if (unsigned hint = hintPhys ? hintPhys : hintVirt) { mri.setRegAllocationHint(li.reg, 0, hint); - // Weakly boost the spill weifght of hinted registers. 
+ // Weakly boost the spill weight of hinted registers. totalWeight *= 1.01F; } + // If the live interval was already unspillable, leave it that way. + if (!Spillable) + return; + // Mark li as unspillable if all live ranges are tiny. if (li.isZeroLength()) { li.markNotSpillable(); @@ -166,8 +176,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { // FIXME: this gets much more complicated once we support non-trivial // re-materialization. bool isLoad = false; - SmallVector spillIs; - if (lis_.isReMaterializable(li, spillIs, isLoad)) { + if (LIS.isReMaterializable(li, 0, isLoad)) { if (isLoad) totalWeight *= 0.9F; else @@ -178,50 +187,29 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { } void VirtRegAuxInfo::CalculateRegClass(unsigned reg) { - MachineRegisterInfo &mri = mf_.getRegInfo(); - const TargetRegisterInfo *tri = mf_.getTarget().getRegisterInfo(); - const TargetRegisterClass *orc = mri.getRegClass(reg); - SmallPtrSet rcs; + MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + const TargetRegisterClass *OldRC = MRI.getRegClass(reg); + const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); - for (MachineRegisterInfo::reg_nodbg_iterator I = mri.reg_nodbg_begin(reg), - E = mri.reg_nodbg_end(); I != E; ++I) { - // The targets don't have accurate enough regclass descriptions that we can - // handle subregs. We need something similar to - // TRI::getMatchingSuperRegClass, but returning a super class instead of a - // sub class. - if (I.getOperand().getSubReg()) { - DEBUG(dbgs() << "Cannot handle subregs: " << I.getOperand() << '\n'); + // Stop early if there is no room to grow. + if (NewRC == OldRC) + return; + + // Accumulate constraints from all uses. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + // TRI doesn't have accurate enough information to model this yet. 
+ if (I.getOperand().getSubReg()) + return; + const TargetRegisterClass *OpRC = + I->getDesc().getRegClass(I.getOperandNo(), TRI); + if (OpRC) + NewRC = getCommonSubClass(NewRC, OpRC); + if (!NewRC || NewRC == OldRC) return; - } - if (const TargetRegisterClass *rc = - I->getDesc().getRegClass(I.getOperandNo(), tri)) - rcs.insert(rc); } - - // If we found no regclass constraints, just leave reg as is. - // In theory, we could inflate to the largest superclass of reg's existing - // class, but that might not be legal for the current cpu setting. - // This could happen if reg is only used by COPY instructions, so we may need - // to improve on this. - if (rcs.empty()) { - return; - } - - // Compute the intersection of all classes in rcs. - // This ought to be independent of iteration order, but if the target register - // classes don't form a proper algebra, it is possible to get different - // results. The solution is to make sure the intersection of any two register - // classes is also a register class or the null set. - const TargetRegisterClass *rc = 0; - for (SmallPtrSet::iterator I = rcs.begin(), - E = rcs.end(); I != E; ++I) { - rc = rc ? 
getCommonSubClass(rc, *I) : *I; - assert(rc && "Incompatible regclass constraints found"); - } - - if (rc == orc) - return; - DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg) - << " to " << rc->getName() <<".\n"); - mri.setRegClass(reg, rc); + DEBUG(dbgs() << "Inflating " << OldRC->getName() << ':' << PrintReg(reg) + << " to " << NewRC->getName() <<".\n"); + MRI.setRegClass(reg, NewRC); } diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 2ad80b4d3a75..bfb6ba10234f 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -19,15 +19,18 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, SmallVector &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), + CallOrPrologue(Invalid) { // No stack is used. 
StackOffset = 0; + clearFirstByValReg(); UsedRegs.resize((TRI.getNumRegs()+31)/32); } @@ -44,8 +47,8 @@ void CCState::HandleByVal(unsigned ValNo, MVT ValVT, Size = MinSize; if (MinAlign > (int)Align) Align = MinAlign; + TM.getTargetLowering()->HandleByVal(const_cast(this), Size); unsigned Offset = AllocateStack(Size, Align); - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); } @@ -155,7 +158,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG dbgs() << "Call result #" << i << " has unhandled type " - << EVT(VT).getEVTString(); + << EVT(VT).getEVTString() << "\n"; #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 91a9536e7757..270c337ef67e 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements the pass that optimize code placement and align loop -// headers to target specific alignment boundary. +// This file implements the pass that optimizes code placement and aligns loop +// headers to target-specific alignment boundaries. // //===----------------------------------------------------------------------===// @@ -40,7 +40,7 @@ namespace { virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { - return "Code Placement Optimizater"; + return "Code Placement Optimizer"; } virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -254,7 +254,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, // Determine a position to move orphaned loop blocks to. If TopMBB is not // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid loosing that fallthrough. Otherwise append + // to the top of the loop to avoid losing that fallthrough. 
Otherwise append // them to the bottom, even if it previously had a fallthrough, on the theory // that it's worth an extra branch to keep the loop contiguous. MachineFunction::iterator InsertPt = diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 0ebb5b0db70e..34b1a396bb72 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -93,7 +93,8 @@ namespace { /// with the eh.exception call. This recursively looks past instructions /// which don't change the EH pointer value, like casts or PHI nodes. bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet &SelCalls); + SmallPtrSet &SelCalls, + SmallPtrSet &SeenPHIs); public: static char ID; // Pass identification, replacement for typeid. @@ -199,8 +200,8 @@ bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet &Sels) { /// change the EH pointer value, like casts or PHI nodes. bool DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet &SelCalls) { - SmallPtrSet SeenPHIs; + SmallPtrSet &SelCalls, + SmallPtrSet &SeenPHIs) { bool Changed = false; for (Value::use_iterator @@ -215,11 +216,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, if (Invoke->getCalledFunction() == URoR) URoRInvoke = true; } else if (CastInst *CI = dyn_cast(II)) { - Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls); + Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs); } else if (PHINode *PN = dyn_cast(II)) { if (SeenPHIs.insert(PN)) // Don't process a PHI node more than once. 
- Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls); + Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs); } } @@ -294,7 +295,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() { bool URoRInvoke = false; SmallPtrSet SelCalls; - Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls); + SmallPtrSet SeenPHIs; + Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs); if (URoRInvoke) { // This EH pointer is being used by an invoke of an URoR instruction and @@ -437,8 +439,9 @@ bool DwarfEHPrepare::NormalizeLandingPads() { if (InVal == 0) { // Different unwind edges have different values. Create a new PHI node // in NewBB. - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".unwind", - NewBB); + PHINode *NewPN = PHINode::Create(PN->getType(), + PN->getNumIncomingValues(), + PN->getName()+".unwind", NewBB); // Add an entry for each unwind edge, using the value from the old PHI. for (pred_iterator PI = PB; PI != PE; ++PI) NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index e08feeb27539..5b634682cc87 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -173,7 +173,7 @@ namespace llvm { unsigned Offset; // sh_offset - Offset from the file start unsigned Size; // sh_size - The section size. unsigned Link; // sh_link - Section header table index link. - unsigned Info; // sh_info - Auxillary information. + unsigned Info; // sh_info - Auxiliary information. unsigned Align; // sh_addralign - Alignment of section. unsigned EntSize; // sh_entsize - Size of entries in the section e diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 0fd1e8e83bd7..fa2319bff704 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -77,7 +77,7 @@ ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) // Create the object code emitter object for this target. 
ElfCE = new ELFCodeEmitter(*this); - // Inital number of sections + // Initial number of sections NumSections = 0; } @@ -660,19 +660,21 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { /// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the /// function pointers, ignoring the init priority. void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) { - // Should be an array of '{ int, void ()* }' structs. The first value is the + // Should be an array of '{ i32, void ()* }' structs. The first value is the // init priority, which we ignore. - if (!isa(List)) return; + if (List->isNullValue()) return; ConstantArray *InitList = cast(List); - for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) - if (ConstantStruct *CS = dyn_cast(InitList->getOperand(i))){ - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. + for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { + if (InitList->getOperand(i)->isNullValue()) + continue; + ConstantStruct *CS = cast(InitList->getOperand(i)); - if (CS->getOperand(1)->isNullValue()) - return; // Found a null terminator, exit printing. - // Emit the function pointer. - EmitGlobalConstant(CS->getOperand(1), Xtor); - } + if (CS->getOperand(1)->isNullValue()) + continue; + + // Emit the function pointer. + EmitGlobalConstant(CS->getOperand(1), Xtor); + } } bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp index aed8bc947991..646e01407a4f 100644 --- a/lib/CodeGen/EdgeBundles.cpp +++ b/lib/CodeGen/EdgeBundles.cpp @@ -53,6 +53,19 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) { EC.compress(); if (ViewEdgeBundles) view(); + + // Compute the reverse mapping. 
+ Blocks.clear(); + Blocks.resize(getNumBundles()); + + for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) { + unsigned b0 = getBundle(i, 0); + unsigned b1 = getBundle(i, 1); + Blocks[b0].push_back(i); + if (b1 != b0) + Blocks[b1].push_back(i); + } + return false; } @@ -82,5 +95,3 @@ raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G, O << "}\n"; return O; } - - diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index b5ec303f5d93..ebc2fc91efa3 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// Expand Psuedo-instructions produced by ISel. These are usually to allow +// Expand Pseudo-instructions produced by ISel. These are usually to allow // the expansion to contain control flow, such as a conditional move // implemented with a conditional branch and a phi, or an atomic operation // implemented with a loop. diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index db53b0473a9a..790200b8df5f 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -146,10 +145,6 @@ namespace { : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {} }; - /// Roots - Basic blocks that do not have successors. These are the starting - /// points of Graph traversal. - std::vector Roots; - /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. 
std::vector BBAnalysis; @@ -287,11 +282,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { MF.RenumberBlocks(); BBAnalysis.resize(MF.getNumBlockIDs()); - // Look for root nodes, i.e. blocks without successors. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - if (I->succ_empty()) - Roots.push_back(I); - std::vector Tokens; MadeChange = false; unsigned NumIfCvts = NumSimple + NumSimpleFalse + NumTriangle + @@ -406,7 +396,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { } Tokens.clear(); - Roots.clear(); BBAnalysis.clear(); if (MadeChange && IfCvtBranchFold) { @@ -924,13 +913,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, /// candidates. void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens) { - std::set Visited; - for (unsigned i = 0, e = Roots.size(); i != e; ++i) { - for (idf_ext_iterator I=idf_ext_begin(Roots[i],Visited), - E = idf_ext_end(Roots[i], Visited); I != E; ++I) { - MachineBasicBlock *BB = *I; - AnalyzeBlock(BB, Tokens); - } + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + MachineBasicBlock *BB = I; + AnalyzeBlock(BB, Tokens); } // Sort to favor more complex ifcvt scheme. 
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 38e6c8590269..b1a33a6afa42 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -19,41 +19,75 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; -static cl::opt -VerifySpills("verify-spills", cl::desc("Verify after each spill/split")); - namespace { class InlineSpiller : public Spiller { - MachineFunctionPass &pass_; - MachineFunction &mf_; - LiveIntervals &lis_; - LiveStacks &lss_; - AliasAnalysis *aa_; - VirtRegMap &vrm_; - MachineFrameInfo &mfi_; - MachineRegisterInfo &mri_; - const TargetInstrInfo &tii_; - const TargetRegisterInfo &tri_; - const BitVector reserved_; + MachineFunctionPass &Pass; + MachineFunction &MF; + LiveIntervals &LIS; + LiveStacks &LSS; + AliasAnalysis *AA; + MachineDominatorTree &MDT; + MachineLoopInfo &Loops; + VirtRegMap &VRM; + MachineFrameInfo &MFI; + MachineRegisterInfo &MRI; + const TargetInstrInfo &TII; + const TargetRegisterInfo &TRI; // Variables that are valid during spill(), but used by multiple methods. - LiveRangeEdit *edit_; - const TargetRegisterClass *rc_; - int stackSlot_; + LiveRangeEdit *Edit; + LiveInterval *StackInt; + int StackSlot; + unsigned Original; + + // All registers to spill to StackSlot, including the main register. + SmallVector RegsToSpill; + + // All COPY instructions to/from snippets. + // They are ignored since both operands refer to the same stack slot. 
+ SmallPtrSet SnippetCopies; // Values that failed to remat at some point. - SmallPtrSet usedValues_; + SmallPtrSet UsedValues; + + // Information about a value that was defined by a copy from a sibling + // register. + struct SibValueInfo { + // True when all reaching defs were reloads: No spill is necessary. + bool AllDefsAreReloads; + + // The preferred register to spill. + unsigned SpillReg; + + // The value of SpillReg that should be spilled. + VNInfo *SpillVNI; + + // A defining instruction that is not a sibling copy or a reload, or NULL. + // This can be used as a template for rematerialization. + MachineInstr *DefMI; + + SibValueInfo(unsigned Reg, VNInfo *VNI) + : AllDefsAreReloads(false), SpillReg(Reg), SpillVNI(VNI), DefMI(0) {} + }; + + // Values in RegsToSpill defined by sibling copies. + typedef DenseMap SibValueMap; + SibValueMap SibValues; + + // Dead defs generated during spilling. + SmallVector DeadDefs; ~InlineSpiller() {} @@ -61,34 +95,52 @@ class InlineSpiller : public Spiller { InlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) - : pass_(pass), - mf_(mf), - lis_(pass.getAnalysis()), - lss_(pass.getAnalysis()), - aa_(&pass.getAnalysis()), - vrm_(vrm), - mfi_(*mf.getFrameInfo()), - mri_(mf.getRegInfo()), - tii_(*mf.getTarget().getInstrInfo()), - tri_(*mf.getTarget().getRegisterInfo()), - reserved_(tri_.getReservedRegs(mf_)) {} - - void spill(LiveInterval *li, - SmallVectorImpl &newIntervals, - const SmallVectorImpl &spillIs); + : Pass(pass), + MF(mf), + LIS(pass.getAnalysis()), + LSS(pass.getAnalysis()), + AA(&pass.getAnalysis()), + MDT(pass.getAnalysis()), + Loops(pass.getAnalysis()), + VRM(vrm), + MFI(*mf.getFrameInfo()), + MRI(mf.getRegInfo()), + TII(*mf.getTarget().getInstrInfo()), + TRI(*mf.getTarget().getRegisterInfo()) {} void spill(LiveRangeEdit &); private: - bool reMaterializeFor(MachineBasicBlock::iterator MI); + bool isSnippet(const LiveInterval &SnipLI); + void collectRegsToSpill(); + + bool 
isRegToSpill(unsigned Reg) { + return std::find(RegsToSpill.begin(), + RegsToSpill.end(), Reg) != RegsToSpill.end(); + } + + bool isSibling(unsigned Reg); + MachineInstr *traceSiblingValue(unsigned, VNInfo*, VNInfo*); + void analyzeSiblingValues(); + + bool hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI); + void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI); + + void markValueUsed(LiveInterval*, VNInfo*); + bool reMaterializeFor(LiveInterval&, MachineBasicBlock::iterator MI); void reMaterializeAll(); - bool coalesceStackAccess(MachineInstr *MI); + bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl &Ops, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); - void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertReload(LiveInterval &NewLI, SlotIndex, + MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex, MachineBasicBlock::iterator MI); + + void spillAroundUses(unsigned Reg); + void spillAll(); }; } @@ -96,45 +148,489 @@ namespace llvm { Spiller *createInlineSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { - if (VerifySpills) - mf.verify(&pass, "When creating inline spiller"); return new InlineSpiller(pass, mf, vrm); } } -/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. -bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); - VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx); +//===----------------------------------------------------------------------===// +// Snippets +//===----------------------------------------------------------------------===// - if (!OrigVNI) { +// When spilling a virtual register, we also spill any snippets it is connected +// to. 
The snippets are small live ranges that only have a single real use, +// leftovers from live range splitting. Spilling them enables memory operand +// folding or tightens the live range around the single use. +// +// This minimizes register pressure and maximizes the store-to-load distance for +// spill slots which can be important in tight loops. + +/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register, +/// otherwise return 0. +static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) { + if (!MI->isCopy()) + return 0; + if (MI->getOperand(0).getSubReg() != 0) + return 0; + if (MI->getOperand(1).getSubReg() != 0) + return 0; + if (MI->getOperand(0).getReg() == Reg) + return MI->getOperand(1).getReg(); + if (MI->getOperand(1).getReg() == Reg) + return MI->getOperand(0).getReg(); + return 0; +} + +/// isSnippet - Identify if a live interval is a snippet that should be spilled. +/// It is assumed that SnipLI is a virtual register with the same original as +/// Edit->getReg(). +bool InlineSpiller::isSnippet(const LiveInterval &SnipLI) { + unsigned Reg = Edit->getReg(); + + // A snippet is a tiny live range with only a single instruction using it + // besides copies to/from Reg or spills/fills. We accept: + // + // %snip = COPY %Reg / FILL fi# + // %snip = USE %snip + // %Reg = COPY %snip / SPILL %snip, fi# + // + if (SnipLI.getNumValNums() > 2 || !LIS.intervalIsInOneMBB(SnipLI)) + return false; + + MachineInstr *UseMI = 0; + + // Check that all uses satisfy our criteria. + for (MachineRegisterInfo::reg_nodbg_iterator + RI = MRI.reg_nodbg_begin(SnipLI.reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Allow copies to/from Reg. + if (isFullCopyOf(MI, Reg)) + continue; + + // Allow stack slot loads. + int FI; + if (SnipLI.reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow stack slot stores. 
+ if (SnipLI.reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) + continue; + + // Allow a single additional instruction. + if (UseMI && MI != UseMI) + return false; + UseMI = MI; + } + return true; +} + +/// collectRegsToSpill - Collect live range snippets that only have a single +/// real use. +void InlineSpiller::collectRegsToSpill() { + unsigned Reg = Edit->getReg(); + + // Main register always spills. + RegsToSpill.assign(1, Reg); + SnippetCopies.clear(); + + // Snippets all have the same original, so there can't be any for an original + // register. + if (Original == Reg) + return; + + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); + MachineInstr *MI = RI.skipInstruction();) { + unsigned SnipReg = isFullCopyOf(MI, Reg); + if (!isSibling(SnipReg)) + continue; + LiveInterval &SnipLI = LIS.getInterval(SnipReg); + if (!isSnippet(SnipLI)) + continue; + SnippetCopies.insert(MI); + if (!isRegToSpill(SnipReg)) + RegsToSpill.push_back(SnipReg); + + DEBUG(dbgs() << "\talso spill snippet " << SnipLI << '\n'); + } +} + + +//===----------------------------------------------------------------------===// +// Sibling Values +//===----------------------------------------------------------------------===// + +// After live range splitting, some values to be spilled may be defined by +// copies from sibling registers. We trace the sibling copies back to the +// original value if it still exists. We need it for rematerialization. +// +// Even when the value can't be rematerialized, we still want to determine if +// the value has already been spilled, or we may want to hoist the spill from a +// loop. + +bool InlineSpiller::isSibling(unsigned Reg) { + return TargetRegisterInfo::isVirtualRegister(Reg) && + VRM.getOriginal(Reg) == Original; +} + +/// traceSiblingValue - Trace a value that is about to be spilled back to the +/// real defining instructions by looking through sibling copies. 
Always stay +/// within the range of OrigVNI so the registers are known to carry the same +/// value. +/// +/// Determine if the value is defined by all reloads, so spilling isn't +/// necessary - the value is already in the stack slot. +/// +/// Return a defining instruction that may be a candidate for rematerialization. +/// +MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, + VNInfo *OrigVNI) { + DEBUG(dbgs() << "Tracing value " << PrintReg(UseReg) << ':' + << UseVNI->id << '@' << UseVNI->def << '\n'); + SmallPtrSet Visited; + SmallVector, 8> WorkList; + WorkList.push_back(std::make_pair(UseReg, UseVNI)); + + // Best spill candidate seen so far. This must dominate UseVNI. + SibValueInfo SVI(UseReg, UseVNI); + MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def); + unsigned SpillDepth = Loops.getLoopDepth(UseMBB); + bool SeenOrigPHI = false; // Original PHI met. + + do { + unsigned Reg; + VNInfo *VNI; + tie(Reg, VNI) = WorkList.pop_back_val(); + if (!Visited.insert(VNI)) + continue; + + // Is this value a better spill candidate? + if (!isRegToSpill(Reg)) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) { + // This is a valid spill location dominating UseVNI. + // Prefer to spill at a smaller loop depth. + unsigned Depth = Loops.getLoopDepth(MBB); + if (Depth < SpillDepth) { + DEBUG(dbgs() << " spill depth " << Depth << ": " << PrintReg(Reg) + << ':' << VNI->id << '@' << VNI->def << '\n'); + SVI.SpillReg = Reg; + SVI.SpillVNI = VNI; + SpillDepth = Depth; + } + } + } + + // Trace through PHI-defs created by live range splitting. + if (VNI->isPHIDef()) { + if (VNI->def == OrigVNI->def) { + DEBUG(dbgs() << " orig phi value " << PrintReg(Reg) << ':' + << VNI->id << '@' << VNI->def << '\n'); + SeenOrigPHI = true; + continue; + } + // Get values live-out of predecessors. 
+ LiveInterval &LI = LIS.getInterval(Reg); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo *PVNI = LI.getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + if (PVNI) + WorkList.push_back(std::make_pair(Reg, PVNI)); + } + continue; + } + + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + + // Trace through sibling copies. + if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { + if (isSibling(SrcReg)) { + LiveInterval &SrcLI = LIS.getInterval(SrcReg); + VNInfo *SrcVNI = SrcLI.getVNInfoAt(VNI->def.getUseIndex()); + assert(SrcVNI && "Copy from non-existing value"); + DEBUG(dbgs() << " copy of " << PrintReg(SrcReg) << ':' + << SrcVNI->id << '@' << SrcVNI->def << '\n'); + WorkList.push_back(std::make_pair(SrcReg, SrcVNI)); + continue; + } + } + + // Track reachable reloads. + int FI; + if (Reg == TII.isLoadFromStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << " reload " << PrintReg(Reg) << ':' + << VNI->id << "@" << VNI->def << '\n'); + SVI.AllDefsAreReloads = true; + continue; + } + + // We have an 'original' def. Don't record trivial cases. + if (VNI == UseVNI) { + DEBUG(dbgs() << "Not a sibling copy.\n"); + return MI; + } + + // Potential remat candidate. + DEBUG(dbgs() << " def " << PrintReg(Reg) << ':' + << VNI->id << '@' << VNI->def << '\t' << *MI); + SVI.DefMI = MI; + } while (!WorkList.empty()); + + if (SeenOrigPHI || SVI.DefMI) + SVI.AllDefsAreReloads = false; + + DEBUG({ + if (SVI.AllDefsAreReloads) + dbgs() << "All defs are reloads.\n"; + else + dbgs() << "Prefer to spill " << PrintReg(SVI.SpillReg) << ':' + << SVI.SpillVNI->id << '@' << SVI.SpillVNI->def << '\n'; + }); + SibValues.insert(std::make_pair(UseVNI, SVI)); + return SVI.DefMI; +} + +/// analyzeSiblingValues - Trace values defined by sibling copies back to +/// something that isn't a sibling copy. 
+/// +/// Keep track of values that may be rematerializable. +void InlineSpiller::analyzeSiblingValues() { + SibValues.clear(); + + // No siblings at all? + if (Edit->getReg() == Original) + return; + + LiveInterval &OrigLI = LIS.getInterval(Original); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::const_vni_iterator VI = LI.vni_begin(), + VE = LI.vni_end(); VI != VE; ++VI) { + VNInfo *VNI = *VI; + if (VNI->isUnused()) + continue; + MachineInstr *DefMI = 0; + // Check possible sibling copies. + if (VNI->isPHIDef() || VNI->getCopy()) { + VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def); + if (OrigVNI->def != VNI->def) + DefMI = traceSiblingValue(Reg, VNI, OrigVNI); + } + if (!DefMI && !VNI->isPHIDef()) + DefMI = LIS.getInstructionFromIndex(VNI->def); + if (DefMI && Edit->checkRematerializable(VNI, DefMI, TII, AA)) { + DEBUG(dbgs() << "Value " << PrintReg(Reg) << ':' << VNI->id << '@' + << VNI->def << " may remat from " << *DefMI); + } + } + } +} + +/// hoistSpill - Given a sibling copy that defines a value to be spilled, insert +/// a spill at a better location. +bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { + SlotIndex Idx = LIS.getInstructionIndex(CopyMI); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); + assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + SibValueMap::iterator I = SibValues.find(VNI); + if (I == SibValues.end()) + return false; + + const SibValueInfo &SVI = I->second; + + // Let the normal folding code deal with the boring case. + if (!SVI.AllDefsAreReloads && SVI.SpillVNI == VNI) + return false; + + // SpillReg may have been deleted by remat and DCE. 
+ if (!LIS.hasInterval(SVI.SpillReg)) { + DEBUG(dbgs() << "Stale interval: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + LiveInterval &SibLI = LIS.getInterval(SVI.SpillReg); + if (!SibLI.containsValue(SVI.SpillVNI)) { + DEBUG(dbgs() << "Stale value: " << PrintReg(SVI.SpillReg) << '\n'); + SibValues.erase(I); + return false; + } + + // Conservatively extend the stack slot range to the range of the original + // value. We may be able to do better with stack slot coloring by being more + // careful here. + assert(StackInt && "No stack slot assigned yet."); + LiveInterval &OrigLI = LIS.getInterval(Original); + VNInfo *OrigVNI = OrigLI.getVNInfoAt(Idx); + StackInt->MergeValueInAsValue(OrigLI, OrigVNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "\tmerged orig valno " << OrigVNI->id << ": " + << *StackInt << '\n'); + + // Already spilled everywhere. + if (SVI.AllDefsAreReloads) + return true; + + // We are going to spill SVI.SpillVNI immediately after its def, so clear out + // any later spills of the same value. + eliminateRedundantSpills(SibLI, SVI.SpillVNI); + + MachineBasicBlock *MBB = LIS.getMBBFromIndex(SVI.SpillVNI->def); + MachineBasicBlock::iterator MII; + if (SVI.SpillVNI->isPHIDef()) + MII = MBB->SkipPHIsAndLabels(MBB->begin()); + else { + MachineInstr *DefMI = LIS.getInstructionFromIndex(SVI.SpillVNI->def); + assert(DefMI && "Defining instruction disappeared"); + MII = DefMI; + ++MII; + } + // Insert spill without kill flag immediately after def. + TII.storeRegToStackSlot(*MBB, MII, SVI.SpillReg, false, StackSlot, + MRI.getRegClass(SVI.SpillReg), &TRI); + --MII; // Point to store instruction. + LIS.InsertMachineInstrInMaps(MII); + VRM.addSpillSlotUse(StackSlot, MII); + DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); + return true; +} + +/// eliminateRedundantSpills - SLI:VNI is known to be on the stack. Remove any +/// redundant spills of this value in SLI.reg and sibling copies. 
+void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { + assert(VNI && "Missing value"); + SmallVector, 8> WorkList; + WorkList.push_back(std::make_pair(&SLI, VNI)); + assert(StackInt && "No stack slot assigned yet."); + + do { + LiveInterval *LI; + tie(LI, VNI) = WorkList.pop_back_val(); + unsigned Reg = LI->reg; + DEBUG(dbgs() << "Checking redundant spills for " + << VNI->id << '@' << VNI->def << " in " << *LI << '\n'); + + // Regs to spill are taken care of. + if (isRegToSpill(Reg)) + continue; + + // Add all of VNI's live range to StackInt. + StackInt->MergeValueInAsValue(*LI, VNI, StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged to stack int: " << *StackInt << '\n'); + + // Find all spills and copies of VNI. + for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg); + MachineInstr *MI = UI.skipInstruction();) { + if (!MI->isCopy() && !MI->getDesc().mayStore()) + continue; + SlotIndex Idx = LIS.getInstructionIndex(MI); + if (LI->getVNInfoAt(Idx) != VNI) + continue; + + // Follow sibling copies down the dominator tree. + if (unsigned DstReg = isFullCopyOf(MI, Reg)) { + if (isSibling(DstReg)) { + LiveInterval &DstLI = LIS.getInterval(DstReg); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + assert(DstVNI && "Missing defined value"); + assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + WorkList.push_back(std::make_pair(&DstLI, DstVNI)); + } + continue; + } + + // Erase spills. + int FI; + if (Reg == TII.isStoreToStackSlot(MI, FI) && FI == StackSlot) { + DEBUG(dbgs() << "Redundant spill " << Idx << '\t' << *MI); + // eliminateDeadDefs won't normally remove stores, so switch opcode. 
+ MI->setDesc(TII.get(TargetOpcode::KILL)); + DeadDefs.push_back(MI); + } + } + } while (!WorkList.empty()); +} + + +//===----------------------------------------------------------------------===// +// Rematerialization +//===----------------------------------------------------------------------===// + +/// markValueUsed - Remember that VNI failed to rematerialize, so its defining +/// instruction cannot be eliminated. See through snippet copies +void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { + SmallVector, 8> WorkList; + WorkList.push_back(std::make_pair(LI, VNI)); + do { + tie(LI, VNI) = WorkList.pop_back_val(); + if (!UsedValues.insert(VNI)) + continue; + + if (VNI->isPHIDef()) { + MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + if (PVNI) + WorkList.push_back(std::make_pair(LI, PVNI)); + } + continue; + } + + // Follow snippet copies. + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + if (!SnippetCopies.count(MI)) + continue; + LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); + assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + assert(SnipVNI && "Snippet undefined before copy"); + WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); + } while (!WorkList.empty()); +} + +/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. 
+bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, + MachineBasicBlock::iterator MI) { + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx); + + if (!ParentVNI) { DEBUG(dbgs() << "\tadding flags: "); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) MO.setIsUndef(); } DEBUG(dbgs() << UseIdx << '\t' << *MI); return true; } - LiveRangeEdit::Remat RM(OrigVNI); - if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) { - usedValues_.insert(OrigVNI); + if (SnippetCopies.count(MI)) + return false; + + // Use an OrigVNI from traceSiblingValue when ParentVNI is a sibling copy. + LiveRangeEdit::Remat RM(ParentVNI); + SibValueMap::const_iterator SibI = SibValues.find(ParentVNI); + if (SibI != SibValues.end()) + RM.OrigMI = SibI->second.DefMI; + if (!Edit->canRematerializeAt(RM, UseIdx, false, LIS)) { + markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); return false; } - // If the instruction also writes edit_->getReg(), it had better not require - // the same register for uses and defs. + // If the instruction also writes VirtReg.reg, it had better not require the + // same register for uses and defs. bool Reads, Writes; SmallVector Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(VirtReg.reg, &Ops); if (Writes) { for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); if (MO.isUse() ? 
MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { - usedValues_.insert(OrigVNI); + markValueUsed(&VirtReg, ParentVNI); DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); return false; } @@ -145,35 +641,31 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { // fold a load into the instruction. That avoids allocating a new register. if (RM.OrigMI->getDesc().canFoldAsLoad() && foldMemoryOperand(MI, Ops, RM.OrigMI)) { - edit_->markRematerialized(RM.ParentVNI); + Edit->markRematerialized(RM.ParentVNI); return true; } // Alocate a new register for the remat. - LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_); + LiveInterval &NewLI = Edit->createFrom(Original, LIS, VRM); NewLI.markNotSpillable(); - // Rematting for a copy: Set allocation hint to be the destination register. - if (MI->isCopy()) - mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg()); - // Finally we can rematerialize OrigMI before MI. - SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, - lis_, tii_, tri_); + SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + LIS, TII, TRI); DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' - << *lis_.getInstructionFromIndex(DefIdx)); + << *LIS.getInstructionFromIndex(DefIdx)); // Replace operands for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i]); - if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) { + if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { MO.setReg(NewLI.reg); MO.setIsKill(); } } DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator()); + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); return true; @@ -182,75 +674,85 @@ bool 
InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { /// reMaterializeAll - Try to rematerialize as many uses as possible, /// and trim the live ranges after. void InlineSpiller::reMaterializeAll() { - // Do a quick scan of the interval values to find if any are remattable. - if (!edit_->anyRematerializable(lis_, tii_, aa_)) + // analyzeSiblingValues has already tested all relevant defining instructions. + if (!Edit->anyRematerializable(LIS, TII, AA)) return; - usedValues_.clear(); + UsedValues.clear(); - // Try to remat before all uses of edit_->getReg(). + // Try to remat before all uses of snippets. bool anyRemat = false; - for (MachineRegisterInfo::use_nodbg_iterator - RI = mri_.use_nodbg_begin(edit_->getReg()); - MachineInstr *MI = RI.skipInstruction();) - anyRemat |= reMaterializeFor(MI); - + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (MachineRegisterInfo::use_nodbg_iterator + RI = MRI.use_nodbg_begin(Reg); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(LI, MI); + } if (!anyRemat) return; // Remove any values that were completely rematted. - bool anyRemoved = false; - for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(), - E = edit_->getParent().vni_end(); I != E; ++I) { - VNInfo *VNI = *I; - if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) || - usedValues_.count(VNI)) - continue; - MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); - DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); - lis_.RemoveMachineInstrFromMaps(DefMI); - vrm_.RemoveMachineInstrFromMaps(DefMI); - DefMI->eraseFromParent(); - VNI->def = SlotIndex(); - anyRemoved = true; - } - - if (!anyRemoved) - return; - - // Removing values may cause debug uses where parent is not live. 
- for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg()); - MachineInstr *MI = RI.skipInstruction();) { - if (!MI->isDebugValue()) - continue; - // Try to preserve the debug value if parent is live immediately after it. - MachineBasicBlock::iterator NextMI = MI; - ++NextMI; - if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { - SlotIndex Idx = lis_.getInstructionIndex(NextMI); - VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); - if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI))) + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) { + unsigned Reg = RegsToSpill[i]; + LiveInterval &LI = LIS.getInterval(Reg); + for (LiveInterval::vni_iterator I = LI.vni_begin(), E = LI.vni_end(); + I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused() || VNI->isPHIDef() || UsedValues.count(VNI)) continue; + MachineInstr *MI = LIS.getInstructionFromIndex(VNI->def); + MI->addRegisterDead(Reg, &TRI); + if (!MI->allDefsAreDead()) + continue; + DEBUG(dbgs() << "All defs dead: " << *MI); + DeadDefs.push_back(MI); } - DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); - MI->eraseFromParent(); } + + // Eliminate dead code after remat. Note that some snippet copies may be + // deleted here. + if (DeadDefs.empty()) + return; + DEBUG(dbgs() << "Remat created " << DeadDefs.size() << " dead defs.\n"); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + + // Get rid of deleted and empty intervals. + for (unsigned i = RegsToSpill.size(); i != 0; --i) { + unsigned Reg = RegsToSpill[i-1]; + if (!LIS.hasInterval(Reg)) { + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + if (!LI.empty()) + continue; + Edit->eraseVirtReg(Reg, LIS); + RegsToSpill.erase(RegsToSpill.begin() + (i - 1)); + } + DEBUG(dbgs() << RegsToSpill.size() << " registers to spill after remat.\n"); } -/// If MI is a load or store of stackSlot_, it can be removed. 
-bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) { + +//===----------------------------------------------------------------------===// +// Spilling +//===----------------------------------------------------------------------===// + +/// If MI is a load or store of StackSlot, it can be removed. +bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { int FI = 0; - unsigned reg; - if (!(reg = tii_.isLoadFromStackSlot(MI, FI)) && - !(reg = tii_.isStoreToStackSlot(MI, FI))) + unsigned InstrReg; + if (!(InstrReg = TII.isLoadFromStackSlot(MI, FI)) && + !(InstrReg = TII.isStoreToStackSlot(MI, FI))) return false; // We have a stack access. Is it the right register and slot? - if (reg != edit_->getReg() || FI != stackSlot_) + if (InstrReg != Reg || FI != StackSlot) return false; DEBUG(dbgs() << "Coalescing stack access: " << *MI); - lis_.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); return true; } @@ -283,13 +785,13 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, } MachineInstr *FoldMI = - LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI) - : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) + : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; - lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); if (!LoadMI) - vrm_.addSpillSlotUse(stackSlot_, FoldMI); + VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); DEBUG(dbgs() << "\tfolded: " << *FoldMI); return true; @@ -297,84 +799,40 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, /// insertReload - Insert a reload of NewLI.reg before MI. 
void InlineSpiller::insertReload(LiveInterval &NewLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); - tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - vrm_.addSpillSlotUse(stackSlot_, MI); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, - lis_.getVNInfoAllocator()); + LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); } /// insertSpill - Insert a spill of NewLI.reg after MI. -void InlineSpiller::insertSpill(LiveInterval &NewLI, - MachineBasicBlock::iterator MI) { +void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, + SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - - // Get the defined value. It could be an early clobber so keep the def index. - SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); - VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx); - assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo"); - Idx = VNI->def; - - tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, + MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. 
- SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); - vrm_.addSpillSlotUse(stackSlot_, MI); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator()); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); } -void InlineSpiller::spill(LiveInterval *li, - SmallVectorImpl &newIntervals, - const SmallVectorImpl &spillIs) { - LiveRangeEdit edit(*li, newIntervals, spillIs); - spill(edit); - if (VerifySpills) - mf_.verify(&pass_, "After inline spill"); -} +/// spillAroundUses - insert spill code around each use of Reg. +void InlineSpiller::spillAroundUses(unsigned Reg) { + LiveInterval &OldLI = LIS.getInterval(Reg); -void InlineSpiller::spill(LiveRangeEdit &edit) { - edit_ = &edit; - assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) - && "Trying to spill a stack slot."); - DEBUG(dbgs() << "Inline spilling " - << mri_.getRegClass(edit.getReg())->getName() - << ':' << edit.getParent() << "\nFrom original " - << PrintReg(vrm_.getOriginal(edit.getReg())) << '\n'); - assert(edit.getParent().isSpillable() && - "Attempting to spill already spilled value."); - - reMaterializeAll(); - - // Remat may handle everything. - if (edit_->getParent().empty()) - return; - - rc_ = mri_.getRegClass(edit.getReg()); - - // Share a stack slot among all descendants of Orig. - unsigned Orig = vrm_.getOriginal(edit.getReg()); - stackSlot_ = vrm_.getStackSlot(Orig); - if (stackSlot_ == VirtRegMap::NO_STACK_SLOT) - stackSlot_ = vrm_.assignVirt2StackSlot(Orig); - - if (Orig != edit.getReg()) - vrm_.assignVirt2StackSlot(edit.getReg(), stackSlot_); - - // Update LiveStacks now that we are committed to spilling. 
- LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_); - if (!stacklvr.hasAtLeastOneValue()) - stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator()); - stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0)); - - // Iterate over instructions using register. - for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg()); + // Iterate over instructions using Reg. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Reg); MachineInstr *MI = RI.skipInstruction();) { // Debug values are not allowed to affect codegen. @@ -383,7 +841,7 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { uint64_t Offset = MI->getOperand(1).getImm(); const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, Offset, MDPtr, DL)) { DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); MachineBasicBlock *MBB = MI->getParent(); @@ -395,14 +853,44 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { continue; } + // Ignore copies to/from snippets. We'll delete them. + if (SnippetCopies.count(MI)) + continue; + // Stack slot accesses may coalesce away. - if (coalesceStackAccess(MI)) + if (coalesceStackAccess(MI, Reg)) continue; // Analyze instruction. bool Reads, Writes; SmallVector Ops; - tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops); + tie(Reads, Writes) = MI->readsWritesVirtualRegister(Reg, &Ops); + + // Find the slot index where this instruction reads and writes OldLI. + // This is usually the def slot, except for tied early clobbers. + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + if (SlotIndex::isSameInstr(Idx, VNI->def)) + Idx = VNI->def; + + // Check for a sibling copy. 
+ unsigned SibReg = isFullCopyOf(MI, Reg); + if (SibReg && isSibling(SibReg)) { + if (Writes) { + // Hoist the spill of a sib-reg copy. + if (hoistSpill(OldLI, MI)) { + // This COPY is now dead, the value is already in the stack slot. + MI->getOperand(0).setIsDead(); + DeadDefs.push_back(MI); + continue; + } + } else { + // This is a reload for a sib-reg copy. Drop spills downstream. + LiveInterval &SibLI = LIS.getInterval(SibReg); + eliminateRedundantSpills(SibLI, SibLI.getVNInfoAt(Idx)); + // The COPY will fold to a reload below. + } + } // Attempt to fold memory ops. if (foldMemoryOperand(MI, Ops)) @@ -410,11 +898,11 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { // Allocate interval around instruction. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = edit.create(mri_, lis_, vrm_); + LiveInterval &NewLI = Edit->createFrom(Reg, LIS, VRM); NewLI.markNotSpillable(); if (Reads) - insertReload(NewLI, MI); + insertReload(NewLI, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; @@ -429,11 +917,84 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { hasLiveDef = true; } } + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); // FIXME: Use a second vreg if instruction has no tied ops. if (Writes && hasLiveDef) - insertSpill(NewLI, MI); + insertSpill(NewLI, OldLI, Idx, MI); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); } } + +/// spillAll - Spill all registers remaining after rematerialization. +void InlineSpiller::spillAll() { + // Update LiveStacks now that we are committed to spilling. 
+ if (StackSlot == VirtRegMap::NO_STACK_SLOT) { + StackSlot = VRM.assignVirt2StackSlot(Original); + StackInt = &LSS.getOrCreateInterval(StackSlot, MRI.getRegClass(Original)); + StackInt->getNextValue(SlotIndex(), 0, LSS.getVNInfoAllocator()); + } else + StackInt = &LSS.getInterval(StackSlot); + + if (Original != Edit->getReg()) + VRM.assignVirt2StackSlot(Edit->getReg(), StackSlot); + + assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); + DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); + + // Spill around uses of all RegsToSpill. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + spillAroundUses(RegsToSpill[i]); + + // Hoisted spills may cause dead code. + if (!DeadDefs.empty()) { + DEBUG(dbgs() << "Eliminating " << DeadDefs.size() << " dead defs\n"); + Edit->eliminateDeadDefs(DeadDefs, LIS, VRM, TII); + } + + // Finally delete the SnippetCopies. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()); + MachineInstr *MI = RI.skipInstruction();) { + assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); + // FIXME: Do this with a LiveRangeEdit callback. + VRM.RemoveMachineInstrFromMaps(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + // Delete all spilled registers. + for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) + Edit->eraseVirtReg(RegsToSpill[i], LIS); +} + +void InlineSpiller::spill(LiveRangeEdit &edit) { + Edit = &edit; + assert(!TargetRegisterInfo::isStackSlot(edit.getReg()) + && "Trying to spill a stack slot."); + // Share a stack slot among all descendants of Original. 
+ Original = VRM.getOriginal(edit.getReg()); + StackSlot = VRM.getStackSlot(Original); + StackInt = 0; + + DEBUG(dbgs() << "Inline spilling " + << MRI.getRegClass(edit.getReg())->getName() + << ':' << edit.getParent() << "\nFrom original " + << LIS.getInterval(Original) << '\n'); + assert(edit.getParent().isSpillable() && + "Attempting to spill already spilled value."); + assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); + + collectRegsToSpill(); + analyzeSiblingValues(); + reMaterializeAll(); + + // Remat may handle everything. + if (!RegsToSpill.empty()) + spillAll(); + + Edit->calculateRegClassAndHint(MF, LIS, Loops); +} diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp new file mode 100644 index 000000000000..b1014a97fa03 --- /dev/null +++ b/lib/CodeGen/InterferenceCache.cpp @@ -0,0 +1,155 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference in LiveIntervalUnions. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "regalloc" +#include "InterferenceCache.h" +#include "llvm/Target/TargetRegisterInfo.h" + +using namespace llvm; + +void InterferenceCache::init(MachineFunction *mf, + LiveIntervalUnion *liuarray, + SlotIndexes *indexes, + const TargetRegisterInfo *tri) { + MF = mf; + LIUArray = liuarray; + TRI = tri; + PhysRegEntries.assign(TRI->getNumRegs(), 0); + for (unsigned i = 0; i != CacheEntries; ++i) + Entries[i].clear(mf, indexes); +} + +InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) { + unsigned E = PhysRegEntries[PhysReg]; + if (E < CacheEntries && Entries[E].getPhysReg() == PhysReg) { + if (!Entries[E].valid(LIUArray, TRI)) + Entries[E].revalidate(); + return &Entries[E]; + } + // No valid entry exists, pick the next round-robin entry. + E = RoundRobin; + if (++RoundRobin == CacheEntries) + RoundRobin = 0; + Entries[E].reset(PhysReg, LIUArray, TRI, MF); + PhysRegEntries[PhysReg] = E; + return &Entries[E]; +} + +/// revalidate - LIU contents have changed, update tags. +void InterferenceCache::Entry::revalidate() { + // Invalidate all block entries. + ++Tag; + // Invalidate all iterators. + PrevPos = SlotIndex(); + for (unsigned i = 0, e = Aliases.size(); i != e; ++i) + Aliases[i].second = Aliases[i].first->getTag(); +} + +void InterferenceCache::Entry::reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF) { + // LIU's changed, invalidate cache. + ++Tag; + PhysReg = physReg; + Blocks.resize(MF->getNumBlockIDs()); + Aliases.clear(); + for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS) { + LiveIntervalUnion *LIU = LIUArray + *AS; + Aliases.push_back(std::make_pair(LIU, LIU->getTag())); + } + + // Reset iterators. 
+ PrevPos = SlotIndex(); + unsigned e = Aliases.size(); + Iters.resize(e); + for (unsigned i = 0; i != e; ++i) + Iters[i].setMap(Aliases[i].first->getMap()); +} + +bool InterferenceCache::Entry::valid(LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI) { + unsigned i = 0, e = Aliases.size(); + for (const unsigned *AS = TRI->getOverlaps(PhysReg); *AS; ++AS, ++i) { + LiveIntervalUnion *LIU = LIUArray + *AS; + if (i == e || Aliases[i].first != LIU) + return false; + if (LIU->changedSince(Aliases[i].second)) + return false; + } + return i == e; +} + +void InterferenceCache::Entry::update(unsigned MBBNum) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + + // Use advanceTo only when possible. + if (PrevPos != Start) { + if (!PrevPos.isValid() || Start < PrevPos) + for (unsigned i = 0, e = Iters.size(); i != e; ++i) + Iters[i].find(Start); + else + for (unsigned i = 0, e = Iters.size(); i != e; ++i) + Iters[i].advanceTo(Start); + PrevPos = Start; + } + + MachineFunction::const_iterator MFI = MF->getBlockNumbered(MBBNum); + BlockInterference *BI = &Blocks[MBBNum]; + for (;;) { + BI->Tag = Tag; + BI->First = BI->Last = SlotIndex(); + + // Check for first interference. + for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid()) + continue; + SlotIndex StartI = I.start(); + if (StartI >= Stop) + continue; + if (!BI->First.isValid() || StartI < BI->First) + BI->First = StartI; + } + + PrevPos = Stop; + if (BI->First.isValid()) + break; + + // No interference in this block? Go ahead and precompute the next block. + if (++MFI == MF->end()) + return; + MBBNum = MFI->getNumber(); + BI = &Blocks[MBBNum]; + if (BI->Tag == Tag) + return; + tie(Start, Stop) = Indexes->getMBBRange(MBBNum); + } + + // Check for last interference in block. 
+ for (unsigned i = 0, e = Iters.size(); i != e; ++i) { + Iter &I = Iters[i]; + if (!I.valid() || I.start() >= Stop) + continue; + I.advanceTo(Stop); + bool Backup = !I.valid() || I.start() >= Stop; + if (Backup) + --I; + SlotIndex StopI = I.stop(); + if (!BI->Last.isValid() || StopI > BI->Last) + BI->Last = StopI; + if (Backup) + ++I; + } +} diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h new file mode 100644 index 000000000000..6c36fa4021fb --- /dev/null +++ b/lib/CodeGen/InterferenceCache.h @@ -0,0 +1,163 @@ +//===-- InterferenceCache.h - Caching per-block interference ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// InterferenceCache remembers per-block interference in LiveIntervalUnions. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_INTERFERENCECACHE +#define LLVM_CODEGEN_INTERFERENCECACHE + +#include "LiveIntervalUnion.h" + +namespace llvm { + +class InterferenceCache { + const TargetRegisterInfo *TRI; + LiveIntervalUnion *LIUArray; + SlotIndexes *Indexes; + MachineFunction *MF; + + /// BlockInterference - information about the interference in a single basic + /// block. + struct BlockInterference { + BlockInterference() : Tag(0) {} + unsigned Tag; + SlotIndex First; + SlotIndex Last; + }; + + /// Entry - A cache entry containing interference information for all aliases + /// of PhysReg in all basic blocks. + class Entry { + /// PhysReg - The register currently represented. + unsigned PhysReg; + + /// Tag - Cache tag is changed when any of the underlying LiveIntervalUnions + /// change. + unsigned Tag; + + /// MF - The current function. + MachineFunction *MF; + + /// Indexes - Mapping block numbers to SlotIndex ranges. 
+ SlotIndexes *Indexes; + + /// PrevPos - The previous position the iterators were moved to. + SlotIndex PrevPos; + + /// Aliases - A LiveIntervalUnion pointer and tag for each alias of + /// PhysReg. + SmallVector, 8> Aliases; + + typedef LiveIntervalUnion::SegmentIter Iter; + + /// Iters - an iterator for each alias + SmallVector Iters; + + /// Blocks - Interference for each block in the function. + SmallVector Blocks; + + /// update - Recompute Blocks[MBBNum] + void update(unsigned MBBNum); + + public: + Entry() : PhysReg(0), Tag(0), Indexes(0) {} + + void clear(MachineFunction *mf, SlotIndexes *indexes) { + PhysReg = 0; + MF = mf; + Indexes = indexes; + } + + unsigned getPhysReg() const { return PhysReg; } + + void revalidate(); + + /// valid - Return true if this is a valid entry for physReg. + bool valid(LiveIntervalUnion *LIUArray, const TargetRegisterInfo *TRI); + + /// reset - Initialize entry to represent physReg's aliases. + void reset(unsigned physReg, + LiveIntervalUnion *LIUArray, + const TargetRegisterInfo *TRI, + const MachineFunction *MF); + + /// get - Return an up to date BlockInterference. + BlockInterference *get(unsigned MBBNum) { + if (Blocks[MBBNum].Tag != Tag) + update(MBBNum); + return &Blocks[MBBNum]; + } + }; + + // We don't keep a cache entry for every physical register, that would use too + // much memory. Instead, a fixed number of cache entries are used in a round- + // robin manner. + enum { CacheEntries = 32 }; + + // Point to an entry for each physreg. The entry pointed to may not be up to + // date, and it may have been reused for a different physreg. + SmallVector PhysRegEntries; + + // Next round-robin entry to be picked. + unsigned RoundRobin; + + // The actual cache entries. + Entry Entries[CacheEntries]; + + // get - Get a valid entry for PhysReg. + Entry *get(unsigned PhysReg); + +public: + InterferenceCache() : TRI(0), LIUArray(0), Indexes(0), MF(0), RoundRobin(0) {} + + /// init - Prepare cache for a new function.
+ void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*, + const TargetRegisterInfo *); + + /// Cursor - The primary query interface for the block interference cache. + class Cursor { + Entry *CacheEntry; + BlockInterference *Current; + public: + /// Cursor - Create a cursor for the interference allocated to PhysReg and + /// all its aliases. + Cursor(InterferenceCache &Cache, unsigned PhysReg) + : CacheEntry(Cache.get(PhysReg)), Current(0) {} + + /// moveToBlock - Move cursor to basic block MBBNum. + void moveToBlock(unsigned MBBNum) { + Current = CacheEntry->get(MBBNum); + } + + /// hasInterference - Return true if the current block has any interference. + bool hasInterference() { + return Current->First.isValid(); + } + + /// first - Return the starting index of the first interfering range in the + /// current block. + SlotIndex first() { + return Current->First; + } + + /// last - Return the ending index of the last interfering range in the + /// current block. + SlotIndex last() { + return Current->Last; + } + }; + + friend class Cursor; +}; + +} // namespace llvm + +#endif diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 80dfc763af69..e1dad2efa98f 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -98,12 +98,6 @@ static cl::opt EnableFastISelOption("fast-isel", cl::Hidden, cl::desc("Enable the \"fast\" instruction selector")); -// Enable or disable an experimental optimization to split GEPs -// and run a special GVN pass which does not examine loads, in -// an effort to factor out redundancy implicit in complex GEPs.
-static cl::opt EnableSplitGEPGVN("split-gep-gvn", cl::Hidden, - cl::desc("Split GEPs and run no-load GVN")); - LLVMTargetMachine::LLVMTargetMachine(const Target &T, const std::string &Triple) : TargetMachine(T), TargetTriple(Triple) { @@ -132,6 +126,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, return true; assert(Context != 0 && "Failed to get MCContext"); + if (hasMCSaveTempLabels()) + Context->setAllowTemporaryLabels(false); + const MCAsmInfo &MAI = *getMCAsmInfo(); OwningPtr AsmStreamer; @@ -139,7 +136,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, default: return true; case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = - getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI); + getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; @@ -152,6 +149,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), + hasMCUseCFI(), InstPrinter, MCE, TAB, ShowMCInst); @@ -230,11 +228,40 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, /// bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx, + raw_ostream &Out, CodeGenOpt::Level OptLevel, bool DisableVerify) { // Add common CodeGen passes. if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Ctx)) return true; + + if (hasMCSaveTempLabels()) + Ctx->setAllowTemporaryLabels(false); + + // Create the code emitter for the target if it exists. If not, .o file + // emission fails. 
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx); + TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple); + if (MCE == 0 || TAB == 0) + return true; + + OwningPtr AsmStreamer; + AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx, + *TAB, Out, MCE, + hasMCRelaxAll(), + hasMCNoExecStack())); + AsmStreamer.get()->InitSections(); + + // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. + FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); + if (Printer == 0) + return true; + + // If successful, createAsmPrinter took ownership of AsmStreamer. + AsmStreamer.take(); + + PM.add(Printer); + // Make sure the code model is set. setCodeModelForJIT(); @@ -272,12 +299,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (!DisableVerify) PM.add(createVerifierPass()); - // Optionally, tun split-GEPs and no-load GVN. - if (EnableSplitGEPGVN) { - PM.add(createGEPSplitterPass()); - PM.add(createGVNPass(/*NoLoads=*/true)); - } - // Run loop strength reduction before anything else. 
if (OptLevel != CodeGenOpt::None && !DisableLSR) { PM.add(createLoopStrengthReducePass(getTargetLowering())); @@ -304,6 +325,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::DwarfTable: + case ExceptionHandling::ARM: PM.add(createDwarfEHPass(this)); break; case ExceptionHandling::None: diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 853ec1ac7c13..8b214831d2cd 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -30,6 +30,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -76,6 +77,7 @@ typedef IntervalMap LocMap; /// held by the same virtual register. The equivalence class is the transitive /// closure of that relation. namespace { +class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. @@ -99,10 +101,6 @@ class UserValue { void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo, LiveIntervals &LIS, const TargetInstrInfo &TII); - /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx. - void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS, const TargetInstrInfo &TII); - public: /// UserValue - Create a new UserValue. UserValue(const MDNode *var, unsigned o, DebugLoc L, @@ -146,17 +144,31 @@ class UserValue { /// getLocationNo - Return the location number that matches Loc. 
unsigned getLocationNo(const MachineOperand &LocMO) { - if (LocMO.isReg() && LocMO.getReg() == 0) - return ~0u; - for (unsigned i = 0, e = locations.size(); i != e; ++i) - if (LocMO.isIdenticalTo(locations[i])) - return i; + if (LocMO.isReg()) { + if (LocMO.getReg() == 0) + return ~0u; + // For register locations we don't care about use/def and other flags. + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + locations[i].getReg() == LocMO.getReg() && + locations[i].getSubReg() == LocMO.getSubReg()) + return i; + } else + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (LocMO.isIdenticalTo(locations[i])) + return i; locations.push_back(LocMO); // We are storing a MachineOperand outside a MachineInstr. locations.back().clearParent(); + // Don't store def operands. + if (locations.back().isReg()) + locations.back().setIsUse(); return locations.size() - 1; } + /// mapVirtRegs - Ensure that all virtual register locations are mapped. + void mapVirtRegs(LDVImpl *LDV); + /// addDef - Add a definition point to this value. void addDef(SlotIndex Idx, const MachineOperand &LocMO) { // Add a singular (Idx,Idx) -> Loc mapping. @@ -168,19 +180,36 @@ class UserValue { /// extendDef - Extend the current definition as far as possible down the /// dominator tree. Stop when meeting an existing def or when leaving the live /// range of VNI. + /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. /// @param LocNo Location number to propagate. /// @param LI Restrict liveness to where LI has the value VNI. May be null. /// @param VNI When LI is not null, this is the value to restrict to. + /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. /// @param MDT Dominator tree.
void extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, + SmallVectorImpl *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT); + /// addDefsFromCopies - The value in LI/LocNo may be copied to other + /// registers. Determine if any of the copies are available at the kill + /// points, and add defs if possible. + /// @param LI Scan for copies of the value in LI->reg. + /// @param LocNo Location number of LI->reg. + /// @param Kills Points where the range of LocNo could be extended. + /// @param NewDefs Append (Idx, LocNo) of inserted defs here. + void addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl &Kills, + SmallVectorImpl > &NewDefs, + MachineRegisterInfo &MRI, + LiveIntervals &LIS); + /// computeIntervals - Compute the live intervals of all locations after /// collecting all their def points. - void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT); + void computeIntervals(MachineRegisterInfo &MRI, + LiveIntervals &LIS, MachineDominatorTree &MDT); /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx. void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, @@ -230,9 +259,6 @@ class LDVImpl { /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); - /// mapVirtReg - Map virtual register to an equivalence class. - void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// handleDebugValue - Add DBG_VALUE instruction to our maps. /// @param MI DBG_VALUE instruction /// @param Idx Last valid SLotIndex before instruction. @@ -261,7 +287,10 @@ class LDVImpl { userVarMap.clear(); } - /// renameRegister - Replace all references to OldReg wiht NewReg:SubIdx. + /// mapVirtReg - Map virtual register to an equivalence class. + void mapVirtReg(unsigned VirtReg, UserValue *EC); + + /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. @@ -322,6 +351,13 @@ void UserValue::coalesceLocation(unsigned LocNo) { } } +void UserValue::mapVirtRegs(LDVImpl *LDV) { + for (unsigned i = 0, e = locations.size(); i != e; ++i) + if (locations[i].isReg() && + TargetRegisterInfo::isVirtualRegister(locations[i].getReg())) + LDV->mapVirtReg(locations[i].getReg(), this); +} + UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; @@ -363,14 +399,6 @@ bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { unsigned Offset = MI->getOperand(1).getImm(); const MDNode *Var = MI->getOperand(2).getMetadata(); UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); - - // If the location is a virtual register, make sure it is mapped. - if (MI->getOperand(0).isReg()) { - unsigned Reg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - mapVirtReg(Reg, UV); - } - UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -405,6 +433,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveInterval *LI, const VNInfo *VNI, + SmallVectorImpl *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT) { SmallVector Todo; Todo.push_back(Idx); @@ -419,8 +448,11 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, bool ToEnd = true; if (LI && VNI) { LiveRange *Range = LI->getLiveRangeContaining(Start); - if (!Range || Range->valno != VNI) + if (!Range || Range->valno != VNI) { + if (Kills) + Kills->push_back(Start); continue; + } if (Range->end < Stop) Stop = Range->end, ToEnd = false; } @@ -438,6 +470,9 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, // Limited by the next def. if (I.valid() && I.start() < Stop) Stop = I.start(), ToEnd = false; + // Limited by VNI's live range. 
+ else if (!ToEnd && Kills) + Kills->push_back(Stop); if (Start >= Stop) continue; @@ -455,7 +490,82 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, } void -UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { +UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, + const SmallVectorImpl &Kills, + SmallVectorImpl > &NewDefs, + MachineRegisterInfo &MRI, LiveIntervals &LIS) { + if (Kills.empty()) + return; + // Don't track copies from physregs, there are too many uses. + if (!TargetRegisterInfo::isVirtualRegister(LI->reg)) + return; + + // Collect all the (vreg, valno) pairs that are copies of LI. + SmallVector, 8> CopyValues; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI.use_nodbg_begin(LI->reg), + UE = MRI.use_nodbg_end(); UI != UE; ++UI) { + // Copies of the full value. + if (UI.getOperand().getSubReg() || !UI->isCopy()) + continue; + MachineInstr *MI = &*UI; + unsigned DstReg = MI->getOperand(0).getReg(); + + // Don't follow copies to physregs. These are usually setting up call + // arguments, and the argument registers are always call clobbered. We are + // better off in the source register which could be a callee-saved register, + // or it could be spilled. + if (!TargetRegisterInfo::isVirtualRegister(DstReg)) + continue; + + // Is LocNo extended to reach this copy? If not, another def may be blocking + // it, or we are looking at a wrong value of LI. 
+ SlotIndex Idx = LIS.getInstructionIndex(MI); + LocMap::iterator I = locInts.find(Idx.getUseIndex()); + if (!I.valid() || I.value() != LocNo) + continue; + + if (!LIS.hasInterval(DstReg)) + continue; + LiveInterval *DstLI = &LIS.getInterval(DstReg); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); + assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + CopyValues.push_back(std::make_pair(DstLI, DstVNI)); + } + + if (CopyValues.empty()) + return; + + DEBUG(dbgs() << "Got " << CopyValues.size() << " copies of " << *LI << '\n'); + + // Try to add defs of the copied values for each kill point. + for (unsigned i = 0, e = Kills.size(); i != e; ++i) { + SlotIndex Idx = Kills[i]; + for (unsigned j = 0, e = CopyValues.size(); j != e; ++j) { + LiveInterval *DstLI = CopyValues[j].first; + const VNInfo *DstVNI = CopyValues[j].second; + if (DstLI->getVNInfoAt(Idx) != DstVNI) + continue; + // Check that there isn't already a def at Idx + LocMap::iterator I = locInts.find(Idx); + if (I.valid() && I.start() <= Idx) + continue; + DEBUG(dbgs() << "Kill at " << Idx << " covered by valno #" + << DstVNI->id << " in " << *DstLI << '\n'); + MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def); + assert(CopyMI && CopyMI->isCopy() && "Bad copy value"); + unsigned LocNo = getLocationNo(CopyMI->getOperand(0)); + I.insert(Idx, Idx.getNextSlot(), LocNo); + NewDefs.push_back(std::make_pair(Idx, LocNo)); + break; + } + } +} + +void +UserValue::computeIntervals(MachineRegisterInfo &MRI, + LiveIntervals &LIS, + MachineDominatorTree &MDT) { SmallVector, 16> Defs; // Collect all defs to be extended (Skipping undefs). @@ -463,7 +573,8 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { if (I.value() != ~0u) Defs.push_back(std::make_pair(I.start(), I.value())); - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + // Extend all defs, and possibly add new ones along the way. 
+ for (unsigned i = 0; i != Defs.size(); ++i) { SlotIndex Idx = Defs[i].first; unsigned LocNo = Defs[i].second; const MachineOperand &Loc = locations[LocNo]; @@ -472,9 +583,11 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) { LiveInterval *LI = &LIS.getInterval(Loc.getReg()); const VNInfo *VNI = LI->getVNInfoAt(Idx); - extendDef(Idx, LocNo, LI, VNI, LIS, MDT); + SmallVector Kills; + extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT); + addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS); } else - extendDef(Idx, LocNo, 0, 0, LIS, MDT); + extendDef(Idx, LocNo, 0, 0, 0, LIS, MDT); } // Finally, erase all the undefs. @@ -486,8 +599,10 @@ UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) { } void LDVImpl::computeIntervals() { - for (unsigned i = 0, e = userValues.size(); i != e; ++i) - userValues[i]->computeIntervals(*LIS, *MDT); + for (unsigned i = 0, e = userValues.size(); i != e; ++i) { + userValues[i]->computeIntervals(MF->getRegInfo(), *LIS, *MDT); + userValues[i]->mapVirtRegs(this); + } } bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { @@ -640,13 +755,6 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, .addOperand(Loc).addImm(offset).addMetadata(variable); } -void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx, - LiveIntervals &LIS, const TargetInstrInfo &TII) { - MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS); - BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0) - .addImm(offset).addMetadata(variable); -} - void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TII) { MachineFunction::iterator MFEnd = VRM->getMachineFunction().end(); @@ -678,12 +786,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, break; ++I; - if (Stop == MBBEnd) - continue; - // The current interval ends before MBB. 
- // Insert a kill if there is a gap. - if (!I.valid() || I.start() > Stop) - insertDebugKill(MBB, Stop, LIS, TII); } } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index c2dbd6ab75a1..cfade24b8d87 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -30,19 +30,22 @@ #include using namespace llvm; -// CompEnd - Compare LiveRange ends. -namespace { -struct CompEnd { - bool operator()(const LiveRange &A, const LiveRange &B) const { - return A.end < B.end; - } -}; -} - LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { - assert(Pos.isValid() && "Cannot search for an invalid index"); - return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0), - CompEnd()); + // This algorithm is basically std::upper_bound. + // Unfortunately, std::upper_bound cannot be used with mixed types until we + // adopt C++0x. Many libraries can do it, but not all. + if (empty() || Pos >= endIndex()) + return end(); + iterator I = begin(); + size_t Len = ranges.size(); + do { + size_t Mid = Len >> 1; + if (Pos < I[Mid].end) + Len = Mid; + else + I += Mid + 1, Len -= Mid + 1; + } while (Len); + return I; } /// killedInRange - Return true if the interval has kills in [Start,End). @@ -291,6 +294,22 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { return ranges.insert(it, LR); } +/// extendInBlock - If this interval is live before UseIdx in the basic +/// block that starts at StartIdx, extend it to be live at UseIdx and return +/// the value. If there is no live range before UseIdx, return NULL. +VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex UseIdx) { + if (empty()) + return 0; + iterator I = std::upper_bound(begin(), end(), UseIdx); + if (I == begin()) + return 0; + --I; + if (I->end <= StartIdx) + return 0; + if (I->end <= UseIdx) + extendIntervalEndTo(I, UseIdx.getNextSlot()); + return I->valno; +} /// removeRange - Remove the specified range from this interval. 
Note that /// the range must be in a single LiveRange in its entirety. @@ -476,60 +495,19 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, void LiveInterval::MergeValueInAsValue( const LiveInterval &RHS, const VNInfo *RHSValNo, VNInfo *LHSValNo) { - SmallVector ReplacedValNos; - iterator IP = begin(); + // TODO: Make this more efficient. + iterator InsertPos = begin(); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { - assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo"); if (I->valno != RHSValNo) continue; - SlotIndex Start = I->start, End = I->end; - IP = std::upper_bound(IP, end(), Start); - // If the start of this range overlaps with an existing liverange, trim it. - if (IP != begin() && IP[-1].end > Start) { - if (IP[-1].valno != LHSValNo) { - ReplacedValNos.push_back(IP[-1].valno); - IP[-1].valno = LHSValNo; // Update val#. - } - Start = IP[-1].end; - // Trimmed away the whole range? - if (Start >= End) continue; - } - // If the end of this range overlaps with an existing liverange, trim it. - if (IP != end() && End > IP->start) { - if (IP->valno != LHSValNo) { - ReplacedValNos.push_back(IP->valno); - IP->valno = LHSValNo; // Update val#. - } - End = IP->start; - // If this trimmed away the whole range, ignore it. - if (Start == End) continue; - } - // Map the valno in the other live range to the current live range. - IP = addRangeFrom(LiveRange(Start, End, LHSValNo), IP); - } - - - SmallSet Seen; - for (unsigned i = 0, e = ReplacedValNos.size(); i != e; ++i) { - VNInfo *V1 = ReplacedValNos[i]; - if (Seen.insert(V1)) { - bool isDead = true; - for (const_iterator I = begin(), E = end(); I != E; ++I) - if (I->valno == V1) { - isDead = false; - break; - } - if (isDead) { - // Now that V1 is dead, remove it. 
- markValNoForDeletion(V1); - } - } + LiveRange Tmp = *I; + Tmp.valno = LHSValNo; + InsertPos = addRangeFrom(Tmp, InsertPos); } } - /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all /// LiveRanges with the V1 value number with the V2 value number. This can @@ -700,8 +678,8 @@ void LiveRange::print(raw_ostream &os) const { unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Create initial equivalence classes. - eqClass_.clear(); - eqClass_.grow(LI->getNumValNums()); + EqClass.clear(); + EqClass.grow(LI->getNumValNums()); const VNInfo *used = 0, *unused = 0; @@ -712,48 +690,65 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Group all unused values into one class. if (VNI->isUnused()) { if (unused) - eqClass_.join(unused->id, VNI->id); + EqClass.join(unused->id, VNI->id); unused = VNI; continue; } used = VNI; if (VNI->isPHIDef()) { - const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def); + const MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); assert(MBB && "Phi-def has no defining MBB"); // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) if (const VNInfo *PVNI = - LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot())) - eqClass_.join(VNI->id, PVNI->id); + LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) - eqClass_.join(VNI->id, UVNI->id); + EqClass.join(VNI->id, UVNI->id); } } // Lump all the unused values in with the last used value. 
if (used && unused) - eqClass_.join(used->id, unused->id); + EqClass.join(used->id, unused->id); - eqClass_.compress(); - return eqClass_.getNumClasses(); + EqClass.compress(); + return EqClass.getNumClasses(); } -void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { +void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], + MachineRegisterInfo &MRI) { assert(LIV[0] && "LIV[0] must be set"); LiveInterval &LI = *LIV[0]; - // First move runs to new intervals. + // Rewrite instructions. + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(LI.reg), + RE = MRI.reg_end(); RI != RE;) { + MachineOperand &MO = RI.getOperand(); + MachineInstr *MI = MO.getParent(); + ++RI; + if (MO.isUse() && MO.isUndef()) + continue; + // DBG_VALUE instructions should have been eliminated earlier. + SlotIndex Idx = LIS.getInstructionIndex(MI); + Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + const VNInfo *VNI = LI.getVNInfoAt(Idx); + assert(VNI && "Interval not live at use."); + MO.setReg(LIV[getEqClass(VNI)]->reg); + } + + // Move runs to new intervals. LiveInterval::iterator J = LI.begin(), E = LI.end(); - while (J != E && eqClass_[J->valno->id] == 0) + while (J != E && EqClass[J->valno->id] == 0) ++J; for (LiveInterval::iterator I = J; I != E; ++I) { - if (unsigned eq = eqClass_[I->valno->id]) { + if (unsigned eq = EqClass[I->valno->id]) { assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && "New intervals should be empty"); LIV[eq]->ranges.push_back(*I); @@ -764,11 +759,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) { // Transfer VNInfos to their new owners and renumber them. 
unsigned j = 0, e = LI.getNumValNums(); - while (j != e && eqClass_[j] == 0) + while (j != e && EqClass[j] == 0) ++j; for (unsigned i = j; i != e; ++i) { VNInfo *VNI = LI.getValNumInfo(i); - if (unsigned eq = eqClass_[i]) { + if (unsigned eq = EqClass[i]) { VNI->id = LIV[eq]->getNumValNums(); LIV[eq]->valnos.push_back(VNI); } else { diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index aef5b5f77e78..9257191f7fc0 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -572,19 +572,12 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, getOrCreateInterval(MO.getReg())); - else if (allocatableRegs_[MO.getReg()]) { + else { MachineInstr *CopyMI = NULL; if (MI->isCopyLike()) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, getOrCreateInterval(MO.getReg()), CopyMI); - // Def of a register also defines its sub-registers. - for (const unsigned* AS = tri_->getSubRegisters(MO.getReg()); *AS; ++AS) - // If MI also modifies the sub-register explicitly, avoid processing it - // more than once. Do not pass in TRI here so it checks for exact match. - if (!MI->definesRegister(*AS)) - handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, - getOrCreateInterval(*AS), 0); } } @@ -645,7 +638,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, end = MIIdx.getStoreIndex(); } else { DEBUG(dbgs() << " live through"); - end = baseIndex; + end = getMBBEndIdx(MBB); } } @@ -746,7 +739,8 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { /// shrinkToUses - After removing some uses of a register, shrink its live /// range to just the remaining uses. This method does not compute reaching /// defs for new uses, and it doesn't remove dead defs. 
-void LiveIntervals::shrinkToUses(LiveInterval *li) { +bool LiveIntervals::shrinkToUses(LiveInterval *li, + SmallVectorImpl *dead) { DEBUG(dbgs() << "Shrink: " << *li << '\n'); assert(TargetRegisterInfo::isVirtualRegister(li->reg) && "Can't only shrink physical registers"); @@ -760,7 +754,15 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { continue; SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); VNInfo *VNI = li->getVNInfoAt(Idx); - assert(VNI && "Live interval not live into reading instruction"); + if (!VNI) { + // This shouldn't happen: readsVirtualRegister returns true, but there is + // no live value. It is likely caused by a target getting flags + // wrong. + DEBUG(dbgs() << Idx << '\t' << *UseMI + << "Warning: Instr claims to read non-existent value in " + << *li << '\n'); + continue; + } if (VNI->def == Idx) { // Special case: An early-clobber tied operand reads and writes the // register one slot early. @@ -778,49 +780,47 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; + // We may eliminate PHI values, so recompute PHIKill flags. + VNI->setHasPHIKill(false); NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); + + // A use tied to an early-clobber def ends at the load slot and isn't caught + // above. Catch it here instead. This probably only ever happens for inline + // assembly. + if (VNI->def.isUse()) + if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) + WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); } + // Keep track of the PHIs that are in use. + SmallPtrSet UsedPHIs; + // Extend intervals to reach all uses in WorkList. while (!WorkList.empty()) { SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - - // Extend the live range for VNI to be live at Idx. - LiveInterval::iterator I = NewLI.find(Idx); - - // Already got it? 
- if (I != NewLI.end() && I->start <= Idx) { - assert(I->valno == VNI && "Unexpected existing value number"); - continue; - } - - // Is there already a live range in the block containing Idx? const MachineBasicBlock *MBB = getMBBFromIndex(Idx); SlotIndex BlockStart = getMBBStartIdx(MBB); - DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx - << " in BB#" << MBB->getNumber() << '@' << BlockStart); - if (I != NewLI.begin() && (--I)->end > BlockStart) { - assert(I->valno == VNI && "Wrong reaching def"); - DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n"); - // Is this the first use of a PHIDef in its defining block? - if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) { - // The PHI is live, make sure the predecessors are live-out. - for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - VNInfo *PVNI = li->getVNInfoAt(Stop); - // A predecessor is not required to have a live-out value for a PHI. - if (PVNI) { - assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag"); - WorkList.push_back(std::make_pair(Stop, PVNI)); - } + + // Extend the live range for VNI to be live at Idx. + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + (void)ExtVNI; + assert(ExtVNI == VNI && "Unexpected existing value number"); + // Is this a PHIDef we haven't seen before? + if (!VNI->isPHIDef() || VNI->def != BlockStart || !UsedPHIs.insert(VNI)) + continue; + // The PHI is live, make sure the predecessors are live-out. + for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + VNInfo *PVNI = li->getVNInfoAt(Stop); + // A predecessor is not required to have a live-out value for a PHI. + if (PVNI) { + PVNI->setHasPHIKill(true); + WorkList.push_back(std::make_pair(Stop, PVNI)); } } - - // Extend the live range in the block to include Idx. 
- NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI)); continue; } @@ -838,6 +838,7 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { } // Handle dead values. + bool CanSeparate = false; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; @@ -847,21 +848,28 @@ void LiveIntervals::shrinkToUses(LiveInterval *li) { assert(LII != NewLI.end() && "Missing live range for PHI"); if (LII->end != VNI->def.getNextSlot()) continue; - if (!VNI->isPHIDef()) { + if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->setIsUnused(true); NewLI.removeRange(*LII); + DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); + CanSeparate = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); assert(MI && "No instruction defining live value"); MI->addRegisterDead(li->reg, tri_); + if (dead && MI->allDefsAreDead()) { + DEBUG(dbgs() << "All defs dead: " << VNI->def << '\t' << *MI); + dead->push_back(MI); + } } } // Move the trimmed ranges back. li->ranges.swap(NewLI.ranges); - DEBUG(dbgs() << "Shrink: " << *li << '\n'); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; } @@ -955,7 +963,7 @@ bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI, bool LiveIntervals::isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, MachineInstr *MI, - const SmallVectorImpl &SpillIs, + const SmallVectorImpl *SpillIs, bool &isLoad) { if (DisableReMat) return false; @@ -982,9 +990,10 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, // If a register operand of the re-materialized instruction is going to // be spilled next, then it's not legal to re-materialize this instruction. 
- for (unsigned i = 0, e = SpillIs.size(); i != e; ++i) - if (ImpUse == SpillIs[i]->reg) - return false; + if (SpillIs) + for (unsigned i = 0, e = SpillIs->size(); i != e; ++i) + if (ImpUse == (*SpillIs)[i]->reg) + return false; } return true; } @@ -993,16 +1002,15 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, /// val# of the specified interval is re-materializable. bool LiveIntervals::isReMaterializable(const LiveInterval &li, const VNInfo *ValNo, MachineInstr *MI) { - SmallVector Dummy1; bool Dummy2; - return isReMaterializable(li, ValNo, MI, Dummy1, Dummy2); + return isReMaterializable(li, ValNo, MI, 0, Dummy2); } /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const SmallVectorImpl &SpillIs, + const SmallVectorImpl *SpillIs, bool &isLoad) { isLoad = false; for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); @@ -1499,7 +1507,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, // ... // def = ... // = use - // It's better to start a new interval to avoid artifically + // It's better to start a new interval to avoid artificially // extend the new interval. if (MI->readsWritesVirtualRegister(li.reg) == std::make_pair(false,true)) { @@ -1702,7 +1710,9 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // overflow a float. This expression behaves like 10^d for small d, but is // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of // headroom before overflow. - float lc = std::pow(1 + (100.0f / (loopDepth+10)), (float)loopDepth); + // By the way, powf() might be unavailable here. For consistency, + // We may take pow(double,double). 
+ float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); return (isDef + isUse) * lc; } @@ -1715,7 +1725,7 @@ static void normalizeSpillWeights(std::vector &NewLIs) { std::vector LiveIntervals:: addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl &SpillIs, + const SmallVectorImpl *SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { assert(li.isSpillable() && "attempt to spill already spilled interval!"); diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index 205f28a0d65a..b67f96667bfd 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -35,12 +35,20 @@ void LiveIntervalUnion::unify(LiveInterval &VirtReg) { LiveInterval::iterator RegEnd = VirtReg.end(); SegmentIter SegPos = Segments.find(RegPos->start); - for (;;) { + while (SegPos.valid()) { SegPos.insert(RegPos->start, RegPos->end, &VirtReg); if (++RegPos == RegEnd) return; SegPos.advanceTo(RegPos->start); } + + // We have reached the end of Segments, so it is no longer necessary to search + // for the insertion position. + // It is faster to insert the end first. + --RegEnd; + SegPos.insert(RegEnd->start, RegEnd->end, &VirtReg); + for (; RegPos != RegEnd; ++RegPos, ++SegPos) + SegPos.insert(RegPos->start, RegPos->end, &VirtReg); } // Remove a live virtual register's segments from this union. @@ -168,6 +176,7 @@ LiveIntervalUnion::Query::firstInterference() { return FirstInterference; CheckedFirstInterference = true; InterferenceResult &IR = FirstInterference; + IR.LiveUnionI.setMap(LiveUnion->getMap()); // Quickly skip interference check for empty sets. if (VirtReg->empty() || LiveUnion->empty()) { @@ -176,10 +185,10 @@ LiveIntervalUnion::Query::firstInterference() { // VirtReg starts first, perform double binary search. 
IR.VirtRegI = VirtReg->find(LiveUnion->startIndex()); if (IR.VirtRegI != VirtReg->end()) - IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start); + IR.LiveUnionI.find(IR.VirtRegI->start); } else { // LiveUnion starts first, perform double binary search. - IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex()); + IR.LiveUnionI.find(VirtReg->beginIndex()); if (IR.LiveUnionI.valid()) IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start()); else @@ -235,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const { // // For comments on how to speed it up, see Query::findIntersection(). unsigned LiveIntervalUnion::Query:: -collectInterferingVRegs(unsigned MaxInterferingRegs) { +collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) { InterferenceResult IR = firstInterference(); LiveInterval::iterator VirtRegEnd = VirtReg->end(); LiveInterval *RecentInterferingVReg = NULL; @@ -277,6 +286,11 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { // Cache the most recent interfering vreg to bypass isSeenInterference. RecentInterferingVReg = IR.LiveUnionI.value(); ++IR.LiveUnionI; + + // Stop collecting when the max weight is exceeded. + if (RecentInterferingVReg->weight >= MaxWeight) + return InterferingVRegs.size(); + continue; } // VirtRegI may have advanced far beyond LiveUnionI, diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h index 6f9c5f4455e9..c83578e99c6c 100644 --- a/lib/CodeGen/LiveIntervalUnion.h +++ b/lib/CodeGen/LiveIntervalUnion.h @@ -95,6 +95,9 @@ class LiveIntervalUnion { // Remove a live virtual register's segments from this union. void extract(LiveInterval &VirtReg); + // Remove all inserted virtual registers. 
+ void clear() { Segments.clear(); ++Tag; } + // Print union, using TRI to translate register names void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; @@ -163,10 +166,10 @@ class LiveIntervalUnion { bool CheckedFirstInterference; bool SeenAllInterferences; bool SeenUnspillableVReg; - unsigned Tag; + unsigned Tag, UserTag; public: - Query(): LiveUnion(), VirtReg() {} + Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {} Query(LiveInterval *VReg, LiveIntervalUnion *LIU): LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false), @@ -181,11 +184,13 @@ class LiveIntervalUnion { SeenAllInterferences = false; SeenUnspillableVReg = false; Tag = 0; + UserTag = 0; } - void init(LiveInterval *VReg, LiveIntervalUnion *LIU) { + void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) { assert(VReg && LIU && "Invalid arguments"); - if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) { + if (UserTag == UTag && VirtReg == VReg && + LiveUnion == LIU && !LIU->changedSince(Tag)) { // Retain cached results, e.g. firstInterference. return; } @@ -193,6 +198,7 @@ class LiveIntervalUnion { LiveUnion = LIU; VirtReg = VReg; Tag = LIU->getTag(); + UserTag = UTag; } LiveInterval &virtReg() const { @@ -223,7 +229,8 @@ class LiveIntervalUnion { // Count the virtual registers in this union that interfere with this // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); + unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX, + float MaxWeight = HUGE_VALF); // Was this virtual register visited during collectInterferingVRegs? bool isSeenInterference(LiveInterval *VReg) const; diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index 3bbda1c2e609..f8a3dbb5fd7b 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -11,24 +11,41 @@ // is spilled or split. 
//===----------------------------------------------------------------------===// +#define DEBUG_TYPE "regalloc" #include "LiveRangeEdit.h" #include "VirtRegMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; -LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri, - LiveIntervals &lis, - VirtRegMap &vrm) { - const TargetRegisterClass *RC = mri.getRegClass(getReg()); - unsigned VReg = mri.createVirtualRegister(RC); - vrm.grow(); - vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg())); - LiveInterval &li = lis.getOrCreateInterval(VReg); - newRegs_.push_back(&li); - return li; +LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg, + LiveIntervals &LIS, + VirtRegMap &VRM) { + MachineRegisterInfo &MRI = VRM.getRegInfo(); + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + VRM.grow(); + VRM.setIsSplitFromReg(VReg, VRM.getOriginal(OldReg)); + LiveInterval &LI = LIS.getOrCreateInterval(VReg); + newRegs_.push_back(&LI); + return LI; +} + +bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, + const MachineInstr *DefMI, + const TargetInstrInfo &tii, + AliasAnalysis *aa) { + assert(DefMI && "Missing instruction"); + scannedRemattable_ = true; + if (!tii.isTriviallyReMaterializable(DefMI, aa)) + return false; + remattable_.insert(VNI); + return true; } void LiveRangeEdit::scanRemattable(LiveIntervals &lis, @@ -42,8 +59,7 @@ void LiveRangeEdit::scanRemattable(LiveIntervals &lis, MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def); if (!DefMI) continue; - if (tii.isTriviallyReMaterializable(DefMI, aa)) - remattable_.insert(VNI); + checkRematerializable(VNI, DefMI, tii, aa); } scannedRemattable_ = true; } @@ -66,18 +82,16 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr 
*OrigMI, UseIdx = UseIdx.getUseIndex(); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); - if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg()) + if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; // Reserved registers are OK. if (MO.isUndef() || !lis.hasInterval(MO.getReg())) continue; - // We don't want to move any defs. - if (MO.isDef()) - return false; // We cannot depend on virtual registers in uselessRegs_. - for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui) - if (uselessRegs_[ui]->reg == MO.getReg()) - return false; + if (uselessRegs_) + for (unsigned ui = 0, ue = uselessRegs_->size(); ui != ue; ++ui) + if ((*uselessRegs_)[ui]->reg == MO.getReg()) + return false; LiveInterval &li = lis.getInterval(MO.getReg()); const VNInfo *OVNI = li.getVNInfoAt(OrigIdx); @@ -99,16 +113,22 @@ bool LiveRangeEdit::canRematerializeAt(Remat &RM, if (!remattable_.count(RM.ParentVNI)) return false; - // No defining instruction. - RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def); - assert(RM.OrigMI && "Defining instruction for remattable value disappeared"); + // No defining instruction provided. + SlotIndex DefIdx; + if (RM.OrigMI) + DefIdx = lis.getInstructionIndex(RM.OrigMI); + else { + DefIdx = RM.ParentVNI->def; + RM.OrigMI = lis.getInstructionFromIndex(DefIdx); + assert(RM.OrigMI && "No defining instruction for remattable value"); + } // If only cheap remats were requested, bail out early. if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove()) return false; // Verify that all used registers are available with the same values. 
- if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis)) + if (!allUsesAvailableAt(RM.OrigMI, DefIdx, UseIdx, lis)) return false; return true; @@ -120,10 +140,174 @@ SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, const Remat &RM, LiveIntervals &lis, const TargetInstrInfo &tii, - const TargetRegisterInfo &tri) { + const TargetRegisterInfo &tri, + bool Late) { assert(RM.OrigMI && "Invalid remat"); tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); - return lis.InsertMachineInstrInMaps(--MI).getDefIndex(); + return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) + .getDefIndex(); } +void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { + if (delegate_ && delegate_->LRE_CanEraseVirtReg(Reg)) + LIS.removeInterval(Reg); +} + +bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, + SmallVectorImpl &Dead, + MachineRegisterInfo &MRI, + LiveIntervals &LIS, + const TargetInstrInfo &TII) { + MachineInstr *DefMI = 0, *UseMI = 0; + + // Check that there is a single def and a single use. + for (MachineRegisterInfo::reg_nodbg_iterator I = MRI.reg_nodbg_begin(LI->reg), + E = MRI.reg_nodbg_end(); I != E; ++I) { + MachineOperand &MO = I.getOperand(); + MachineInstr *MI = MO.getParent(); + if (MO.isDef()) { + if (DefMI && DefMI != MI) + return false; + if (!MI->getDesc().canFoldAsLoad()) + return false; + DefMI = MI; + } else if (!MO.isUndef()) { + if (UseMI && UseMI != MI) + return false; + // FIXME: Targets don't know how to fold subreg uses. 
+ if (MO.getSubReg()) + return false; + UseMI = MI; + } + } + if (!DefMI || !UseMI) + return false; + + DEBUG(dbgs() << "Try to fold single def: " << *DefMI + << " into single use: " << *UseMI); + + SmallVector Ops; + if (UseMI->readsWritesVirtualRegister(LI->reg, &Ops).second) + return false; + + MachineInstr *FoldMI = TII.foldMemoryOperand(UseMI, Ops, DefMI); + if (!FoldMI) + return false; + DEBUG(dbgs() << " folded: " << *FoldMI); + LIS.ReplaceMachineInstrInMaps(UseMI, FoldMI); + UseMI->eraseFromParent(); + DefMI->addRegisterDead(LI->reg, 0); + Dead.push_back(DefMI); + return true; +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, + LiveIntervals &LIS, VirtRegMap &VRM, + const TargetInstrInfo &TII) { + SetVector, + SmallPtrSet > ToShrink; + MachineRegisterInfo &MRI = VRM.getRegInfo(); + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) { + MachineInstr *MI = Dead.pop_back_val(); + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + continue; + } + + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + continue; + } + + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. 
+ // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.killedAt(Idx))) + ToShrink.insert(&LI); + + // Remove defined value. + if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { + if (delegate_) + delegate_->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) { + ToShrink.remove(&LI); + eraseVirtReg(Reg, LIS); + } + } + } + } + + if (delegate_) + delegate_->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + if (ToShrink.empty()) + break; + + // Shrink just one live interval. Then delete new dead defs. + LiveInterval *LI = ToShrink.back(); + ToShrink.pop_back(); + if (foldAsLoad(LI, Dead, MRI, LIS, TII)) + continue; + if (delegate_) + delegate_->LRE_WillShrinkVirtReg(LI->reg); + if (!LIS.shrinkToUses(LI, &Dead)) + continue; + + // LI may have been separated, create new intervals. + LI->RenumberValues(LIS); + ConnectedVNInfoEqClasses ConEQ(LIS); + unsigned NumComp = ConEQ.Classify(LI); + if (NumComp <= 1) + continue; + DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); + SmallVector Dups(1, LI); + for (unsigned i = 1; i != NumComp; ++i) { + Dups.push_back(&createFrom(LI->reg, LIS, VRM)); + if (delegate_) + delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg); + } + ConEQ.Distribute(&Dups[0], MRI); + } +} + +void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + LiveIntervals &LIS, + const MachineLoopInfo &Loops) { + VirtRegAuxInfo VRAI(MF, LIS, Loops); + for (iterator I = begin(), E = end(); I != E; ++I) { + LiveInterval &LI = **I; + VRAI.CalculateRegClass(LI.reg); + VRAI.CalculateWeightAndHint(LI); + } +} diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h index 73f69ed63983..14d227e61957 100644 --- a/lib/CodeGen/LiveRangeEdit.h +++ b/lib/CodeGen/LiveRangeEdit.h @@ -25,13 +25,36 @@ namespace llvm { class 
AliasAnalysis; class LiveIntervals; +class MachineLoopInfo; class MachineRegisterInfo; class VirtRegMap; class LiveRangeEdit { +public: + /// Callback methods for LiveRangeEdit owners. + struct Delegate { + /// Called immediately before erasing a dead machine instruction. + virtual void LRE_WillEraseInstruction(MachineInstr *MI) {} + + /// Called when a virtual register is no longer used. Return false to defer + /// its deletion from LiveIntervals. + virtual bool LRE_CanEraseVirtReg(unsigned) { return true; } + + /// Called before shrinking the live range of a virtual register. + virtual void LRE_WillShrinkVirtReg(unsigned) {} + + /// Called after cloning a virtual register. + /// This is used for new registers representing connected components of Old. + virtual void LRE_DidCloneVirtReg(unsigned New, unsigned Old) {} + + virtual ~Delegate() {} + }; + +private: LiveInterval &parent_; SmallVectorImpl &newRegs_; - const SmallVectorImpl &uselessRegs_; + Delegate *const delegate_; + const SmallVectorImpl *uselessRegs_; /// firstNew_ - Index of the first register added to newRegs_. const unsigned firstNew_; @@ -41,11 +64,11 @@ class LiveRangeEdit { /// remattable_ - Values defined by remattable instructions as identified by /// tii.isTriviallyReMaterializable(). - SmallPtrSet remattable_; + SmallPtrSet remattable_; /// rematted_ - Values that were actually rematted, and so need to have their /// live range trimmed or entirely removed. - SmallPtrSet rematted_; + SmallPtrSet rematted_; /// scanRemattable - Identify the parent_ values that may rematerialize. void scanRemattable(LiveIntervals &lis, @@ -57,6 +80,11 @@ class LiveRangeEdit { bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx, LiveIntervals &lis); + /// foldAsLoad - If LI has a single use and a single def that can be folded as + /// a load, eliminate the register by folding the def into the use. 
+ bool foldAsLoad(LiveInterval *LI, SmallVectorImpl &Dead, + MachineRegisterInfo&, LiveIntervals&, const TargetInstrInfo&); + public: /// Create a LiveRangeEdit for breaking down parent into smaller pieces. /// @param parent The register being spilled or split. @@ -66,9 +94,13 @@ class LiveRangeEdit { /// rematerializing values because they are about to be removed. LiveRangeEdit(LiveInterval &parent, SmallVectorImpl &newRegs, - const SmallVectorImpl &uselessRegs) - : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs), - firstNew_(newRegs.size()), scannedRemattable_(false) {} + Delegate *delegate = 0, + const SmallVectorImpl *uselessRegs = 0) + : parent_(parent), newRegs_(newRegs), + delegate_(delegate), + uselessRegs_(uselessRegs), + firstNew_(newRegs.size()), + scannedRemattable_(false) {} LiveInterval &getParent() const { return parent_; } unsigned getReg() const { return parent_.reg; } @@ -81,16 +113,33 @@ class LiveRangeEdit { bool empty() const { return size() == 0; } LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; } - /// create - Create a new register with the same class and stack slot as + /// FIXME: Temporary accessors until we can get rid of + /// LiveIntervals::AddIntervalsForSpills + SmallVectorImpl *getNewVRegs() { return &newRegs_; } + const SmallVectorImpl *getUselessVRegs() { + return uselessRegs_; + } + + /// createFrom - Create a new virtual register based on OldReg. + LiveInterval &createFrom(unsigned OldReg, LiveIntervals&, VirtRegMap&); + + /// create - Create a new register with the same class and original slot as /// parent. - LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&); + LiveInterval &create(LiveIntervals &LIS, VirtRegMap &VRM) { + return createFrom(getReg(), LIS, VRM); + } /// anyRematerializable - Return true if any parent values may be /// rematerializable. - /// This function must be called before ny rematerialization is attempted. 
+ /// This function must be called before any rematerialization is attempted. bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&, AliasAnalysis*); + /// checkRematerializable - Manually add VNI to the list of rematerializable + /// values if DefMI may be rematerializable. + bool checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, + const TargetInstrInfo&, AliasAnalysis*); + /// Remat - Information needed to rematerialize at a specific location. struct Remat { VNInfo *ParentVNI; // parent_'s value at the remat location. @@ -116,18 +165,35 @@ class LiveRangeEdit { const Remat &RM, LiveIntervals&, const TargetInstrInfo&, - const TargetRegisterInfo&); + const TargetRegisterInfo&, + bool Late = false); /// markRematerialized - explicitly mark a value as rematerialized after doing /// it manually. - void markRematerialized(VNInfo *ParentVNI) { + void markRematerialized(const VNInfo *ParentVNI) { rematted_.insert(ParentVNI); } /// didRematerialize - Return true if ParentVNI was rematerialized anywhere. - bool didRematerialize(VNInfo *ParentVNI) const { + bool didRematerialize(const VNInfo *ParentVNI) const { return rematted_.count(ParentVNI); } + + /// eraseVirtReg - Notify the delegate that Reg is no longer in use, and try + /// to erase it from LIS. + void eraseVirtReg(unsigned Reg, LiveIntervals &LIS); + + /// eliminateDeadDefs - Try to delete machine instructions that are now dead + /// (allDefsAreDead returns true). This may cause live intervals to be trimmed + /// and further dead efs to be eliminated. + void eliminateDeadDefs(SmallVectorImpl &Dead, + LiveIntervals&, VirtRegMap&, + const TargetInstrInfo&); + + /// calculateRegClassAndHint - Recompute register class and hint for each new + /// register. 
+ void calculateRegClassAndHint(MachineFunction&, LiveIntervals&, + const MachineLoopInfo&); }; } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index dd43ef2530c1..20bad60dedda 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -107,9 +107,7 @@ void LiveVariables::MarkVirtRegAliveInBlock(VarInfo& VRInfo, // Mark the variable known alive in this bb VRInfo.AliveBlocks.set(BBNum); - for (MachineBasicBlock::const_pred_reverse_iterator PI = MBB->pred_rbegin(), - E = MBB->pred_rend(); PI != E; ++PI) - WorkList.push_back(*PI); + WorkList.insert(WorkList.end(), MBB->pred_rbegin(), MBB->pred_rend()); } void LiveVariables::MarkVirtRegAliveInBlock(VarInfo &VRInfo, @@ -707,7 +705,7 @@ bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) { // Loop over all of the successors of the basic block, checking to see if // the value is either live in the block, or if it is killed in the block. - std::vector OpSuccBlocks; + SmallVector OpSuccBlocks; for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(), E = MBB.succ_end(); SI != E; ++SI) { MachineBasicBlock *SuccMBB = *SI; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index ccbff0af5b2c..57f3e34d0c5a 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -363,8 +363,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) { } void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) { - std::vector::iterator I = - std::find(Predecessors.begin(), Predecessors.end(), pred); + pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), pred); assert(I != Predecessors.end() && "Pred is not a predecessor of this block!"); Predecessors.erase(I); } @@ -402,8 +401,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { - std::vector::const_iterator 
I = - std::find(Successors.begin(), Successors.end(), MBB); + const_succ_iterator I = std::find(Successors.begin(), Successors.end(), MBB); return I != Successors.end(); } diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 07a7d27b019f..f97ccf65790f 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -365,6 +365,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!FoundCSE) { // Look for trivial copy coalescing opportunities. if (PerformTrivialCoalescing(MI, MBB)) { + Changed = true; + // After coalescing MI itself may become a copy. if (MI->isCopyLike()) continue; @@ -379,10 +381,11 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (NewMI) { Commuted = true; FoundCSE = VNT.count(NewMI); - if (NewMI != MI) + if (NewMI != MI) { // New instruction. It doesn't need to be kept. NewMI->eraseFromParent(); - else if (!FoundCSE) + Changed = true; + } else if (!FoundCSE) // MI was changed but it didn't help, commute it back! (void)TII->commuteInstruction(MI); } @@ -450,6 +453,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { ++NumPhysCSEs; if (Commuted) ++NumCommutes; + Changed = true; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index aa9ea61acec7..71df6f8b7701 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -441,6 +441,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { OS << ")"; } + // Print nontemporal info. + if (MMO.isNonTemporal()) + OS << "(nontemporal)"; + return OS; } @@ -451,7 +455,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { /// MachineInstr ctor - This constructor creates a dummy MachineInstr with /// TID NULL and no operands. 
MachineInstr::MachineInstr() - : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), + : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0) { // Make sure that we get added to a machine basicblock LeakDetector::addGarbageObject(this); @@ -470,7 +475,7 @@ void MachineInstr::addImplicitDefUseOperands() { /// implicit operands. It reserves space for the number of operands specified by /// the TargetInstrDesc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { if (!NoImp) NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); @@ -484,8 +489,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp) /// MachineInstr ctor - As above, but with a DebugLoc. MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, bool NoImp) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), - Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { if (!NoImp) NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); @@ -499,7 +504,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl, /// that the MachineInstr is created and added to the end of the specified /// basic block. 
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), Parent(0) { assert(MBB && "Cannot use inserting ctor with null basic block!"); NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); @@ -514,8 +519,8 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid) /// MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, const TargetInstrDesc &tid) - : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0), - Parent(0), debugLoc(dl) { + : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), + MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) { assert(MBB && "Cannot use inserting ctor with null basic block!"); NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses(); Operands.reserve(NumImplicitOps + TID->getNumOperands()); @@ -528,7 +533,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0), + : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0), MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd), Parent(0), debugLoc(MI.getDebugLoc()) { Operands.reserve(MI.getNumOperands()); @@ -538,6 +543,9 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) addOperand(MI.getOperand(i)); NumImplicitOps = MI.NumImplicitOps; + // Copy all the flags. + Flags = MI.Flags; + // Set parent to null. 
Parent = 0; @@ -1417,6 +1425,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } bool HaveSemi = false; + if (Flags) { + if (!HaveSemi) OS << ";"; HaveSemi = true; + OS << " flags: "; + + if (Flags & FrameSetup) + OS << "FrameSetup"; + } + if (!memoperands_empty()) { if (!HaveSemi) OS << ";"; HaveSemi = true; @@ -1447,13 +1463,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } } + // Print debug location information. if (!debugLoc.isUnknown() && MF) { - if (!HaveSemi) OS << ";"; + if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " dbg:"; printDebugLoc(debugLoc, MF, OS); } - OS << "\n"; + OS << '\n'; } bool MachineInstr::addRegisterKilled(unsigned IncomingReg, @@ -1530,13 +1547,8 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, continue; if (Reg == IncomingReg) { - if (!Found) { - if (MO.isDead()) - // The register is already marked dead. - return true; - MO.setIsDead(); - Found = true; - } + MO.setIsDead(); + Found = true; } else if (hasAliases && MO.isDead() && TargetRegisterInfo::isPhysicalRegister(Reg)) { // There exists a super-register that's marked dead. diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 443fc2d97bdf..b315702eef8f 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -39,7 +39,6 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - using namespace llvm; STATISTIC(NumHoisted, @@ -169,6 +168,10 @@ namespace { /// bool IsLoopInvariantInst(MachineInstr &I); + /// HasAnyPHIUse - Return true if the specified register is used by any + /// phi node. + bool HasAnyPHIUse(unsigned Reg) const; + /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered /// it 'high'. 
@@ -294,7 +297,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { RegLimit.resize(NumRC); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF); + RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF); } // Get our Loop information... @@ -758,18 +761,25 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasPHIUses - Return true if the specified register has any PHI use. -static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) { +/// HasAnyPHIUse - Return true if the specified register is used by any +/// phi node. +bool MachineLICM::HasAnyPHIUse(unsigned Reg) const { for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), UE = MRI->use_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; if (UseMI->isPHI()) return true; + // Look pass copies as well. + if (UseMI->isCopy()) { + unsigned Def = UseMI->getOperand(0).getReg(); + if (TargetRegisterInfo::isVirtualRegister(Def) && + HasAnyPHIUse(Def)) + return true; + } } return false; } - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' /// and an use in the current loop, return true if the target considered /// it 'high'. @@ -976,14 +986,13 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return false; } - // If result(s) of this instruction is used by PHIs, then don't hoist it. - // The presence of joins makes it difficult for current register allocator - // implementation to perform remat. + // If result(s) of this instruction is used by PHIs outside of the loop, then + // don't hoist it if the instruction because it will introduce an extra copy. 
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - if (HasPHIUses(MO.getReg(), MRI)) + if (HasAnyPHIUse(MO.getReg())) return false; } diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 8a93a24287b6..916dff70a41e 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -265,8 +265,11 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MI->isDebugValue()) continue; - if (PerformTrivialForwardCoalescing(MI, &MBB)) + bool Joined = PerformTrivialForwardCoalescing(MI, &MBB); + if (Joined) { + MadeChange = true; continue; + } if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 7351119f4728..f95f4112aeda 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -402,6 +402,11 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { SmallVector Cond; if (!TII->AnalyzeBranch(*const_cast(MBB), TBB, FBB, Cond)) { + // If the block branches directly to a landing pad successor, pretend that + // the landing pad is a normal block. + LandingPadSuccs.erase(TBB); + LandingPadSuccs.erase(FBB); + // Ok, AnalyzeBranch thinks it knows what's going on with this block. Let's // check whether its answers match up with reality. if (!TBB && !FBB) { @@ -602,9 +607,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check Live Variables. if (MI->isDebugValue()) { // Liveness checks are not valid for debug values. - } else if (MO->isUndef()) { - // An doesn't refer to any register, so just skip it. 
- } else if (MO->isUse()) { + } else if (MO->isUse() && !MO->isUndef()) { regsLiveInButUnused.erase(Reg); bool isKill = false; @@ -612,13 +615,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MI->isRegTiedToDefOperand(MONum, &defIdx)) { // A two-addr use counts as a kill if use and def are the same. unsigned DefReg = MI->getOperand(defIdx).getReg(); - if (Reg == DefReg) { + if (Reg == DefReg) isKill = true; - // And in that case an explicit kill flag is not allowed. - if (MO->isKill()) - report("Illegal kill flag on two-address instruction operand", - MO, MONum); - } else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + else if (TargetRegisterInfo::isPhysicalRegister(Reg)) { report("Two-address instruction operands must be identical", MO, MONum); } @@ -675,8 +674,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { MInfo.vregsLiveIn.insert(std::make_pair(Reg, MI)); } } - } else { - assert(MO->isDef()); + } else if (MO->isDef()) { // Register defined. // TODO: verify that earlyclobber ops are not used. 
if (MO->isDead()) diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index 5f7cf582c960..af65f13bf065 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -28,12 +28,17 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include -#include using namespace llvm; +static cl::opt +DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), + cl::Hidden, cl::desc("Disable critical edge splitting " + "during PHI elimination")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information @@ -105,10 +110,12 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; // Split critical edges to help the coalescer - if (LiveVariables *LV = getAnalysisIfAvailable()) { - MachineLoopInfo *MLI = getAnalysisIfAvailable(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + if (!DisableEdgeSplitting) { + if (LiveVariables *LV = getAnalysisIfAvailable()) { + MachineLoopInfo *MLI = getAnalysisIfAvailable(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, *LV, MLI); + } } // Populate VRegPHIUseCount diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 3489db2e9f4f..315aedddb9ef 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -55,6 +55,11 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::setDefault(RegAlloc); } + // This forces linking of the linear scan register allocator, + // so -regalloc=linearscan still works in clang. 
+ if (Ctor == createLinearScanRegisterAllocator) + return createLinearScanRegisterAllocator(); + if (Ctor != createDefaultRegisterAllocator) return Ctor(); @@ -63,6 +68,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { case CodeGenOpt::None: return createFastRegisterAllocator(); default: - return createLinearScanRegisterAllocator(); + return createGreedyRegisterAllocator(); } } diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 5d7123caa017..c105bb06ebe5 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -30,6 +30,15 @@ // If the "sub" instruction all ready sets (or could be modified to set) the // same flag that the "cmp" instruction sets and that "bz" uses, then we can // eliminate the "cmp" instruction. +// +// - Optimize Bitcast pairs: +// +// v1 = bitcast v0 +// v2 = bitcast v1 +// = v2 +// => +// v1 = bitcast v0 +// = v0 // //===----------------------------------------------------------------------===// @@ -57,7 +66,8 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); STATISTIC(NumReuse, "Number of extension results reused"); -STATISTIC(NumEliminated, "Number of compares eliminated"); +STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); +STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate foled"); namespace { @@ -85,6 +95,7 @@ namespace { } private: + bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &LocalMIs); @@ -243,12 +254,85 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } +/// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that +/// cannot be optimized away during isel (e.g. 
ARM::VMOVSR, which bitcast +/// a value cross register classes), and the source is defined by another +/// bitcast instruction B. And if the register class of source of B matches +/// the register class of instruction A, then it is legal to replace all uses +/// of the def of A with source of B. e.g. +/// %vreg0 = VMOVSR %vreg1 +/// %vreg3 = VMOVRS %vreg0 +/// Replace all uses of vreg3 with vreg1. + +bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI, + MachineBasicBlock *MBB) { + unsigned NumDefs = MI->getDesc().getNumDefs(); + unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + + unsigned Def = 0; + unsigned Src = 0; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Def = Reg; + else if (Src) + // Multiple sources? + return false; + else + Src = Reg; + } + + assert(Def && Src && "Malformed bitcast instruction!"); + + MachineInstr *DefMI = MRI->getVRegDef(Src); + if (!DefMI || !DefMI->getDesc().isBitcast()) + return false; + + unsigned SrcDef = 0; + unsigned SrcSrc = 0; + NumDefs = DefMI->getDesc().getNumDefs(); + NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; + if (NumDefs != 1) + return false; + for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { + const MachineOperand &MO = DefMI->getOperand(i); + if (!MO.isReg() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + SrcDef = Reg; + else if (SrcSrc) + // Multiple sources? 
+ return false; + else + SrcSrc = Reg; + } + + if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) + return false; + + MRI->replaceRegWith(Def, SrcSrc); + MRI->clearKillFlags(SrcSrc); + MI->eraseFromParent(); + ++NumBitcasts; + return true; +} + /// OptimizeCmpInstr - If the instruction is a compare and the previous /// instruction it's comparing against all ready sets (or could be modified to /// set) the same flag as the compare, then we can remove the comparison and use /// the flag from the previous instruction. bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, - MachineBasicBlock *MBB){ + MachineBasicBlock *MBB) { // If this instruction is a comparison against zero and isn't comparing a // physical register, we can try to optimize it. unsigned SrcReg; @@ -259,7 +343,7 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI, // Attempt to optimize the comparison instruction. if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) { - ++NumEliminated; + ++NumCmps; return true; } @@ -345,7 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->getDesc().isCompare()) { + const TargetInstrDesc &TID = MI->getDesc(); + + if (TID.isBitcast()) { + if (OptimizeBitcastInstr(MI, MBB)) { + // MI is deleted. + Changed = true; + MII = First ? I->begin() : llvm::next(PMII); + continue; + } + } else if (TID.isCompare()) { if (OptimizeCmpInstr(MI, MBB)) { // MI is deleted. 
Changed = true; diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 9cd9941e56b3..c04d65637c94 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -47,7 +47,6 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const { bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, unsigned Reg, unsigned OpIdx, - const TargetInstrInfo *tii_, SmallSet &ImpDefRegs) { switch(OpIdx) { case 1: @@ -61,7 +60,6 @@ ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, } static bool isUndefCopy(MachineInstr *MI, unsigned Reg, - const TargetInstrInfo *tii_, SmallSet &ImpDefRegs) { if (MI->isCopy()) { MachineOperand &MO0 = MI->getOperand(0); @@ -86,11 +84,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool Changed = false; - const TargetInstrInfo *tii_ = fn.getTarget().getInstrInfo(); - const TargetRegisterInfo *tri_ = fn.getTarget().getRegisterInfo(); - MachineRegisterInfo *mri_ = &fn.getRegInfo(); - - LiveVariables *lv_ = &getAnalysis(); + TII = fn.getTarget().getInstrInfo(); + TRI = fn.getTarget().getRegisterInfo(); + MRI = &fn.getRegInfo(); + LV = &getAnalysis(); SmallSet ImpDefRegs; SmallVector ImpDefMIs; @@ -113,7 +110,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MI->getOperand(0).getReg(); ImpDefRegs.insert(Reg); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned *SS = tri_->getSubRegisters(Reg); *SS; ++SS) + for (const unsigned *SS = TRI->getSubRegisters(Reg); *SS; ++SS) ImpDefRegs.insert(*SS); } ImpDefMIs.push_back(MI); @@ -125,7 +122,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineOperand &MO = MI->getOperand(1); if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) { if (MO.isKill()) { - LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); + LiveVariables::VarInfo& vi = LV->getVarInfo(MO.getReg()); vi.removeKill(MI); } 
MI->eraseFromParent(); @@ -145,14 +142,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { if (!ImpDefRegs.count(Reg)) continue; // Use is a copy, just turn it into an implicit_def. - if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) { + if (CanTurnIntoImplicitDef(MI, Reg, i, ImpDefRegs)) { bool isKill = MO.isKill(); - MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) MI->RemoveOperand(j); if (isKill) { ImpDefRegs.erase(Reg); - LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(MI); } ChangedToImpDef = true; @@ -210,8 +207,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // uses. bool Skip = false; SmallVector DeadImpDefs; - for (MachineRegisterInfo::def_iterator DI = mri_->def_begin(Reg), - DE = mri_->def_end(); DI != DE; ++DI) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { MachineInstr *DeadImpDef = &*DI; if (!DeadImpDef->isImplicitDef()) { Skip = true; @@ -229,8 +226,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { Changed = true; // Process each use instruction once. - for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), - UE = mri_->use_end(); UI != UE; ++UI) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), + UE = MRI->use_end(); UI != UE; ++UI) { if (UI.getOperand().isUndef()) continue; MachineInstr *RMI = &*UI; @@ -242,8 +239,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { MachineInstr *RMI = RUses[i]; // Turn a copy use into an implicit_def. 
- if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) { - RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); + if (isUndefCopy(RMI, Reg, ImpDefRegs)) { + RMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; SmallVector Ops; @@ -263,15 +260,15 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Update LiveVariables varinfo if the instruction is a kill. if (isKill) { - LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg); + LiveVariables::VarInfo& vi = LV->getVarInfo(Reg); vi.removeKill(RMI); } continue; } // Replace Reg with a new vreg that's marked implicit. - const TargetRegisterClass* RC = mri_->getRegClass(Reg); - unsigned NewVReg = mri_->createVirtualRegister(RC); + const TargetRegisterClass* RC = MRI->getRegClass(Reg); + unsigned NewVReg = MRI->createVirtualRegister(RC); bool isKill = true; for (unsigned j = 0, ee = RMI->getNumOperands(); j != ee; ++j) { MachineOperand &RRMO = RMI->getOperand(j); diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index ad7b6e4aa97f..f1f3c9969cc8 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -337,7 +337,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { --BeforeI; // Restore all registers immediately before the return and any - // terminators that preceed it. + // terminators that precede it. if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); @@ -437,7 +437,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { --BeforeI; // Restore all registers immediately before the return and any - // terminators that preceed it. + // terminators that precede it. 
for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { unsigned Reg = blockCSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); @@ -559,7 +559,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // frame pointer if a frame pointer is required. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) { + if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -641,7 +642,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) { + if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || + !RegInfo->useFPForScavengingIndex(Fn))) { int SFI = RS->getScavengingFrameIndex(); if (SFI >= 0) AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); @@ -811,7 +813,6 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // directly. 
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; - bool DoIncr = true; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); @@ -842,10 +843,8 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { } } - if (DoIncr) { - RS->forward(I); - ++I; - } + RS->forward(I); + ++I; } } } diff --git a/lib/CodeGen/README.txt b/lib/CodeGen/README.txt index b655dda41153..7f75f65167a3 100644 --- a/lib/CodeGen/README.txt +++ b/lib/CodeGen/README.txt @@ -26,7 +26,7 @@ and then "merge" mul and mov: sxth r3, r3 mla r4, r3, lr, r4 -It also increase the likelyhood the store may become dead. +It also increase the likelihood the store may become dead. //===---------------------------------------------------------------------===// @@ -162,7 +162,7 @@ synthesize the various copy insertion/inspection methods in TargetInstrInfo. //===---------------------------------------------------------------------===// -Stack coloring improvments: +Stack coloring improvements: 1. Do proper LiveStackAnalysis on all stack objects including those which are not spill slots. diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index 5af0ce79acf7..f431d5a5a026 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -61,6 +61,11 @@ class LiveVirtRegQueue; /// assignment order. class RegAllocBase { LiveIntervalUnion::Allocator UnionAllocator; + + // Cache tag for PhysReg2LiveUnion entries. Increment whenever virtual + // registers may have changed. + unsigned UserTag; + protected: // Array of LiveIntervalUnions indexed by physical register. class LiveUnionArray { @@ -92,7 +97,7 @@ class RegAllocBase { // query on a new live virtual register. 
OwningArrayPtr Queries; - RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {} + RegAllocBase(): UserTag(0), TRI(0), MRI(0), VRM(0), LIS(0) {} virtual ~RegAllocBase() {} @@ -104,7 +109,7 @@ class RegAllocBase { // before querying a new live virtual register. This ties Queries and // PhysReg2LiveUnion together. LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) { - Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]); + Queries[PhysReg].init(UserTag, &VirtReg, &PhysReg2LiveUnion[PhysReg]); return Queries[PhysReg]; } diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 6923908a32d9..d92d80f181fc 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "LiveDebugVariables.h" #include "LiveIntervalUnion.h" +#include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "RenderMachineFunction.h" #include "Spiller.h" @@ -136,6 +138,7 @@ char RABasic::ID = 0; } // end anonymous namespace RABasic::RABasic(): MachineFunctionPass(ID) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); @@ -154,6 +157,8 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); AU.addRequiredTransitive(); @@ -230,9 +235,12 @@ void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) { MRI = &vrm.getRegInfo(); VRM = &vrm; LIS = &lis; - PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs()); - // Cache an interferece query for each physical reg - Queries.reset(new 
LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + const unsigned NumRegs = TRI->getNumRegs(); + if (NumRegs != PhysReg2LiveUnion.numRegs()) { + PhysReg2LiveUnion.init(UnionAllocator, NumRegs); + // Cache an interferece query for each physical reg + Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]); + } } void RegAllocBase::LiveUnionArray::clear() { @@ -246,13 +254,15 @@ void RegAllocBase::LiveUnionArray::clear() { } void RegAllocBase::releaseMemory() { - PhysReg2LiveUnion.clear(); + for (unsigned r = 0, e = PhysReg2LiveUnion.numRegs(); r != e; ++r) + PhysReg2LiveUnion[r].clear(); } // Visit all the live registers. If they are already assigned to a physical // register, unify them with the corresponding LiveIntervalUnion, otherwise push // them on the priority queue for later assignment. void RegAllocBase::seedLiveRegs() { + NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled); for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) { unsigned RegNum = I->first; LiveInterval &VirtReg = *I->second; @@ -268,6 +278,7 @@ void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) { << " to " << PrintReg(PhysReg, TRI) << '\n'); assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment"); VRM->assignVirt2Phys(VirtReg.reg, PhysReg); + MRI->setPhysRegUsed(PhysReg); PhysReg2LiveUnion[PhysReg].unify(VirtReg); ++NumAssigned; } @@ -288,6 +299,18 @@ void RegAllocBase::allocatePhysRegs() { // Continue assigning vregs one at a time to available physical registers. while (LiveInterval *VirtReg = dequeue()) { + assert(!VRM->hasPhys(VirtReg->reg) && "Register already assigned"); + + // Unused registers can appear when the spiller coalesces snippets. + if (MRI->reg_nodbg_empty(VirtReg->reg)) { + DEBUG(dbgs() << "Dropping unused " << *VirtReg << '\n'); + LIS->removeInterval(VirtReg->reg); + continue; + } + + // Invalidate all interference queries, live ranges could have changed. 
+ ++UserTag; + // selectOrSplit requests the allocator to return an available physical // register if possible and populate a list of new live intervals that // result from splitting. @@ -304,7 +327,12 @@ void RegAllocBase::allocatePhysRegs() { for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { LiveInterval *SplitVirtReg = *I; - if (SplitVirtReg->empty()) continue; + assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); + if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { + DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); + LIS->removeInterval(SplitVirtReg->reg); + continue; + } DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n"); assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) && "expect split value in virtual register"); @@ -344,7 +372,8 @@ void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg, unassign(SpilledVReg, PhysReg); // Spill the extracted interval. - spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills); + LiveRangeEdit LRE(SpilledVReg, SplitVRegs, 0, &PendingSpills); + spiller().spill(LRE); } // After extracting segments, the query's results are invalid. But keep the // contents valid until we're done accessing pendingSpills. @@ -381,29 +410,31 @@ RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // Add newly allocated physical registers to the MBB live in sets. 
void RegAllocBase::addMBBLiveIns(MachineFunction *MF) { NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled); - typedef SmallVector MBBVec; - MBBVec liveInMBBs; - MachineBasicBlock &entryMBB = *MF->begin(); + SlotIndexes *Indexes = LIS->getSlotIndexes(); + if (MF->size() <= 1) + return; + LiveIntervalUnion::SegmentIter SI; for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) { LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg]; if (LiveUnion.empty()) continue; - for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid(); - ++SI) { - - // Find the set of basic blocks which this range is live into... - liveInMBBs.clear(); - if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue; - - // And add the physreg for this interval to their live-in sets. - for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end(); - I != E; ++I) { - MachineBasicBlock *MBB = *I; - if (MBB == &entryMBB) continue; - if (MBB->isLiveIn(PhysReg)) continue; - MBB->addLiveIn(PhysReg); - } + MachineFunction::iterator MBB = llvm::next(MF->begin()); + MachineFunction::iterator MFE = MF->end(); + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.setMap(LiveUnion.getMap()); + SI.find(Start); + while (SI.valid()) { + if (SI.start() <= Start) { + if (!MBB->isLiveIn(PhysReg)) + MBB->addLiveIn(PhysReg); + } else if (SI.start() > Stop) + MBB = Indexes->getMBBFromIndex(SI.start().getPrevIndex()); + if (++MBB == MFE) + break; + tie(Start, Stop) = Indexes->getMBBRange(MBB); + SI.advanceTo(Start); } } } @@ -469,9 +500,8 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, } // No other spill candidates were found, so spill the current VirtReg. 
DEBUG(dbgs() << "spilling: " << VirtReg << '\n'); - SmallVector pendingSpills; - - spiller().spill(&VirtReg, SplitVRegs, pendingSpills); + LiveRangeEdit LRE(VirtReg, SplitVRegs); + spiller().spill(LRE); // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. @@ -490,7 +520,7 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { ReservedRegs = TRI->getReservedRegs(*MF); - SpillerInstance.reset(createSpiller(*this, *MF, *VRM)); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); @@ -525,6 +555,9 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { // Run rewriter VRM->rewrite(LIS->getSlotIndexes()); + // Write out new DBG_VALUE instructions. + getAnalysis().emitDebugValues(VRM); + // The pass output is in VirtRegMap. Release all the transient data. releaseMemory(); diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 15036e38b893..b2fd6e092ce6 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -97,7 +97,7 @@ namespace { // immediately without checking aliases. regFree, - // A reserved register has been assigned expolicitly (e.g., setting up a + // A reserved register has been assigned explicitly (e.g., setting up a // call parameter), and it remains reserved until it is used. regReserved @@ -396,7 +396,6 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, PhysRegState[PhysReg] = NewState; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { - UsedInInstr.set(Alias); switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -420,20 +419,25 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. 
unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.test(PhysReg)) + if (UsedInInstr.test(PhysReg)) { + DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n"); return spillImpossible; + } switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; case regFree: return 0; case regReserved: + DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: " + << PhysReg << " is reserved already.\n"); return spillImpossible; default: return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean; } - // This is a disabled register, add up const of aliases. + // This is a disabled register, add up cost of aliases. + DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n"); unsigned Cost = 0; for (const unsigned *AS = TRI->getAliasSet(PhysReg); unsigned Alias = *AS; ++AS) { @@ -511,9 +515,14 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) { unsigned BestReg = 0, BestCost = spillImpossible; for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) { - if (!Allocatable.test(*I)) + if (!Allocatable.test(*I)) { + DEBUG(dbgs() << "\tRegister " << *I << " is not allocatable.\n"); continue; + } unsigned Cost = calcSpillCost(*I); + DEBUG(dbgs() << "\tRegister: " << *I << "\n"); + DEBUG(dbgs() << "\tCost: " << Cost << "\n"); + DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n"); // Cost is 0 when all aliases are already disabled. if (Cost == 0) return assignVirtToPhysReg(LRE, *I); @@ -722,9 +731,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n"); UsedInInstr.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - UsedInInstr.set(*AS); } // Also mark PartialDefs as used to avoid reallocation. 
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 406485aaf496..7c461d8ea787 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -14,7 +14,8 @@ #define DEBUG_TYPE "regalloc" #include "AllocationOrder.h" -#include "LiveIntervalUnion.h" +#include "InterferenceCache.h" +#include "LiveDebugVariables.h" #include "LiveRangeEdit.h" #include "RegAllocBase.h" #include "Spiller.h" @@ -49,14 +50,16 @@ using namespace llvm; STATISTIC(NumGlobalSplits, "Number of split global live ranges"); STATISTIC(NumLocalSplits, "Number of split local live ranges"); -STATISTIC(NumReassigned, "Number of interferences reassigned"); STATISTIC(NumEvicted, "Number of interferences evicted"); static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); namespace { -class RAGreedy : public MachineFunctionPass, public RegAllocBase { +class RAGreedy : public MachineFunctionPass, + public RegAllocBase, + private LiveRangeEdit::Delegate { + // context MachineFunction *MF; BitVector ReservedRegs; @@ -72,14 +75,73 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { // state std::auto_ptr SpillerInstance; - std::auto_ptr SA; std::priority_queue > Queue; - IndexedMap Generation; + + // Live ranges pass through a number of stages as we try to allocate them. + // Some of the stages may also create new live ranges: + // + // - Region splitting. + // - Per-block splitting. + // - Local splitting. + // - Spilling. + // + // Ranges produced by one of the stages skip the previous stages when they are + // dequeued. This improves performance because we can skip interference checks + // that are unlikely to give any results. It also guarantees that the live + // range splitting algorithm terminates, something that is otherwise hard to + // ensure. + enum LiveRangeStage { + RS_New, ///< Never seen before. + RS_First, ///< First time in the queue. 
+ RS_Second, ///< Second time in the queue. + RS_Global, ///< Produced by global splitting. + RS_Local, ///< Produced by local splitting. + RS_Spill ///< Produced by spilling. + }; + + IndexedMap LRStage; + + LiveRangeStage getStage(const LiveInterval &VirtReg) const { + return LiveRangeStage(LRStage[VirtReg.reg]); + } + + template + void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { + LRStage.resize(MRI->getNumVirtRegs()); + for (;Begin != End; ++Begin) { + unsigned Reg = (*Begin)->reg; + if (LRStage[Reg] == RS_New) + LRStage[Reg] = NewStage; + } + } // splitting state. + std::auto_ptr SA; + std::auto_ptr SE; - /// All basic blocks where the current register is live. - SmallVector SpillConstraints; + /// Cached per-block interference maps + InterferenceCache IntfCache; + + /// All basic blocks where the current register has uses. + SmallVector SplitConstraints; + + /// Global live range splitting candidate info. + struct GlobalSplitCandidate { + unsigned PhysReg; + BitVector LiveBundles; + SmallVector ActiveBlocks; + + void reset(unsigned Reg) { + PhysReg = Reg; + LiveBundles.clear(); + ActiveBlocks.clear(); + } + }; + + /// Candidate info for for each PhysReg in AllocationOrder. + /// This vector never shrinks, but grows to the size of the largest register + /// class. + SmallVector GlobalCand; /// For every instruction in SA->UseSlots, store the previous non-copy /// instruction. 
@@ -108,42 +170,50 @@ class RAGreedy : public MachineFunctionPass, public RegAllocBase { static char ID; private: - bool checkUncachedInterference(LiveInterval&, unsigned); - LiveInterval *getSingleInterference(LiveInterval&, unsigned); - bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg); - float calcInterferenceWeight(LiveInterval&, unsigned); - float calcInterferenceInfo(LiveInterval&, unsigned); - float calcGlobalSplitCost(const BitVector&); - void splitAroundRegion(LiveInterval&, unsigned, const BitVector&, + void LRE_WillEraseInstruction(MachineInstr*); + bool LRE_CanEraseVirtReg(unsigned); + void LRE_WillShrinkVirtReg(unsigned); + void LRE_DidCloneVirtReg(unsigned, unsigned); + + float calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, float&); + void addThroughConstraints(InterferenceCache::Cursor, ArrayRef); + void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor); + float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor); + void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&, SmallVectorImpl&); void calcGapWeights(unsigned, SmallVectorImpl&); SlotIndex getPrevMappedIndex(const MachineInstr*); void calcPrevSlots(); unsigned nextSplitPoint(unsigned); - bool canEvictInterference(LiveInterval&, unsigned, unsigned, float&); + bool canEvictInterference(LiveInterval&, unsigned, float&); - unsigned tryReassign(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + unsigned tryAssign(LiveInterval&, AllocationOrder&, + SmallVectorImpl&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); unsigned trySplit(LiveInterval&, AllocationOrder&, SmallVectorImpl&); - unsigned trySpillInterferences(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); }; } // end anonymous namespace char 
RAGreedy::ID = 0; +// Hysteresis to use when comparing floats. +// This helps stabilize decisions based on float comparisons. +const float Hysteresis = 0.98f; + + FunctionPass* llvm::createGreedyRegisterAllocator() { return new RAGreedy(); } -RAGreedy::RAGreedy(): MachineFunctionPass(ID) { +RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) { + initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); @@ -166,6 +236,8 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); if (StrongPHIElim) AU.addRequiredID(StrongPHIEliminationID); AU.addRequiredTransitive(); @@ -185,9 +257,49 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } + +//===----------------------------------------------------------------------===// +// LiveRangeEdit delegate methods +//===----------------------------------------------------------------------===// + +void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { + // LRE itself will remove from SlotIndexes and parent basic block. + VRM->RemoveMachineInstrFromMaps(MI); +} + +bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { + if (unsigned PhysReg = VRM->getPhys(VirtReg)) { + unassign(LIS->getInterval(VirtReg), PhysReg); + return true; + } + // Unassigned virtreg is probably in the priority queue. + // RegAllocBase will erase it after dequeueing. + return false; +} + +void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { + unsigned PhysReg = VRM->getPhys(VirtReg); + if (!PhysReg) + return; + + // Register is assigned, put it back on the queue for reassignment. 
+ LiveInterval &LI = LIS->getInterval(VirtReg); + unassign(LI, PhysReg); + enqueue(&LI); +} + +void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { + // LRE may clone a virtual register because dead code elimination causes it to + // be split into connected components. Ensure that the new register gets the + // same stage as the parent. + LRStage.grow(New); + LRStage[New] = LRStage[Old]; +} + void RAGreedy::releaseMemory() { SpillerInstance.reset(0); - Generation.clear(); + LRStage.clear(); + GlobalCand.clear(); RegAllocBase::releaseMemory(); } @@ -198,20 +310,26 @@ void RAGreedy::enqueue(LiveInterval *LI) { const unsigned Reg = LI->reg; assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Can only enqueue virtual registers"); - const unsigned Hint = VRM->getRegAllocPref(Reg); unsigned Prio; - Generation.grow(Reg); - if (++Generation[Reg] == 1) - // 1st generation ranges are handled first, long -> short. - Prio = (1u << 31) + Size; - else - // Repeat offenders are handled second, short -> long - Prio = (1u << 30) - Size; + LRStage.grow(Reg); + if (LRStage[Reg] == RS_New) + LRStage[Reg] = RS_First; - // Boost ranges that have a physical register hint. - if (TargetRegisterInfo::isPhysicalRegister(Hint)) - Prio |= (1u << 30); + if (LRStage[Reg] == RS_Second) + // Unsplit ranges that couldn't be allocated immediately are deferred until + // everything else has been allocated. Long ranges are allocated last so + // they are split against realistic interference. + Prio = (1u << 31) - Size; + else { + // Everything else is allocated in long->short order. Long ranges that don't + // fit should be spilled ASAP so they don't create interference. + Prio = (1u << 31) + Size; + + // Boost ranges that have a physical register hint. 
+ if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg))) + Prio |= (1u << 30); + } Queue.push(std::make_pair(Prio, Reg)); } @@ -224,97 +342,34 @@ LiveInterval *RAGreedy::dequeue() { return LI; } + //===----------------------------------------------------------------------===// -// Register Reassignment +// Direct Assignment //===----------------------------------------------------------------------===// -// Check interference without using the cache. -bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]); - if (subQ.checkInterference()) - return true; - } - return false; -} - -/// getSingleInterference - Return the single interfering virtual register -/// assigned to PhysReg. Return 0 if more than one virtual register is -/// interfering. -LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg, - unsigned PhysReg) { - // Check physreg and aliases. - LiveInterval *Interference = 0; - for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - if (Q.checkInterference()) { - if (Interference) - return 0; - if (Q.collectInterferingVRegs(2) > 1) - return 0; - Interference = Q.interferingVRegs().front(); - } - } - return Interference; -} - -// Attempt to reassign this virtual register to a different physical register. -// -// FIXME: we are not yet caching these "second-level" interferences discovered -// in the sub-queries. These interferences can change with each call to -// selectOrSplit. However, we could implement a "may-interfere" cache that -// could be conservatively dirtied when we reassign or split. -// -// FIXME: This may result in a lot of alias queries. We could summarize alias -// live intervals in their parent register's live union, but it's messy. 
-bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg, - unsigned WantedPhysReg) { - assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) && - "Can only reassign virtual registers"); - assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) && - "inconsistent phys reg assigment"); - - AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - // Don't reassign to a WantedPhysReg alias. - if (TRI->regsOverlap(PhysReg, WantedPhysReg)) - continue; - - if (checkUncachedInterference(InterferingVReg, PhysReg)) - continue; - - // Reassign the interfering virtual reg to this physical reg. - unsigned OldAssign = VRM->getPhys(InterferingVReg.reg); - DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " << - TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n'); - unassign(InterferingVReg, OldAssign); - assign(InterferingVReg, PhysReg); - ++NumReassigned; - return true; - } - return false; -} - -/// tryReassign - Try to reassign a single interference to a different physreg. -/// @param VirtReg Currently unassigned virtual register. -/// @param Order Physregs to try. -/// @return Physreg to assign VirtReg, or 0. -unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs){ - NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled); - +/// tryAssign - Try to assign VirtReg to an available register. 
+unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, + AllocationOrder &Order, + SmallVectorImpl &NewVRegs) { Order.rewind(); - while (unsigned PhysReg = Order.next()) { - LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg); - if (!InterferingVReg) - continue; - if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg)) - continue; - if (reassignVReg(*InterferingVReg, PhysReg)) - return PhysReg; - } - return 0; + unsigned PhysReg; + while ((PhysReg = Order.next())) + if (!checkPhysRegInterference(VirtReg, PhysReg)) + break; + if (!PhysReg || Order.isHint(PhysReg)) + return PhysReg; + + // PhysReg is available. Try to evict interference from a cheaper alternative. + unsigned Cost = TRI->getCostPerUse(PhysReg); + + // Most registers have 0 additional cost. + if (!Cost) + return PhysReg; + + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost + << '\n'); + unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); + return CheapReg ? CheapReg : PhysReg; } @@ -323,22 +378,24 @@ unsigned RAGreedy::tryReassign(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// /// canEvict - Return true if all interferences between VirtReg and PhysReg can -/// be evicted. Set maxWeight to the maximal spill weight of an interference. +/// be evicted. +/// Return false if any interference is heavier than MaxWeight. +/// On return, set MaxWeight to the maximal spill weight of an interference. bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, - unsigned Size, float &MaxWeight) { + float &MaxWeight) { float Weight = 0; for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) { LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI); - // If there is 10 or more interferences, chances are one is smaller. - if (Q.collectInterferingVRegs(10) >= 10) + // If there is 10 or more interferences, chances are one is heavier. 
+ if (Q.collectInterferingVRegs(10, MaxWeight) >= 10) return false; - // CHeck if any interfering live range is shorter than VirtReg. - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *Intf = Q.interferingVRegs()[i]; + // Check if any interfering live range is heavier than MaxWeight. + for (unsigned i = Q.interferingVRegs().size(); i; --i) { + LiveInterval *Intf = Q.interferingVRegs()[i - 1]; if (TargetRegisterInfo::isPhysicalRegister(Intf->reg)) return false; - if (Intf->getSize() <= Size) + if (Intf->weight >= MaxWeight) return false; Weight = std::max(Weight, Intf->weight); } @@ -353,25 +410,28 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// @return Physreg to assign VirtReg, or 0. unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs){ + SmallVectorImpl &NewVRegs, + unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); - // We can only evict interference if all interfering registers are virtual and - // longer than VirtReg. - const unsigned Size = VirtReg.getSize(); - // Keep track of the lightest single interference seen so far. - float BestWeight = 0; + float BestWeight = VirtReg.weight; unsigned BestPhys = 0; Order.rewind(); while (unsigned PhysReg = Order.next()) { - float Weight = 0; - if (!canEvictInterference(VirtReg, PhysReg, Size, Weight)) + if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) + continue; + // The first use of a register in a function has cost 1. + if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg)) + continue; + + float Weight = BestWeight; + if (!canEvictInterference(VirtReg, PhysReg, Weight)) continue; // This is an eviction candidate. 
- DEBUG(dbgs() << "max " << PrintReg(PhysReg, TRI) << " interference = " + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = " << Weight << '\n'); if (BestPhys && Weight >= BestWeight) continue; @@ -406,201 +466,228 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // Region Splitting //===----------------------------------------------------------------------===// -/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints -/// when considering interference from PhysReg. Also compute an optimistic local -/// cost of this interference pattern. -/// -/// The final cost of a split is the local cost + global cost of preferences -/// broken by SpillPlacement. -/// -float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) { +/// addSplitConstraints - Fill out the SplitConstraints vector based on the +/// interference pattern in Physreg and its aliases. Add the constraints to +/// SpillPlacement and return the static cost of this split in Cost, assuming +/// that all preferences in SplitConstraints are met. +/// Return false if there are no bundles with positive bias. +bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, + float &Cost) { + ArrayRef UseBlocks = SA->getUseBlocks(); + // Reset interference dependent info. - SpillConstraints.resize(SA->LiveBlocks.size()); - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; + SplitConstraints.resize(UseBlocks.size()); + float StaticCost = 0; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + BC.Number = BI.MBB->getNumber(); - BC.Entry = (BI.Uses && BI.LiveIn) ? - SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.Exit = (BI.Uses && BI.LiveOut) ? 
- SpillPlacement::PrefReg : SpillPlacement::DontCare; - BI.OverlapEntry = BI.OverlapExit = false; - } + Intf.moveToBlock(BC.Number); + BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; + BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; - // Add interference info from each PhysReg alias. - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(VirtReg, *AI).checkInterference()) - continue; - LiveIntervalUnion::SegmentIter IntI = - PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); - if (!IntI.valid()) + if (!Intf.hasInterference()) continue; - // Determine which blocks have interference live in or after the last split - // point. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + // Number of spill code instructions to insert. + unsigned Ins = 0; - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; - - // Is the interference live-in? - if (BI.LiveIn) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() <= Start) - BC.Entry = SpillPlacement::MustSpill; - } - - // Is the interference overlapping the last split point? - if (BI.LiveOut) { - if (IntI.stop() < BI.LastSplitPoint) - IntI.advanceTo(BI.LastSplitPoint.getPrevSlot()); - if (!IntI.valid()) - break; - if (IntI.start() < Stop) - BC.Exit = SpillPlacement::MustSpill; - } + // Interference for the live-in value. + if (BI.LiveIn) { + if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) + BC.Entry = SpillPlacement::MustSpill, ++Ins; + else if (Intf.first() < BI.FirstUse) + BC.Entry = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.first() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) + ++Ins; } - // Rewind iterator and check other interferences. 
- IntI.find(VirtReg.beginIndex()); - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); + // Interference for the live-out value. + if (BI.LiveOut) { + if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) + BC.Exit = SpillPlacement::MustSpill, ++Ins; + else if (Intf.last() > BI.LastUse) + BC.Exit = SpillPlacement::PrefSpill, ++Ins; + else if (Intf.last() > (BI.LiveThrough ? BI.FirstUse : BI.Def)) + ++Ins; + } - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; + // Accumulate the total frequency of inserted spill code. + if (Ins) + StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + Cost = StaticCost; - // Handle transparent blocks with interference separately. - // Transparent blocks never incur any fixed cost. - if (BI.LiveThrough && !BI.Uses) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() >= Stop) + // Add constraints for use-blocks. Note that these are the only constraints + // that may add a positive bias, it is downhill from here. + SpillPlacer->addConstraints(SplitConstraints); + return SpillPlacer->scanActiveBundles(); +} + + +/// addThroughConstraints - Add constraints and links to SpillPlacer from the +/// live-through blocks in Blocks. 
+void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, + ArrayRef Blocks) { + const unsigned GroupSize = 8; + SpillPlacement::BlockConstraint BCS[GroupSize]; + unsigned TBS[GroupSize]; + unsigned B = 0, T = 0; + + for (unsigned i = 0; i != Blocks.size(); ++i) { + unsigned Number = Blocks[i]; + Intf.moveToBlock(Number); + + if (!Intf.hasInterference()) { + assert(T < GroupSize && "Array overflow"); + TBS[T] = Number; + if (++T == GroupSize) { + SpillPlacer->addLinks(ArrayRef(TBS, T)); + T = 0; + } + continue; + } + + assert(B < GroupSize && "Array overflow"); + BCS[B].Number = Number; + + // Interference for the live-in value. + if (Intf.first() <= Indexes->getMBBStartIdx(Number)) + BCS[B].Entry = SpillPlacement::MustSpill; + else + BCS[B].Entry = SpillPlacement::PrefSpill; + + // Interference for the live-out value. + if (Intf.last() >= SA->getLastSplitPoint(Number)) + BCS[B].Exit = SpillPlacement::MustSpill; + else + BCS[B].Exit = SpillPlacement::PrefSpill; + + if (++B == GroupSize) { + ArrayRef Array(BCS, B); + SpillPlacer->addConstraints(Array); + B = 0; + } + } + + ArrayRef Array(BCS, B); + SpillPlacer->addConstraints(Array); + SpillPlacer->addLinks(ArrayRef(TBS, T)); +} + +void RAGreedy::growRegion(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { + // Keep track of through blocks that have not been added to SpillPlacer. + BitVector Todo = SA->getThroughBlocks(); + SmallVectorImpl &ActiveBlocks = Cand.ActiveBlocks; + unsigned AddedTo = 0; +#ifndef NDEBUG + unsigned Visited = 0; +#endif + + for (;;) { + ArrayRef NewBundles = SpillPlacer->getRecentPositive(); + if (NewBundles.empty()) + break; + // Find new through blocks in the periphery of PrefRegBundles. + for (int i = 0, e = NewBundles.size(); i != e; ++i) { + unsigned Bundle = NewBundles[i]; + // Look at all blocks connected to Bundle in the full graph. 
+ ArrayRef Blocks = Bundles->getBlocks(Bundle); + for (ArrayRef::iterator I = Blocks.begin(), E = Blocks.end(); + I != E; ++I) { + unsigned Block = *I; + if (!Todo.test(Block)) continue; - - if (BC.Entry != SpillPlacement::MustSpill) - BC.Entry = SpillPlacement::PrefSpill; - if (BC.Exit != SpillPlacement::MustSpill) - BC.Exit = SpillPlacement::PrefSpill; - continue; - } - - // Now we only have blocks with uses left. - // Check if the interference overlaps the uses. - assert(BI.Uses && "Non-transparent block without any uses"); - - // Check interference on entry. - if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - // Not live in, but before the first use. - if (IntI.start() < BI.FirstUse) { - BC.Entry = SpillPlacement::PrefSpill; - // If the block contains a kill from an earlier split, never split - // again in the same block. - if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Kill)) - BC.Entry = SpillPlacement::MustSpill; - } - } - - // Does interference overlap the uses in the entry segment - // [FirstUse;Kill)? - if (BI.LiveIn && !BI.OverlapEntry) { - IntI.advanceTo(BI.FirstUse); - if (!IntI.valid()) - break; - // A live-through interval has no kill. - // Check [FirstUse;LastUse) instead. - if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill)) - BI.OverlapEntry = true; - } - - // Does interference overlap the uses in the exit segment [Def;LastUse)? - if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) { - IntI.advanceTo(BI.Def); - if (!IntI.valid()) - break; - if (IntI.start() < BI.LastUse) - BI.OverlapExit = true; - } - - // Check interference on exit. - if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) { - // Check interference between LastUse and Stop. - if (BC.Exit != SpillPlacement::PrefSpill) { - IntI.advanceTo(BI.LastUse); - if (!IntI.valid()) - break; - if (IntI.start() < Stop) { - BC.Exit = SpillPlacement::PrefSpill; - // Avoid splitting twice in the same block. 
- if (!BI.LiveThrough && !SA->isOriginalEndpoint(BI.Def)) - BC.Exit = SpillPlacement::MustSpill; - } - } + Todo.reset(Block); + // This is a new through block. Add it to SpillPlacer later. + ActiveBlocks.push_back(Block); +#ifndef NDEBUG + ++Visited; +#endif } } + // Any new blocks to add? + if (ActiveBlocks.size() > AddedTo) { + ArrayRef Add(&ActiveBlocks[AddedTo], + ActiveBlocks.size() - AddedTo); + addThroughConstraints(Intf, Add); + AddedTo = ActiveBlocks.size(); + } + // Perhaps iterating can enable more bundles? + SpillPlacer->iterate(); } + DEBUG(dbgs() << ", v=" << Visited); +} - // Accumulate a local cost of this interference pattern. - float LocalCost = 0; - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - if (!BI.Uses) - continue; - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - unsigned Inserts = 0; +/// calcSpillCost - Compute how expensive it would be to split the live range in +/// SA around all use blocks instead of forming bundle regions. +float RAGreedy::calcSpillCost() { + float Cost = 0; + const LiveInterval &LI = SA->getParent(); + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + unsigned Number = BI.MBB->getNumber(); + // We normally only need one spill instruction - a load or a store. + Cost += SpillPlacer->getBlockFrequency(Number); - // Do we need spill code for the entry segment? - if (BI.LiveIn) - Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg; - - // For the exit segment? - if (BI.LiveOut) - Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg; - - // The local cost of spill code in this block is the block frequency times - // the number of spill instructions inserted. - if (Inserts) - LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB); + // Unless the value is redefined in the block. 
+ if (BI.LiveIn && BI.LiveOut) { + SlotIndex Start, Stop; + tie(Start, Stop) = Indexes->getMBBRange(Number); + LiveInterval::const_iterator I = LI.find(Start); + assert(I != LI.end() && "Expected live-in value"); + // Is there a different live-out value? If so, we need an extra spill + // instruction. + if (I->end < Stop) + Cost += SpillPlacer->getBlockFrequency(Number); + } } - DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = " - << LocalCost << '\n'); - return LocalCost; + return Cost; } /// calcGlobalSplitCost - Return the global split cost of following the split /// pattern in LiveBundles. This cost should be added to the local cost of the -/// interference pattern in SpillConstraints. +/// interference pattern in SplitConstraints. /// -float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { +float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand, + InterferenceCache::Cursor Intf) { float GlobalCost = 0; - for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) { - SpillPlacement::BlockConstraint &BC = SpillConstraints[i]; - unsigned Inserts = 0; - // Broken entry preference? - Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] != - (BC.Entry == SpillPlacement::PrefReg); - // Broken exit preference? 
- Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] != - (BC.Exit == SpillPlacement::PrefReg); - if (Inserts) - GlobalCost += - Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB); + const BitVector &LiveBundles = Cand.LiveBundles; + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; + bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; + unsigned Ins = 0; + + if (BI.LiveIn) + Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); + if (BI.LiveOut) + Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); + if (Ins) + GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + } + + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + if (!RegIn && !RegOut) + continue; + if (RegIn && RegOut) { + // We need double spill code if this block has interference. + Intf.moveToBlock(Number); + if (Intf.hasInterference()) + GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + continue; + } + // live-in / stack-out or stack-in live-out. + GlobalCost += SpillPlacer->getBlockFrequency(Number); } - DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n'); return GlobalCost; } @@ -611,113 +698,74 @@ float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) { /// avoiding interference. The 'stack' interval is the complement constructed by /// SplitEditor. It will contain the rest. 
/// -void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, - const BitVector &LiveBundles, +void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, + GlobalSplitCandidate &Cand, SmallVectorImpl &NewVRegs) { + const BitVector &LiveBundles = Cand.LiveBundles; + DEBUG({ - dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI) + dbgs() << "Splitting around region for " << PrintReg(Cand.PhysReg, TRI) << " with bundles"; for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i)) dbgs() << " EB#" << i; dbgs() << ".\n"; }); - // First compute interference ranges in the live blocks. - typedef std::pair IndexPair; - SmallVector InterferenceRanges; - InterferenceRanges.resize(SA->LiveBlocks.size()); - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - if (!query(VirtReg, *AI).checkInterference()) - continue; - LiveIntervalUnion::SegmentIter IntI = - PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex()); - if (!IntI.valid()) - continue; - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; - IndexPair &IP = InterferenceRanges[i]; - SlotIndex Start, Stop; - tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - // Skip interference-free blocks. - if (IntI.start() >= Stop) - continue; - - // First interference in block. - if (BI.LiveIn) { - IntI.advanceTo(Start); - if (!IntI.valid()) - break; - if (IntI.start() >= Stop) - continue; - if (!IP.first.isValid() || IntI.start() < IP.first) - IP.first = IntI.start(); - } - - // Last interference in block. 
- if (BI.LiveOut) { - IntI.advanceTo(Stop); - if (!IntI.valid() || IntI.start() >= Stop) - --IntI; - if (IntI.stop() <= Start) - continue; - if (!IP.second.isValid() || IntI.stop() > IP.second) - IP.second = IntI.stop(); - } - } - } - - SmallVector SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); // Create the main cross-block interval. - SE.openIntv(); + const unsigned MainIntv = SE->openIntv(); // First add all defs that are live out of a block. - for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + ArrayRef UseBlocks = SA->getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; + // Create separate intervals for isolated blocks with multiple uses. + if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) { + DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); + SE->splitSingleBlock(BI); + SE->selectIntv(MainIntv); + continue; + } + // Should the register be live out? if (!BI.LiveOut || !RegOut) continue; - IndexPair &IP = InterferenceRanges[i]; SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - + Intf.moveToBlock(BI.MBB->getNumber()); DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#" << Bundles->getBundle(BI.MBB->getNumber(), 1) - << " intf [" << IP.first << ';' << IP.second << ')'); + << " [" << Start << ';' + << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop + << ") intf [" << Intf.first() << ';' << Intf.last() << ')'); // The interference interval should either be invalid or overlap MBB. 
- assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference"); - assert((!IP.second.isValid() || IP.second > Start) && "Bad interference"); + assert((!Intf.hasInterference() || Intf.first() < Stop) + && "Bad interference"); + assert((!Intf.hasInterference() || Intf.last() > Start) + && "Bad interference"); // Check interference leaving the block. - if (!IP.second.isValid()) { + if (!Intf.hasInterference()) { // Block is interference-free. DEBUG(dbgs() << ", no interference"); - if (!BI.Uses) { - assert(BI.LiveThrough && "No uses, but not live through block?"); - // Block is live-through without interference. - DEBUG(dbgs() << ", no uses" - << (RegIn ? ", live-through.\n" : ", stack in.\n")); - if (!RegIn) - SE.enterIntvAtEnd(*BI.MBB); - continue; - } if (!BI.LiveThrough) { DEBUG(dbgs() << ", not live-through.\n"); - SE.useIntv(SE.enterIntvBefore(BI.Def), Stop); + SE->useIntv(SE->enterIntvBefore(BI.Def), Stop); continue; } if (!RegIn) { // Block is live-through, but entry bundle is on the stack. // Reload just before the first use. DEBUG(dbgs() << ", not live-in, enter before first use.\n"); - SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop); + SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop); continue; } DEBUG(dbgs() << ", live-through.\n"); @@ -725,53 +773,45 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, } // Block has interference. - DEBUG(dbgs() << ", interference to " << IP.second); + DEBUG(dbgs() << ", interference to " << Intf.last()); - if (!BI.LiveThrough && IP.second <= BI.Def) { + if (!BI.LiveThrough && Intf.last() <= BI.Def) { // The interference doesn't reach the outgoing segment. DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n'); - SE.useIntv(BI.Def, Stop); + SE->useIntv(BI.Def, Stop); continue; } - - if (!BI.Uses) { - // No uses in block, avoid interference by reloading as late as possible. 
- DEBUG(dbgs() << ", no uses.\n"); - SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB); - assert(SegStart >= IP.second && "Couldn't avoid interference"); - continue; - } - - if (IP.second.getBoundaryIndex() < BI.LastUse) { + SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); + if (Intf.last().getBoundaryIndex() < BI.LastUse) { // There are interference-free uses at the end of the block. // Find the first use that can get the live-out register. SmallVectorImpl::const_iterator UI = std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - IP.second.getBoundaryIndex()); + Intf.last().getBoundaryIndex()); assert(UI != SA->UseSlots.end() && "Couldn't find last use"); SlotIndex Use = *UI; assert(Use <= BI.LastUse && "Couldn't find last use"); // Only attempt a split befroe the last split point. - if (Use.getBaseIndex() <= BI.LastSplitPoint) { + if (Use.getBaseIndex() <= LastSplitPoint) { DEBUG(dbgs() << ", free use at " << Use << ".\n"); - SlotIndex SegStart = SE.enterIntvBefore(Use); - assert(SegStart >= IP.second && "Couldn't avoid interference"); - assert(SegStart < BI.LastSplitPoint && "Impossible split point"); - SE.useIntv(SegStart, Stop); + SlotIndex SegStart = SE->enterIntvBefore(Use); + assert(SegStart >= Intf.last() && "Couldn't avoid interference"); + assert(SegStart < LastSplitPoint && "Impossible split point"); + SE->useIntv(SegStart, Stop); continue; } } // Interference is after the last use. DEBUG(dbgs() << " after last use.\n"); - SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB); - assert(SegStart >= IP.second && "Couldn't avoid interference"); + SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB); + assert(SegStart >= Intf.last() && "Couldn't avoid interference"); } // Now all defs leading to live bundles are handled, do everything else. 
- for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) { - SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i]; + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)]; bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)]; @@ -780,152 +820,207 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg, continue; // We have an incoming register. Check for interference. - IndexPair &IP = InterferenceRanges[i]; SlotIndex Start, Stop; tie(Start, Stop) = Indexes->getMBBRange(BI.MBB); - + Intf.moveToBlock(BI.MBB->getNumber()); DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0) - << " -> BB#" << BI.MBB->getNumber()); + << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';' + << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop + << ')'); // Check interference entering the block. - if (!IP.first.isValid()) { + if (!Intf.hasInterference()) { // Block is interference-free. DEBUG(dbgs() << ", no interference"); - if (!BI.Uses) { - assert(BI.LiveThrough && "No uses, but not live through block?"); - // Block is live-through without interference. - if (RegOut) { - DEBUG(dbgs() << ", no uses, live-through.\n"); - SE.useIntv(Start, Stop); - } else { - DEBUG(dbgs() << ", no uses, stack-out.\n"); - SE.leaveIntvAtTop(*BI.MBB); - } - continue; - } if (!BI.LiveThrough) { DEBUG(dbgs() << ", killed in block.\n"); - SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.Kill)); continue; } if (!RegOut) { + SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber()); // Block is live-through, but exit bundle is on the stack. // Spill immediately after the last use. 
- if (BI.LastUse < BI.LastSplitPoint) { + if (BI.LastUse < LastSplitPoint) { DEBUG(dbgs() << ", uses, stack-out.\n"); - SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse)); + SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse)); continue; } // The last use is after the last split point, it is probably an // indirect jump. DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point " - << BI.LastSplitPoint << ", stack-out.\n"); - SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint); - SE.useIntv(Start, SegEnd); + << LastSplitPoint << ", stack-out.\n"); + SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint); + SE->useIntv(Start, SegEnd); // Run a double interval from the split to the last use. // This makes it possible to spill the complement without affecting the // indirect branch. - SE.overlapIntv(SegEnd, BI.LastUse); + SE->overlapIntv(SegEnd, BI.LastUse); continue; } // Register is live-through. DEBUG(dbgs() << ", uses, live-through.\n"); - SE.useIntv(Start, Stop); + SE->useIntv(Start, Stop); continue; } // Block has interference. - DEBUG(dbgs() << ", interference from " << IP.first); + DEBUG(dbgs() << ", interference from " << Intf.first()); - if (!BI.LiveThrough && IP.first >= BI.Kill) { + if (!BI.LiveThrough && Intf.first() >= BI.Kill) { // The interference doesn't reach the outgoing segment. DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n'); - SE.useIntv(Start, BI.Kill); + SE->useIntv(Start, BI.Kill); continue; } - if (!BI.Uses) { - // No uses in block, avoid interference by spilling as soon as possible. - DEBUG(dbgs() << ", no uses.\n"); - SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); - continue; - } - if (IP.first.getBaseIndex() > BI.FirstUse) { + if (Intf.first().getBaseIndex() > BI.FirstUse) { // There are interference-free uses at the beginning of the block. // Find the last use that can get the register. 
SmallVectorImpl::const_iterator UI = std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(), - IP.first.getBaseIndex()); + Intf.first().getBaseIndex()); assert(UI != SA->UseSlots.begin() && "Couldn't find first use"); SlotIndex Use = (--UI)->getBoundaryIndex(); DEBUG(dbgs() << ", free use at " << *UI << ".\n"); - SlotIndex SegEnd = SE.leaveIntvAfter(Use); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); - SE.useIntv(Start, SegEnd); + SlotIndex SegEnd = SE->leaveIntvAfter(Use); + assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); + SE->useIntv(Start, SegEnd); continue; } // Interference is before the first use. DEBUG(dbgs() << " before first use.\n"); - SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB); - assert(SegEnd <= IP.first && "Couldn't avoid interference"); + SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB); + assert(SegEnd <= Intf.first() && "Couldn't avoid interference"); } - SE.closeIntv(); + // Handle live-through blocks. + for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { + unsigned Number = Cand.ActiveBlocks[i]; + bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; + bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; + DEBUG(dbgs() << "Live through BB#" << Number << '\n'); + if (RegIn && RegOut) { + Intf.moveToBlock(Number); + if (!Intf.hasInterference()) { + SE->useIntv(Indexes->getMBBStartIdx(Number), + Indexes->getMBBEndIdx(Number)); + continue; + } + } + MachineBasicBlock *MBB = MF->getBlockNumbered(Number); + if (RegIn) + SE->leaveIntvAtTop(*MBB); + if (RegOut) + SE->enterIntvAtEnd(*MBB); + } - // FIXME: Should we be more aggressive about splitting the stack region into - // per-block segments? The current approach allows the stack region to - // separate into connected components. Some components may be allocatable. 
- SE.finish(); ++NumGlobalSplits; - if (VerifyEnabled) { - MF->verify(this, "After splitting live range around region"); + SmallVector IntvMap; + SE->finish(&IntvMap); + LRStage.resize(MRI->getNumVirtRegs()); + unsigned OrigBlocks = SA->getNumThroughBlocks() + SA->getUseBlocks().size(); -#ifndef NDEBUG - // Make sure that at least one of the new intervals can allocate to PhysReg. - // That was the whole point of splitting the live range. - bool found = false; - for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E; - ++I) - if (!checkUncachedInterference(**I, PhysReg)) { - found = true; - break; + // Sort out the new intervals created by splitting. We get four kinds: + // - Remainder intervals should not be split again. + // - Candidate intervals can be assigned to Cand.PhysReg. + // - Block-local splits are candidates for local splitting. + // - DCE leftovers should go back on the queue. + for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { + unsigned Reg = LREdit.get(i)->reg; + + // Ignore old intervals from DCE. + if (LRStage[Reg] != RS_New) + continue; + + // Remainder interval. Don't try splitting again, spill if it doesn't + // allocate. + if (IntvMap[i] == 0) { + LRStage[Reg] = RS_Global; + continue; + } + + // Main interval. Allow repeated splitting as long as the number of live + // blocks is strictly decreasing. + if (IntvMap[i] == MainIntv) { + if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) { + DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks + << " blocks as original.\n"); + // Don't allow repeated splitting as a safe guard against looping. + LRStage[Reg] = RS_Global; } - assert(found && "No allocatable intervals after pointless splitting"); -#endif + continue; + } + + // Other intervals are treated as new. This includes local intervals created + // for blocks with multiple uses, and anything created by DCE. 
} + + if (VerifyEnabled) + MF->verify(this, "After splitting live range around region"); } unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { - BitVector LiveBundles, BestBundles; - float BestCost = 0; - unsigned BestReg = 0; + float BestCost = Hysteresis * calcSpillCost(); + DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); + const unsigned NoCand = ~0u; + unsigned BestCand = NoCand; + Order.rewind(); - while (unsigned PhysReg = Order.next()) { - float Cost = calcInterferenceInfo(VirtReg, PhysReg); - if (BestReg && Cost >= BestCost) - continue; + for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) { + if (GlobalCand.size() <= Cand) + GlobalCand.resize(Cand+1); + GlobalCand[Cand].reset(PhysReg); + + SpillPlacer->prepare(GlobalCand[Cand].LiveBundles); + float Cost; + InterferenceCache::Cursor Intf(IntfCache, PhysReg); + if (!addSplitConstraints(Intf, Cost)) { + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); + continue; + } + DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = " << Cost); + if (Cost >= BestCost) { + DEBUG({ + if (BestCand == NoCand) + dbgs() << " worse than no bundles\n"; + else + dbgs() << " worse than " + << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + }); + continue; + } + growRegion(GlobalCand[Cand], Intf); + + SpillPlacer->finish(); - SpillPlacer->placeSpills(SpillConstraints, LiveBundles); // No live bundles, defer to splitSingleBlocks(). 
- if (!LiveBundles.any()) + if (!GlobalCand[Cand].LiveBundles.any()) { + DEBUG(dbgs() << " no bundles.\n"); continue; + } - Cost += calcGlobalSplitCost(LiveBundles); - if (!BestReg || Cost < BestCost) { - BestReg = PhysReg; - BestCost = Cost; - BestBundles.swap(LiveBundles); + Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf); + DEBUG({ + dbgs() << ", total = " << Cost << " with bundles"; + for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0; + i = GlobalCand[Cand].LiveBundles.find_next(i)) + dbgs() << " EB#" << i; + dbgs() << ".\n"; + }); + if (Cost < BestCost) { + BestCand = Cand; + BestCost = Hysteresis * Cost; // Prevent rounding effects. } } - if (!BestReg) + if (BestCand == NoCand) return 0; - splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs); + splitAroundRegion(VirtReg, GlobalCand[BestCand], NewVRegs); return 0; } @@ -942,8 +1037,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// void RAGreedy::calcGapWeights(unsigned PhysReg, SmallVectorImpl &GapWeight) { - assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); const SmallVectorImpl &Uses = SA->UseSlots; const unsigned NumGaps = Uses.size()-1; @@ -1034,8 +1129,8 @@ unsigned RAGreedy::nextSplitPoint(unsigned i) { /// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl &NewVRegs) { - assert(SA->LiveBlocks.size() == 1 && "Not a local interval"); - const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front(); + assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); + const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); // Note that it is possible to have an interval that is live-in or live-out // while only covering a single block - A phi-def can use undef values from @@ -1065,7 +1160,7 
@@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned BestAfter = 0; float BestDiff = 0; - const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB); + const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); SmallVector GapWeight; Order.rewind(); @@ -1130,13 +1225,13 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, PrevSlot[SplitBefore].distance(Uses[SplitAfter])); // Would this split be possible to allocate? // Never allocate all gaps, we wouldn't be making progress. - float Diff = EstWeight - MaxGap; - DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff); - if (Diff > 0) { + DEBUG(dbgs() << " w=" << EstWeight); + if (EstWeight * Hysteresis >= MaxGap) { Shrink = false; + float Diff = EstWeight - MaxGap; if (Diff > BestDiff) { DEBUG(dbgs() << " (best)"); - BestDiff = Diff; + BestDiff = Hysteresis * Diff; BestBefore = SplitBefore; BestAfter = SplitAfter; } @@ -1181,16 +1276,15 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, << '-' << Uses[BestAfter] << ", " << BestDiff << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); - SmallVector SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); - SE.openIntv(); - SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]); - SlotIndex SegStop = SE.leaveIntvAfter(Uses[BestAfter]); - SE.useIntv(SegStart, SegStop); - SE.closeIntv(); - SE.finish(); + SE->openIntv(); + SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); + SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); + SE->useIntv(SegStart, SegStop); + SE->finish(); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Local); ++NumLocalSplits; return 0; @@ -1205,16 +1299,22 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// @return Physreg when VirtReg may be 
assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVectorImpl&NewVRegs) { - SA->analyze(&VirtReg); - // Local intervals are handled separately. if (LIS->intervalIsInOneMBB(VirtReg)) { NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); + SA->analyze(&VirtReg); return tryLocalSplit(VirtReg, Order, NewVRegs); } NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); + // Don't iterate global splitting. + // Move straight to spilling if this range was produced by a global split. + if (getStage(VirtReg) >= RS_Global) + return 0; + + SA->analyze(&VirtReg); + // First try to split around a region spanning multiple blocks. unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) @@ -1223,9 +1323,10 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, // Then isolate blocks with multiple uses. SplitAnalysis::BlockPtrSet Blocks; if (SA->getMultiUseBlocks(Blocks)) { - SmallVector SpillRegs; - LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs); - SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks); + LiveRangeEdit LREdit(VirtReg, NewVRegs, this); + SE->reset(LREdit); + SE->splitSingleBlocks(Blocks); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Global); if (VerifyEnabled) MF->verify(this, "After splitting live range around basic blocks"); } @@ -1235,68 +1336,6 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, } -//===----------------------------------------------------------------------===// -// Spilling -//===----------------------------------------------------------------------===// - -/// calcInterferenceWeight - Calculate the combined spill weight of -/// interferences when assigning VirtReg to PhysReg. 
-float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){ - float Sum = 0; - for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); - Q.collectInterferingVRegs(); - if (Q.seenUnspillableVReg()) - return HUGE_VALF; - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) - Sum += Q.interferingVRegs()[i]->weight; - } - return Sum; -} - -/// trySpillInterferences - Try to spill interfering registers instead of the -/// current one. Only do it if the accumulated spill weight is smaller than the -/// current spill weight. -unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg, - AllocationOrder &Order, - SmallVectorImpl &NewVRegs) { - NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled); - unsigned BestPhys = 0; - float BestWeight = 0; - - Order.rewind(); - while (unsigned PhysReg = Order.next()) { - float Weight = calcInterferenceWeight(VirtReg, PhysReg); - if (Weight == HUGE_VALF || Weight >= VirtReg.weight) - continue; - if (!BestPhys || Weight < BestWeight) - BestPhys = PhysReg, BestWeight = Weight; - } - - // No candidates found. - if (!BestPhys) - return 0; - - // Collect all interfering registers. - SmallVector Spills; - for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) { - LiveIntervalUnion::Query &Q = query(VirtReg, *AI); - Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end()); - for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) { - LiveInterval *VReg = Q.interferingVRegs()[i]; - unassign(*VReg, *AI); - } - } - - // Spill them all. 
- DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight " - << BestWeight << '\n'); - for (unsigned i = 0, e = Spills.size(); i != e; ++i) - spiller().spill(Spills[i], NewVRegs, Spills); - return BestPhys; -} - - //===----------------------------------------------------------------------===// // Main Entry Point //===----------------------------------------------------------------------===// @@ -1305,12 +1344,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, SmallVectorImpl &NewVRegs) { // First try assigning a free register. AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs); - while (unsigned PhysReg = Order.next()) { - if (!checkPhysRegInterference(VirtReg, PhysReg)) - return PhysReg; - } - - if (unsigned PhysReg = tryReassign(VirtReg, Order, NewVRegs)) + if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) return PhysReg; if (unsigned PhysReg = tryEvict(VirtReg, Order, NewVRegs)) @@ -1321,25 +1355,29 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, // The first time we see a live range, don't try to split or spill. // Wait until the second time, when all smaller ranges have been allocated. // This gives a better picture of the interference to split around. - if (Generation[VirtReg.reg] == 1) { + LiveRangeStage Stage = getStage(VirtReg); + if (Stage == RS_First) { + LRStage[VirtReg.reg] = RS_Second; + DEBUG(dbgs() << "wait for second round\n"); NewVRegs.push_back(&VirtReg); return 0; } + assert(Stage < RS_Spill && "Cannot allocate after spilling"); + // Try splitting VirtReg or interferences. unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); if (PhysReg || !NewVRegs.empty()) return PhysReg; - // Try to spill another interfering reg with less spill weight. - PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs); - if (PhysReg) - return PhysReg; - // Finally spill VirtReg itself. 
NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); - SmallVector pendingSpills; - spiller().spill(&VirtReg, NewVRegs, pendingSpills); + LiveRangeEdit LRE(VirtReg, NewVRegs, this); + spiller().spill(LRE); + setStage(NewVRegs.begin(), NewVRegs.end(), RS_Spill); + + if (VerifyEnabled) + MF->verify(this, "After spilling"); // The live virtual register requesting allocation was spilled, so tell // the caller not to allocate anything during this round. @@ -1366,6 +1404,10 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis(); SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + LRStage.clear(); + LRStage.resize(MRI->getNumVirtRegs()); + IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI); allocatePhysRegs(); addMBBLiveIns(MF); @@ -1377,6 +1419,9 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { VRM->rewrite(Indexes); } + // Write out new DBG_VALUE instructions. + getAnalysis().emitDebugValues(VRM); + // The pass output is in VirtRegMap. Release all the transient data. 
releaseMemory(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index b959878bcdba..5ef88cb74ba5 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "regalloc" #include "LiveDebugVariables.h" +#include "LiveRangeEdit.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" #include "Spiller.h" @@ -39,7 +40,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include #include @@ -66,6 +66,11 @@ TrivCoalesceEnds("trivial-coalesce-ends", cl::desc("Attempt trivial coalescing of interval ends"), cl::init(false), cl::Hidden); +static cl::opt +AvoidWAWHazard("avoid-waw-hazard", + cl::desc("Avoid write-write hazards for some register classes"), + cl::init(false), cl::Hidden); + static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -109,6 +114,7 @@ namespace { if (NumRecentlyUsedRegs > 0) RecentRegs.resize(NumRecentlyUsedRegs, 0); RecentNext = RecentRegs.begin(); + avoidWAW_ = 0; } typedef std::pair IntervalPtr; @@ -179,6 +185,9 @@ namespace { SmallVector RecentRegs; SmallVector::iterator RecentNext; + // Last write-after-write register written. + unsigned avoidWAW_; + // Record that we just picked this register. void recordRecentlyUsed(unsigned reg) { assert(reg != 0 && "Recently used register is NOREG!"); @@ -226,8 +235,8 @@ namespace { // Determine if we skip this register due to its being recently used. 
bool isRecentlyUsed(unsigned reg) const { - return std::find(RecentRegs.begin(), RecentRegs.end(), reg) != - RecentRegs.end(); + return reg == avoidWAW_ || + std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); } private: @@ -374,7 +383,7 @@ namespace { dbgs() << str << " intervals:\n"; for (; i != e; ++i) { - dbgs() << "\t" << *i->first << " -> "; + dbgs() << '\t' << *i->first << " -> "; unsigned reg = i->first->reg; if (TargetRegisterInfo::isVirtualRegister(reg)) @@ -389,7 +398,7 @@ namespace { } INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) + "Linear Scan Register Allocator", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) @@ -400,7 +409,7 @@ INITIALIZE_PASS_DEPENDENCY(VirtRegMap) INITIALIZE_AG_DEPENDENCY(RegisterCoalescer) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) + "Linear Scan Register Allocator", false, false) void RALinScan::ComputeRelatedRegClasses() { // First pass, add all reg classes to the union, and determine at least one @@ -458,7 +467,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { const LiveRange &range = cur.ranges.front(); VNInfo *vni = range.valno; - if (vni->isUnused()) + if (vni->isUnused() || !vni->def.isValid()) return Reg; unsigned CandReg; @@ -571,7 +580,7 @@ void RALinScan::initIntervalSets() for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty()) { + if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { mri_->setPhysRegUsed(i->second->reg); fixed_.push_back(std::make_pair(i->second, i->second->begin())); } @@ -791,7 +800,7 @@ void RALinScan::updateSpillWeights(std::vector &Weights, // register 
class we are trying to allocate. Then add the weight to all // sub-registers of the super-register even if they are not aliases. // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be choosen + // bl should get the same spill weight otherwise it will be chosen // as a spill candidate since spilling bh doesn't make ebx available. for (unsigned i = 0, e = Supers.size(); i != e; ++i) { for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) @@ -993,7 +1002,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // one, e.g. X86::mov32to32_. These move instructions are not coalescable. if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused()) { + if (!vni->isUnused() && vni->def.isValid()) { MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); if (CopyMI && CopyMI->isCopy()) { unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); @@ -1109,11 +1118,18 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // list. if (physReg) { DEBUG(dbgs() << tri_->getName(physReg) << '\n'); + assert(RC->contains(physReg) && "Invalid candidate"); vrm_->assignVirt2Phys(cur->reg, physReg); addRegUse(physReg); active_.push_back(std::make_pair(cur, cur->begin())); handled_.push_back(cur); + // Remember physReg for avoiding a write-after-write hazard in the next + // instruction. + if (AvoidWAWHazard && + tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) + avoidWAW_ = physReg; + // "Upgrade" the physical register since it has been allocated. UpgradeRegister(physReg); if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { @@ -1229,8 +1245,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { // linearscan. 
if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector spillIs, added; - spiller_->spill(cur, added, spillIs); + SmallVector added; + LiveRangeEdit LRE(*cur, added); + spiller_->spill(LRE); std::sort(added.begin(), added.end(), LISorter()); if (added.empty()) @@ -1306,7 +1323,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - spiller_->spill(sli, added, spillIs); + LiveRangeEdit LRE(*sli, added, 0, &spillIs); + spiller_->spill(LRE); spilled.insert(sli->reg); } @@ -1442,7 +1460,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; // Skip recently allocated registers. - if (isRegAvail(Reg) && !isRecentlyUsed(Reg)) { + if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; if (FreeReg < inactiveCounts.size()) FreeRegInactiveCount = inactiveCounts[FreeReg]; @@ -1473,7 +1491,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur, if (reservedRegs_.test(Reg)) continue; if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && !isRecentlyUsed(Reg)) { + FreeRegInactiveCount < inactiveCounts[Reg] && + (!SkipDGRegs || !isRecentlyUsed(Reg))) { FreeReg = Reg; FreeRegInactiveCount = inactiveCounts[Reg]; if (FreeRegInactiveCount == MaxInactiveCount) @@ -1524,12 +1543,10 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { return Preference; } - if (!DowngradedRegs.empty()) { - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - } + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, + true); + if (FreeReg) + return FreeReg; return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); } diff --git a/lib/CodeGen/RegAllocPBQP.cpp 
b/lib/CodeGen/RegAllocPBQP.cpp index ea0d1fe0233f..1e1f1e0d3470 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -534,10 +534,9 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vregsToAlloc.erase(vreg); const LiveInterval* spillInterval = &lis->getInterval(vreg); double oldWeight = spillInterval->weight; - SmallVector spillIs; rmf->rememberUseDefs(spillInterval); std::vector newSpills = - lis->addIntervalsForSpills(*spillInterval, spillIs, loopInfo, *vrm); + lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); addStackInterval(spillInterval, mri); rmf->rememberSpills(spillInterval, newSpills); diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index a2580b85bcc3..ebfe533838d5 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -126,9 +126,10 @@ void RegScavenger::forward() { MBBI = MBB->begin(); Tracking = true; } else { - assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + assert(MBBI != MBB->end() && "Already past the end of the basic block!"); MBBI = llvm::next(MBBI); } + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); MachineInstr *MI = MBBI; @@ -241,12 +242,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { /// getRegsAvailable - Return all available registers in the register class /// in Mask. 
-void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC, - BitVector &Mask) { +BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) { + BitVector Mask(TRI->getNumRegs()); for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) if (!isAliasUsed(*I)) Mask.set(*I); + return Mask; } /// findSurvivorReg - Return the candidate register that is unused for the @@ -335,9 +337,13 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, } // Try to find a register that's unused if there is one, as then we won't - // have to spill. - if ((Candidates & RegsAvailable).any()) - Candidates &= RegsAvailable; + // have to spill. Search explicitly rather than masking out based on + // RegsAvailable, as RegsAvailable does not take aliases into account. + // That's what getRegsAvailable() is for. + BitVector Available = getRegsAvailable(RC); + + if ((Candidates & Available).any()) + Candidates &= Available; // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index cbfd5a23d63d..c8de3823553c 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -47,7 +47,7 @@ outputFileSuffix("rmf-file-suffix", static cl::opt machineFuncsToRender("rmf-funcs", - cl::desc("Coma seperated list of functions to render" + cl::desc("Comma separated list of functions to render" ", or \"*\"."), cl::init(""), cl::Hidden); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 3388889c9e91..1302395f423e 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -472,7 +472,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { #endif } -/// AddPred - Updates the topological ordering to accomodate an edge +/// AddPred - Updates the topological ordering to accommodate an edge /// to be added from SUnit X to SUnit Y. 
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { int UpperBound, LowerBound; @@ -490,7 +490,7 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) { } } -/// RemovePred - Updates the topological ordering to accomodate an +/// RemovePred - Updates the topological ordering to accommodate an /// an edge to be removed from the specified node N from the predecessors /// of the current node M. void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index f17023eabb72..67c209ea1977 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -371,7 +371,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) { // will be overlapped by work done outside the current // scheduling region. Latency -= std::min(Latency, Count); - // Add the artifical edge. + // Add the artificial edge. ExitSU.addPred(SDep(SU, SDep::Order, Latency, /*Reg=*/0, /*isNormalMemory=*/false, /*isMustAlias=*/false, diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 027f6150e26b..4b55a2284f85 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -51,7 +51,8 @@ namespace llvm { /// If you want to override the dot attributes printed for a particular /// edge, override this method. 
static std::string getEdgeAttributes(const SUnit *Node, - SUnitIterator EI) { + SUnitIterator EI, + const ScheduleDAG *Graph) { if (EI.isArtificialDep()) return "color=cyan,style=dashed"; if (EI.isCtrlDep()) diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9cc70a30927d..f42751167a45 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -319,6 +319,10 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { ((DAGCombiner*)DC)->AddToWorkList(N); } +void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { + ((DAGCombiner*)DC)->removeFromWorkList(N); +} + SDValue TargetLowering::DAGCombinerInfo:: CombineTo(SDNode *N, const std::vector &To, bool AddTo) { return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); @@ -1290,6 +1294,16 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, return SDValue(); } +/// isCarryMaterialization - Returns true if V is an ADDE node that is known to +/// return 0 or 1 depending on the carry flag. 
+static bool isCarryMaterialization(SDValue V) { + if (V.getOpcode() != ISD::ADDE) + return false; + + ConstantSDNode *C = dyn_cast(V.getOperand(0)); + return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1); +} + SDValue DAGCombiner::visitADD(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -1453,6 +1467,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } + // add (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0), + N0.getOperand(2)); + + // add X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), + DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0), + N1.getOperand(2)); + return SDValue(); } @@ -1496,6 +1522,16 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { N->getDebugLoc(), MVT::Glue)); } + // addc (adde 0, 0, glue), X -> adde X, 0, glue + if (N0->hasOneUse() && isCarryMaterialization(N0)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1, + DAG.getConstant(0, VT), N0.getOperand(2)); + + // addc X, (adde 0, 0, glue) -> adde X, 0, glue + if (N1->hasOneUse() && isCarryMaterialization(N1)) + return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0, + DAG.getConstant(0, VT), N1.getOperand(2)); + return SDValue(); } @@ -1506,6 +1542,12 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { ConstantSDNode *N0C = dyn_cast(N0); ConstantSDNode *N1C = dyn_cast(N1); + // If both operands are null we know that carry out will always be false. 
+ if (N0C && N0C->isNullValue() && N0 == N1) + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), DAG.getNode(ISD::CARRY_FALSE, + N->getDebugLoc(), + MVT::Glue)); + // canonicalize constant to RHS if (N0C && !N1C) return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), @@ -3281,8 +3323,10 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return DAG.getUNDEF(VT); if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + uint64_t ShiftAmt = N1C->getZExtValue(); SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); + N0.getOperand(0), + DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); } @@ -3688,7 +3732,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext (load x)) -> (sext (truncate (sextload x))) // None of the supported targets knows how to perform load and sign extend - // in one instruction. We only perform this transformation on scalars. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { @@ -3839,7 +3884,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -3892,7 +3937,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (load x)) -> (zext (truncate (zextload x))) // None of the supported targets knows how to perform load and vector_zext - // in one instruction. We only perform this transformation on scalar zext. + // on vectors in one instruction. We only perform this transformation on + // scalars. 
if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) { @@ -4066,7 +4112,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // CombineTo deleted the truncate, if needed, but not what's under it. AddToWorkList(oye); } - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } @@ -4101,7 +4147,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext (load x)) -> (aext (truncate (extload x))) // None of the supported targets knows how to perform load and any_ext - // in one instruction. We only perform this transformation on scalars. + // on vectors in one instruction. We only perform this transformation on + // scalars. if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { @@ -4514,7 +4561,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y - // Currenly we only perform this optimization on scalars because vectors + // Currently we only perform this optimization on scalars because vectors // may have different active low bits. 
if (!VT.isVector()) { SDValue Shorter = @@ -5101,7 +5148,9 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (sint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128) + if (N0C && OpVT != MVT::ppcf128 && + // ...but only if the target supports immediate floating-point values + (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, @@ -5123,7 +5172,9 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT OpVT = N0.getValueType(); // fold (uint_to_fp c1) -> c1fp - if (N0C && OpVT != MVT::ppcf128) + if (N0C && OpVT != MVT::ppcf128 && + // ...but only if the target supports immediate floating-point values + (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, @@ -5817,8 +5868,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // value. // TODO: Handle store large -> read small portion. // TODO: Handle TRUNCSTORE/LOADEXT - if (LD->getExtensionType() == ISD::NON_EXTLOAD && - !LD->isVolatile()) { + if (ISD::isNormalLoad(N) && !LD->isVolatile()) { if (ISD::isNON_TRUNCStore(Chain.getNode())) { StoreSDNode *PrevST = cast(Chain); if (PrevST->getBasePtr() == Ptr && @@ -6217,6 +6267,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { ST->isNonTemporal(), OrigAlign); } + // Turn 'store undef, Ptr' -> nothing. 
+ if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) + return Chain; + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' if (ConstantFPSDNode *CFP = dyn_cast(Value)) { // NOTE: If the original store is volatile, this transform must not increase @@ -6250,8 +6304,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { return DAG.getStore(Chain, N->getDebugLoc(), Tmp, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), ST->getAlignment()); - } else if (!ST->isVolatile() && - TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { + } + + if (!ST->isVolatile() && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { // Many FP stores are not made apparent until after legalize, e.g. for // argument passing. Since this is so common, custom legalize the // 64-bit integer store into two 32-bit stores. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 490b857b0e9c..3af948288daf 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -43,6 +43,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -121,10 +122,9 @@ unsigned FastISel::getRegForValue(const Value *V) { // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. 
DenseMap::iterator I = FuncInfo.ValueMap.find(V); - if (I != FuncInfo.ValueMap.end()) { - unsigned Reg = I->second; - return Reg; - } + if (I != FuncInfo.ValueMap.end()) + return I->second; + unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; @@ -164,8 +164,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { Reg = getRegForValue(Constant::getNullValue(TD.getIntPtrType(V->getContext()))); } else if (const ConstantFP *CF = dyn_cast(V)) { - // Try to emit the constant directly. - Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + if (CF->isNullValue()) { + Reg = TargetMaterializeFloatZero(CF); + } else { + // Try to emit the constant directly. + Reg = FastEmit_f(VT, VT, ISD::ConstantFP, CF); + } if (!Reg) { // Try to emit the constant by using an integer constant with a cast. @@ -330,23 +334,51 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) { return false; } + // Check if the first operand is a constant, and handle it as "ri". At -O0, + // we don't have anything that canonicalizes operand order. + if (ConstantInt *CI = dyn_cast(I->getOperand(0))) + if (isa(I) && cast(I)->isCommutative()) { + unsigned Op1 = getRegForValue(I->getOperand(1)); + if (Op1 == 0) return false; + + bool Op1IsKill = hasTrivialKill(I->getOperand(1)); + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op1, + Op1IsKill, CI->getZExtValue(), + VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; + } + + unsigned Op0 = getRegForValue(I->getOperand(0)); - if (Op0 == 0) - // Unhandled operand. Halt "fast" selection and bail. + if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. return false; bool Op0IsKill = hasTrivialKill(I->getOperand(0)); // Check if the second operand is a constant and handle it appropriately. 
if (ConstantInt *CI = dyn_cast(I->getOperand(1))) { - unsigned ResultReg = FastEmit_ri(VT.getSimpleVT(), VT.getSimpleVT(), - ISDOpcode, Op0, Op0IsKill, - CI->getZExtValue()); - if (ResultReg != 0) { - // We successfully emitted code for the given LLVM Instruction. - UpdateValueMap(I, ResultReg); - return true; + uint64_t Imm = CI->getZExtValue(); + + // Transform "sdiv exact X, 8" -> "sra X, 3". + if (ISDOpcode == ISD::SDIV && isa(I) && + cast(I)->isExact() && + isPowerOf2_64(Imm)) { + Imm = Log2_64(Imm); + ISDOpcode = ISD::SRA; } + + unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0, + Op0IsKill, Imm, VT.getSimpleVT()); + if (ResultReg == 0) return false; + + // We successfully emitted code for the given LLVM Instruction. + UpdateValueMap(I, ResultReg); + return true; } // Check if the second operand is a constant float. @@ -454,15 +486,35 @@ bool FastISel::SelectGetElementPtr(const User *I) { } bool FastISel::SelectCall(const User *I) { - const Function *F = cast(I)->getCalledFunction(); + const CallInst *Call = cast(I); + + // Handle simple inline asms. + if (const InlineAsm *IA = dyn_cast(Call->getArgOperand(0))) { + // Don't attempt to handle constraints. + if (!IA->getConstraintString().empty()) + return false; + + unsigned ExtraInfo = 0; + if (IA->hasSideEffects()) + ExtraInfo |= InlineAsm::Extra_HasSideEffects; + if (IA->isAlignStack()) + ExtraInfo |= InlineAsm::Extra_IsAlignStack; + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::INLINEASM)) + .addExternalSymbol(IA->getAsmString().c_str()) + .addImm(ExtraInfo); + return true; + } + + const Function *F = Call->getCalledFunction(); if (!F) return false; // Handle selected intrinsic function calls. 
- unsigned IID = F->getIntrinsicID(); - switch (IID) { + switch (F->getIntrinsicID()) { default: break; case Intrinsic::dbg_declare: { - const DbgDeclareInst *DI = cast(I); + const DbgDeclareInst *DI = cast(Call); if (!DIVariable(DI->getVariable()).Verify() || !FuncInfo.MF->getMMI().hasDebugInfo()) return true; @@ -494,7 +546,7 @@ bool FastISel::SelectCall(const User *I) { } case Intrinsic::dbg_value: { // This form of DBG_VALUE is target-independent. - const DbgValueInst *DI = cast(I); + const DbgValueInst *DI = cast(Call); const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); const Value *V = DI->getValue(); if (!V) { @@ -523,65 +575,58 @@ bool FastISel::SelectCall(const User *I) { return true; } case Intrinsic::eh_exception: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { - default: break; - case TargetLowering::Expand: { - assert(FuncInfo.MBB->isLandingPad() && - "Call to eh.exception not in landing pad!"); - unsigned Reg = TLI.getExceptionAddressRegister(); - const TargetRegisterClass *RC = TLI.getRegClassFor(VT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - UpdateValueMap(I, ResultReg); - return true; - } - } - break; + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)!=TargetLowering::Expand) + break; + + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); + unsigned Reg = TLI.getExceptionAddressRegister(); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + UpdateValueMap(Call, ResultReg); + return true; } case Intrinsic::eh_selector: { - EVT VT = TLI.getValueType(I->getType()); - switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { - default: break; - case 
TargetLowering::Expand: { - if (FuncInfo.MBB->isLandingPad()) - AddCatchInfo(*cast(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); - else { + EVT VT = TLI.getValueType(Call->getType()); + if (TLI.getOperationAction(ISD::EHSELECTION, VT) != TargetLowering::Expand) + break; + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*Call, &FuncInfo.MF->getMMI(), FuncInfo.MBB); + else { #ifndef NDEBUG - FuncInfo.CatchInfoLost.insert(cast(I)); + FuncInfo.CatchInfoLost.insert(Call); #endif - // FIXME: Mark exception selector register as live in. Hack for PR1508. - unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBB->addLiveIn(Reg); - } - + // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - EVT SrcVT = TLI.getPointerTy(); - const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), - ResultReg).addReg(Reg); - - bool ResultRegIsKill = hasTrivialKill(I); - - // Cast the register to the type of the selector. - if (SrcVT.bitsGT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, - ResultReg, ResultRegIsKill); - else if (SrcVT.bitsLT(MVT::i32)) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, - ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); - if (ResultReg == 0) - // Unhandled operand. Halt "fast" selection and bail. - return false; - - UpdateValueMap(I, ResultReg); - - return true; + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } - } - break; + + unsigned Reg = TLI.getExceptionSelectorRegister(); + EVT SrcVT = TLI.getPointerTy(); + const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); + + bool ResultRegIsKill = hasTrivialKill(Call); + + // Cast the register to the type of the selector. 
+ if (SrcVT.bitsGT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, ISD::TRUNCATE, + ResultReg, ResultRegIsKill); + else if (SrcVT.bitsLT(MVT::i32)) + ResultReg = FastEmit_r(SrcVT.getSimpleVT(), MVT::i32, + ISD::SIGN_EXTEND, ResultReg, ResultRegIsKill); + if (ResultReg == 0) + // Unhandled operand. Halt "fast" selection and bail. + return false; + + UpdateValueMap(Call, ResultReg); + + return true; } } @@ -966,59 +1011,33 @@ unsigned FastISel::FastEmit_rri(MVT, MVT, unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, unsigned Op0, bool Op0IsKill, uint64_t Imm, MVT ImmType) { + // If this is a multiply by a power of two, emit this as a shift left. + if (Opcode == ISD::MUL && isPowerOf2_64(Imm)) { + Opcode = ISD::SHL; + Imm = Log2_64(Imm); + } else if (Opcode == ISD::UDIV && isPowerOf2_64(Imm)) { + // div x, 8 -> srl x, 3 + Opcode = ISD::SRL; + Imm = Log2_64(Imm); + } + + // Horrible hack (to be removed), check to make sure shift amounts are + // in-range. + if ((Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL) && + Imm >= VT.getSizeInBits()) + return 0; + // First check if immediate type is legal. If not, we can't use the ri form. unsigned ResultReg = FastEmit_ri(VT, VT, Opcode, Op0, Op0IsKill, Imm); if (ResultReg != 0) return ResultReg; unsigned MaterialReg = FastEmit_i(ImmType, ImmType, ISD::Constant, Imm); - if (MaterialReg == 0) - return 0; - return FastEmit_rr(VT, VT, Opcode, - Op0, Op0IsKill, - MaterialReg, /*Kill=*/true); -} - -/// FastEmit_rf_ - This method is a wrapper of FastEmit_ri. It first tries -/// to emit an instruction with a floating-point immediate operand using -/// FastEmit_rf. If that fails, it materializes the immediate into a register -/// and try FastEmit_rr instead. -unsigned FastISel::FastEmit_rf_(MVT VT, unsigned Opcode, - unsigned Op0, bool Op0IsKill, - const ConstantFP *FPImm, MVT ImmType) { - // First check if immediate type is legal. If not, we can't use the rf form. 
- unsigned ResultReg = FastEmit_rf(VT, VT, Opcode, Op0, Op0IsKill, FPImm); - if (ResultReg != 0) - return ResultReg; - - // Materialize the constant in a register. - unsigned MaterialReg = FastEmit_f(ImmType, ImmType, ISD::ConstantFP, FPImm); if (MaterialReg == 0) { - // If the target doesn't have a way to directly enter a floating-point - // value into a register, use an alternate approach. - // TODO: The current approach only supports floating-point constants - // that can be constructed by conversion from integer values. This should - // be replaced by code that creates a load from a constant-pool entry, - // which will require some target-specific work. - const APFloat &Flt = FPImm->getValueAPF(); - EVT IntVT = TLI.getPointerTy(); - - uint64_t x[2]; - uint32_t IntBitWidth = IntVT.getSizeInBits(); - bool isExact; - (void) Flt.convertToInteger(x, IntBitWidth, /*isSigned=*/true, - APFloat::rmTowardZero, &isExact); - if (!isExact) - return 0; - APInt IntVal(IntBitWidth, 2, x); - - unsigned IntegerReg = FastEmit_i(IntVT.getSimpleVT(), IntVT.getSimpleVT(), - ISD::Constant, IntVal.getZExtValue()); - if (IntegerReg == 0) - return 0; - MaterialReg = FastEmit_r(IntVT.getSimpleVT(), VT, - ISD::SINT_TO_FP, IntegerReg, /*Kill=*/true); - if (MaterialReg == 0) - return 0; + // This is a bit ugly/slow, but failing here means falling out of + // fast-isel, which would be very slow. 
+ const IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), + VT.getSizeInBits()); + MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, @@ -1099,6 +1118,29 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addImm(Imm1) + .addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -1160,6 +1202,23 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2); + else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm1).addImm(Imm2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); + } + return ResultReg; +} + unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -1215,7 +1274,7 @@ bool 
FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Only handle legal types. Two interesting things to note here. First, // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its - // own moves. Second, this check is necessary becuase FastISel doesn't + // own moves. Second, this check is necessary because FastISel doesn't // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction. EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 2ae3286829dd..d8a5770d36c0 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -448,16 +448,30 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } -void llvm::CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, +void llvm::CopyCatchInfo(const BasicBlock *SuccBB, const BasicBlock *LPad, MachineModuleInfo *MMI, FunctionLoweringInfo &FLI) { - for (BasicBlock::const_iterator I = SrcBB->begin(), E = --SrcBB->end(); - I != E; ++I) - if (const EHSelectorInst *EHSel = dyn_cast(I)) { - // Apply the catch info to DestBB. - AddCatchInfo(*EHSel, MMI, FLI.MBBMap[DestBB]); + SmallPtrSet Visited; + + // The 'eh.selector' call may not be in the direct successor of a basic block, + // but could be several successors deeper. If we don't find it, try going one + // level further. + while (Visited.insert(SuccBB)) { + for (BasicBlock::const_iterator I = SuccBB->begin(), E = --SuccBB->end(); + I != E; ++I) + if (const EHSelectorInst *EHSel = dyn_cast(I)) { + // Apply the catch info to LPad. 
+ AddCatchInfo(*EHSel, MMI, FLI.MBBMap[LPad]); #ifndef NDEBUG - if (!FLI.MBBMap[SrcBB]->isLandingPad()) - FLI.CatchInfoFound.insert(EHSel); + if (!FLI.MBBMap[SuccBB]->isLandingPad()) + FLI.CatchInfoFound.insert(EHSel); #endif - } + return; + } + + const BranchInst *Br = dyn_cast(SuccBB->getTerminator()); + if (Br && Br->isUnconditional()) + SuccBB = Br->getSuccessor(0); + else + break; + } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f08528fe2dc3..2b6c56eafd73 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -61,10 +61,10 @@ class SelectionDAGLegalize { // Libcall insertion helpers. - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// LastCALLSEQ - This keeps track of the CALLSEQ_END node that has been /// legalized. We use this to ensure that calls are properly serialized /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; + SmallVector LastCALLSEQ; enum LegalizeAction { Legal, // The target natively supports this operation. 
@@ -142,6 +142,9 @@ class SelectionDAGLegalize { DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops, + unsigned NumOps, bool isSigned, DebugLoc dl); + std::pair ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, @@ -153,6 +156,7 @@ class SelectionDAGLegalize { RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); + void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -178,6 +182,15 @@ class SelectionDAGLegalize { void ExpandNode(SDNode *Node, SmallVectorImpl &Results); void PromoteNode(SDNode *Node, SmallVectorImpl &Results); + + SDValue getLastCALLSEQ() { return LastCALLSEQ.back(); } + void setLastCALLSEQ(const SDValue s) { LastCALLSEQ.back() = s; } + void pushLastCALLSEQ(SDValue s) { + LastCALLSEQ.push_back(s); + } + void popLastCALLSEQ() { + LastCALLSEQ.pop_back(); + } }; } @@ -223,7 +236,7 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag, } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); + pushLastCALLSEQ(DAG.getEntryNode()); // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we @@ -251,14 +264,15 @@ void SelectionDAGLegalize::LegalizeDAG() { /// FindCallEndFromCallStart - Given a chained node that is part of a call /// sequence, find the CALLSEQ_END node that terminates the call sequence. static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. 
+ int next_depth = depth; if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) + next_depth = depth + 1; + if (Node->getOpcode() == ISD::CALLSEQ_END) { + assert(depth > 0 && "negative depth!"); + if (depth == 1) return Node; + else + next_depth = depth - 1; } if (Node->use_empty()) return 0; // No CallSeqEnd @@ -289,7 +303,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { SDNode *User = *UI; for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) + if (SDNode *Result = FindCallEndFromCallStart(User, next_depth)) return Result; } return 0; @@ -786,7 +800,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { } } } - return SDValue(); + return SDValue(0, 0); } /// LegalizeOp - We know that the specified value has a legal type, and @@ -934,11 +948,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::BR_JT: case ISD::BR_CC: case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + // Branches tweak the chain to include LastCALLSEQ Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); + getLastCALLSEQ()); Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); + setLastCALLSEQ(DAG.getEntryNode()); break; case ISD::SHL: case ISD::SRL: @@ -948,7 +963,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. 
if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[1])); + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[1])); break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: @@ -956,7 +972,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[2])); + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[2])); break; } @@ -1024,8 +1041,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } break; case ISD::CALLSEQ_START: { - static int depth = 0; SDNode *CallEnd = FindCallEndFromCallStart(Node); + assert(CallEnd && "didn't find CALLSEQ_END!"); // Recursively Legalize all of the inputs of the call end that do not lead // to this call start. This ensures that any libcalls that need be inserted @@ -1042,9 +1059,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Merge in the last call to ensure that this call starts after the last // call ended. - if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) { + if (getLastCALLSEQ().getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); + Tmp1, getLastCALLSEQ()); Tmp1 = LegalizeOp(Tmp1); } @@ -1065,29 +1082,28 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // sequence have been legalized, legalize the call itself. During this // process, no libcalls can/will be inserted, guaranteeing that no calls // can overlap. - - SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ; // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); + setLastCALLSEQ(SDValue(CallEnd, 0)); - depth++; // Legalize the call, starting from the CALLSEQ_END. 
- LegalizeOp(LastCALLSEQ_END); - depth--; - assert(depth >= 0 && "Un-matched CALLSEQ_START?"); - if (depth > 0) - LastCALLSEQ_END = Saved_LastCALLSEQ_END; + LegalizeOp(getLastCALLSEQ()); return Result; } case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; + { + SDNode *myCALLSEQ_BEGIN = FindCallStartFromCallEnd(Node); + + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. + if (getLastCALLSEQ().getNode() != Node) { + LegalizeOp(SDValue(myCALLSEQ_BEGIN, 0)); + DenseMap::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + pushLastCALLSEQ(SDValue(myCALLSEQ_BEGIN, 0)); } // Otherwise, the call start has been legalized and everything is going @@ -1116,6 +1132,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } } // This finishes up call legalization. + popLastCALLSEQ(); + // If the CALLSEQ_END node has a flag, remember that we legalized it. AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); if (Node->getNumValues() == 2) @@ -2034,10 +2052,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
+ LegalizeOp(CallInfo.second); + return CallInfo.first; +} + +/// ExpandLibCall - Generate a libcall taking the given operands as arguments +/// and returning a result of type RetVT. +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + std::pair CallInfo = + TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -2072,7 +2124,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, Callee, Args, DAG, Node->getDebugLoc()); // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that // was added by LowerCallTo (guaranteeing proper serialization of calls). LegalizeOp(CallInfo.second); return CallInfo; @@ -2112,6 +2164,113 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned, return ExpandLibCall(LC, Node, isSigned); } +/// isDivRemLibcallAvailable - Return true if divmod libcall is available.
+static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, + const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + return TLI.getLibcallName(LC) != 0; +} + +/// UseDivRem - Only issue divrem libcall if both quotient and remainder are +/// needed. +static bool UseDivRem(SDNode *Node, bool isSigned, bool isDIV) { + unsigned OtherOpcode = 0; + if (isSigned) + OtherOpcode = isDIV ? ISD::SREM : ISD::SDIV; + else + OtherOpcode = isDIV ? ISD::UREM : ISD::UDIV; + + SDValue Op0 = Node->getOperand(0); + SDValue Op1 = Node->getOperand(1); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + if (User->getOpcode() == OtherOpcode && + User->getOperand(0) == Op0 && + User->getOperand(1) == Op1) + return true; + } + return false; +} + +/// ExpandDivRemLibCall - Issue libcalls to __{u}divmod to compute div / rem +/// pairs. +void +SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, + SmallVectorImpl &Results) { + unsigned Opcode = Node->getOpcode(); + bool isSigned = Opcode == ISD::SDIVREM; + + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: assert(0 && "Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? 
RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + const Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + // Also pass the address of a stack temporary that receives the remainder. + SDValue FIPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = FIPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + DebugLoc dl = Node->getDebugLoc(); + std::pair CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + + // Remainder is loaded back from the stack frame.
+ SDValue Rem = DAG.getLoad(RetVT, dl, getLastCALLSEQ(), FIPtr, + MachinePointerInfo(), false, false, 0); + Results.push_back(CallInfo.first); + Results.push_back(Rem); +} + /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -2759,7 +2918,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, } case ISD::FP_ROUND_INREG: { // The only way we can lower this is to turn it into a TRUNCSTORE, - // EXTLOAD pair, targetting a temporary location (a stack slot). + // EXTLOAD pair, targeting a temporary location (a stack slot). // NOTE: there is a choice here between constantly creating new stack // slots and always reusing the same one. We currently always create @@ -3085,24 +3244,25 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; Tmp2 = Node->getOperand(0); Tmp3 = Node->getOperand(1); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) { + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, false))) { Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y Tmp1 = DAG.getNode(DivOpc, dl, VT, Tmp2, Tmp3); Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1); - } else if (isSigned) { + } else if (isSigned) Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32, RTLIB::SREM_I64, RTLIB::SREM_I128); - } else { + else Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32, RTLIB::UREM_I64, RTLIB::UREM_I128); - } Results.push_back(Tmp1); break; } @@ -3112,7 +3272,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); - if (TLI.isOperationLegalOrCustom(DivRemOpc, VT)) + if (TLI.isOperationLegalOrCustom(DivRemOpc, VT) || + (isDivRemLibcallAvailable(Node, isSigned, TLI) && + UseDivRem(Node, isSigned, true))) Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0), Node->getOperand(1)); else if (isSigned) @@ -3141,6 +3303,11 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1.getValue(1)); break; } + case ISD::SDIVREM: + case ISD::UDIVREM: + // Expand into divrem libcall + ExpandDivRemLibCall(Node, Results); + break; case ISD::MUL: { EVT VT = Node->getValueType(0); SDVTList VTs = DAG.getVTList(VT, VT); @@ -3225,6 +3392,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::UMULO: case ISD::SMULO: { EVT VT = Node->getValueType(0); + EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); SDValue BottomHalf; @@ -3242,7 +3410,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, TopHalf = BottomHalf.getValue(1); } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2))) { - EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS); @@ -3255,7 +3422,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, // have a libcall big enough. // Also, we can fall back to a division in some cases, but that's a big // performance hit in the general case. 
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (WideVT == MVT::i16) LC = RTLIB::MUL_I16; @@ -3266,15 +3432,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, else if (WideVT == MVT::i128) LC = RTLIB::MUL_I128; assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!"); - LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS); - RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS); + + // The high part of each operand is obtained by SRA'ing all but one of the + // bits of its low part. + unsigned LoSize = VT.getSizeInBits(); + SDValue HiLHS = DAG.getNode(ISD::SRA, dl, VT, LHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); + SDValue HiRHS = DAG.getNode(ISD::SRA, dl, VT, RHS, + DAG.getConstant(LoSize-1, TLI.getPointerTy())); - SDValue Ret = ExpandLibCall(LC, Node, isSigned); - BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret); - TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret, - DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy())); - TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf); + // Here we're passing the 2 arguments explicitly as 4 arguments that are + // pre-lowered to the correct types. This all depends upon WideVT not + // being a legal type for the architecture and thus has to be split to + // two arguments.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS }; + SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl); + BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(0)); + TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, + DAG.getIntPtrConstant(1)); } + if (isSigned) { Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy(BottomHalf.getValueType())); @@ -3409,7 +3587,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); + assert(LastCALLSEQ.size() == 1 && "branch inside CALLSEQ_BEGIN/END?"); + setLastCALLSEQ(DAG.getEntryNode()); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index f0752df80f12..935aab0e59af 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1051,8 +1051,6 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; case ISD::UADDO: case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; - case ISD::UMULO: - case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
@@ -1428,9 +1426,9 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, HiOps[2] = Lo.getValue(1); Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3); } - return; + return; } - + if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); @@ -2128,31 +2126,6 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, ReplaceValueWith(SDValue(N, 1), Ofl); } -void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N, - SDValue &Lo, SDValue &Hi) { - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); - EVT VT = N->getValueType(0); - EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2); - // Expand the result by simply replacing it with the equivalent - // non-overflow-checking operation. - SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS); - SplitInteger(Ret, Lo, Hi); - - // Now calculate overflow. - SDValue Ofl; - if (N->getOpcode() == ISD::UMULO) - Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, - DAG.getConstant(0, VT), ISD::SETNE); - else { - SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT); - Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp); - Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE); - } - ReplaceValueWith(SDValue(N, 1), Ofl); -} - void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 3f81bbbe4061..5409b88efaba 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -348,7 +348,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); - void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandShiftByConstant(SDNode *N, 
unsigned Amt, SDValue &Lo, SDValue &Hi); @@ -523,6 +522,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ScalarizeVecRes_BITCAST(SDNode *N); SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N); SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N); + SDValue ScalarizeVecRes_FP_ROUND(SDNode *N); SDValue ScalarizeVecRes_FPOWI(SDNode *N); SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecRes_LOAD(LoadSDNode *N); @@ -566,7 +566,6 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); - void SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 167dbe0377b3..5d0f923afb0f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -58,6 +58,9 @@ class VectorLegalizer { SDValue UnrollVSETCC(SDValue Op); // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB // isn't legal. SDValue ExpandFNEG(SDValue Op); + // Implements expansion for UINT_TO_FP; falls back to UnrollVectorOp if + // SINT_TO_FP or SRL on vectors isn't legal.
+ SDValue ExpandUINT_TO_FLOAT(SDValue Op); // Implements vector promotion; this is essentially just bitcasting the // operands to a different type and bitcasting the result back to the @@ -207,7 +210,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::FNEG) + if (Node->getOpcode() == ISD::UINT_TO_FP) + Result = ExpandUINT_TO_FLOAT(Op); + else if (Node->getOpcode() == ISD::FNEG) Result = ExpandFNEG(Op); else if (Node->getOpcode() == ISD::VSETCC) Result = UnrollVSETCC(Op); @@ -251,6 +256,48 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } +SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { + + + EVT VT = Op.getOperand(0).getValueType(); + DebugLoc DL = Op.getDebugLoc(); + + // Make sure that the SINT_TO_FP and SRL instructions are available. + if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || + !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) + return DAG.UnrollVectorOp(Op.getNode()); + + EVT SVT = VT.getScalarType(); + assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && + "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); + + unsigned BW = SVT.getSizeInBits(); + SDValue HalfWord = DAG.getConstant(BW/2, VT); + + // Constants to clear the upper part of the word. + // Notice that we can also use SHL+SHR, but using a constant is slightly + // faster on x86. + uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF; + SDValue HalfWordMask = DAG.getConstant(HWMask, VT); + + // Two to the power of half-word-size.
+ SDValue TWOHW = DAG.getConstantFP((1ULL<<(BW/2)), Op.getValueType()); + + // Clear upper part of LO, lower HI + SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord); + SDValue LO = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), HalfWordMask); + + // Convert hi and lo to floats + // Convert the hi part back to the upper values + SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), HI); + fHI = DAG.getNode(ISD::FMUL, DL, Op.getValueType(), fHI, TWOHW); + SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, Op.getValueType(), LO); + + // Add the two halves + return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO); +} + + SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 182f8fcbfbf3..0b4dd357c39d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -50,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: R = N->getOperand(0); break; case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break; case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; + case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break; case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break; case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; @@ -63,27 +64,33 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; case ISD::VSETCC: R = ScalarizeVecRes_VSETCC(N); break; + case ISD::ANY_EXTEND: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: case ISD::FABS: + case ISD::FCEIL: case ISD::FCOS: +
case ISD::FEXP: + case ISD::FEXP2: + case ISD::FFLOOR: + case ISD::FLOG: + case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: case ISD::FNEG: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: + case ISD::FRINT: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::FFLOOR: - case ISD::FCEIL: - case ISD::FRINT: - case ISD::FNEARBYINT: - case ISD::UINT_TO_FP: + case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: R = ScalarizeVecRes_UnaryOp(N); break; @@ -145,6 +152,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { N->getOperand(0), N->getOperand(1)); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { + EVT NewVT = N->getValueType(0).getVectorElementType(); + SDValue Op = GetScalarizedVector(N->getOperand(0)); + return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + NewVT, Op, N->getOperand(1)); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), @@ -405,11 +419,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break; case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; - case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; - case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; @@ -427,32 +439,35 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { 
SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); break; - case ISD::CTTZ: + case ISD::ANY_EXTEND: + case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTPOP: - case ISD::FNEG: + case ISD::CTTZ: case ISD::FABS: - case ISD::FSQRT: - case ISD::FSIN: - case ISD::FCOS: - case ISD::FTRUNC: - case ISD::FFLOOR: case ISD::FCEIL: - case ISD::FRINT: - case ISD::FNEARBYINT: - case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: + case ISD::FCOS: case ISD::FEXP: case ISD::FEXP2: + case ISD::FFLOOR: case ISD::FLOG: - case ISD::FLOG2: case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FRINT: + case ISD::FSIN: + case ISD::FSQRT: + case ISD::FTRUNC: + case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: + case ISD::ZERO_EXTEND: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -587,60 +602,6 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, &HiOps[0], HiOps.size()); } -void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo, - SDValue &Hi) { - EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - - SDValue DTyOpLo = DAG.getValueType(LoVT); - SDValue DTyOpHi = DAG.getValueType(HiVT); - - SDValue RndOp = N->getOperand(3); - SDValue SatOp = N->getOperand(4); - ISD::CvtCode CvtCode = cast(N)->getCvtCode(); - - // Split the input. 
- SDValue VLo, VHi; - EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case Legal: { - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case SplitVector: - GetSplitVector(N->getOperand(0), VLo, VHi); - break; - case WidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. Use the widened result - // and extract from it to do the split. - SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - } - - SDValue STyOpLo = DAG.getValueType(VLo.getValueType()); - SDValue STyOpHi = DAG.getValueType(VHi.getValueType()); - - Lo = DAG.getConvertRndSat(LoVT, dl, VLo, DTyOpLo, STyOpLo, RndOp, SatOp, - CvtCode); - Hi = DAG.getConvertRndSat(HiVT, dl, VHi, DTyOpHi, STyOpHi, RndOp, SatOp, - CvtCode); -} - void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); @@ -840,8 +801,25 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, } } - Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); - Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + if (N->getOpcode() == ISD::FP_ROUND) { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1)); + } else if 
(N->getOpcode() == ISD::CONVERT_RNDSAT) { + SDValue DTyOpLo = DAG.getValueType(LoVT); + SDValue DTyOpHi = DAG.getValueType(HiVT); + SDValue STyOpLo = DAG.getValueType(Lo.getValueType()); + SDValue STyOpHi = DAG.getValueType(Hi.getValueType()); + SDValue RndOp = N->getOperand(3); + SDValue SatOp = N->getOperand(4); + ISD::CvtCode CvtCode = cast(N)->getCvtCode(); + Lo = DAG.getConvertRndSat(LoVT, dl, Lo, DTyOpLo, STyOpLo, RndOp, SatOp, + CvtCode); + Hi = DAG.getConvertRndSat(HiVT, dl, Hi, DTyOpHi, STyOpHi, RndOp, SatOp, + CvtCode); + } else { + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + } } void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, @@ -989,11 +967,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: - case ISD::FP_EXTEND: case ISD::FTRUNC: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: @@ -1270,15 +1248,16 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Shift(N); break; + case ISD::ANY_EXTEND: + case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - case ISD::TRUNCATE: case ISD::SIGN_EXTEND: + case ISD::SINT_TO_FP: + case ISD::TRUNCATE: + case ISD::UINT_TO_FP: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecRes_Convert(N); break; @@ -1286,15 +1265,20 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::CTPOP: case ISD::CTTZ: case ISD::FABS: + case ISD::FCEIL: case ISD::FCOS: - case ISD::FNEG: - case ISD::FSIN: - case ISD::FSQRT: case ISD::FEXP: case ISD::FEXP2: + case ISD::FFLOOR: case ISD::FLOG: - case ISD::FLOG2: case ISD::FLOG10: + case ISD::FLOG2: + case ISD::FNEARBYINT: + case ISD::FNEG: + case ISD::FRINT: + case ISD::FSIN: + case 
ISD::FSQRT: + case ISD::FTRUNC: Res = WidenVecRes_Unary(N); break; } @@ -2004,7 +1988,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; - case ISD::FP_ROUND: + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index e3da2084529a..7b560d173ed3 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -570,13 +570,20 @@ void ScheduleDAGFast::ListScheduleBottomUp() { TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. + // If cross copy register class is the same as RC, then it must be + // possible copy the value directly. Do not try duplicate the def. + // If cross copy register class is not the same as RC, then it's + // possible to copy the value but it require cross register class copies + // and it is expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. SUnit *NewDef = 0; - if (DestRC) + if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical " + "register dependency!"); + } if (!NewDef) { // Issue copies, these can be expensive cross register class copies. 
SmallVector Copies; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 0b548b277f4c..88bd4509b468 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -70,6 +70,50 @@ static cl::opt DisableSchedCycles( "disable-sched-cycles", cl::Hidden, cl::init(false), cl::desc("Disable cycle-level precision during preRA scheduling")); +// Temporary sched=list-ilp flags until the heuristics are robust. +// Some options are also available under sched=list-hybrid. +static cl::opt DisableSchedRegPressure( + "disable-sched-reg-pressure", cl::Hidden, cl::init(false), + cl::desc("Disable regpressure priority in sched=list-ilp")); +static cl::opt DisableSchedLiveUses( + "disable-sched-live-uses", cl::Hidden, cl::init(true), + cl::desc("Disable live use priority in sched=list-ilp")); +static cl::opt DisableSchedVRegCycle( + "disable-sched-vrcycle", cl::Hidden, cl::init(false), + cl::desc("Disable virtual register cycle interference checks")); +static cl::opt DisableSchedPhysRegJoin( + "disable-sched-physreg-join", cl::Hidden, cl::init(false), + cl::desc("Disable physreg def-use affinity")); +static cl::opt DisableSchedStalls( + "disable-sched-stalls", cl::Hidden, cl::init(true), + cl::desc("Disable no-stall priority in sched=list-ilp")); +static cl::opt DisableSchedCriticalPath( + "disable-sched-critical-path", cl::Hidden, cl::init(false), + cl::desc("Disable critical path priority in sched=list-ilp")); +static cl::opt DisableSchedHeight( + "disable-sched-height", cl::Hidden, cl::init(false), + cl::desc("Disable scheduled-height priority in sched=list-ilp")); + +static cl::opt MaxReorderWindow( + "max-sched-reorder", cl::Hidden, cl::init(6), + cl::desc("Number of instructions to allow ahead of the critical path " + "in sched=list-ilp")); + +static cl::opt AvgIPC( + "sched-avg-ipc", cl::Hidden, cl::init(1), + cl::desc("Average inst/cycle whan no target itinerary 
exists.")); + +#ifndef NDEBUG +namespace { + // For sched=list-ilp, Count the number of times each factor comes into play. + enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth, + FactStatic, FactOther, NumFactors }; +} +static const char *FactorName[NumFactors] = +{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"}; +static int FactorCount[NumFactors]; +#endif //!NDEBUG + namespace { //===----------------------------------------------------------------------===// /// ScheduleDAGRRList - The actual register reduction list scheduler @@ -103,6 +147,10 @@ class ScheduleDAGRRList : public ScheduleDAGSDNodes { /// MinAvailableCycle - Cycle of the soonest available instruction. unsigned MinAvailableCycle; + /// IssueCount - Count instructions issued in this cycle + /// Currently valid only for bottom-up scheduling. + unsigned IssueCount; + /// LiveRegDefs - A set of physical registers and their definition /// that are "live". These nodes must be scheduled before any other nodes that /// modifies the registers can be scheduled. @@ -234,8 +282,14 @@ void ScheduleDAGRRList::Schedule() { DEBUG(dbgs() << "********** List Scheduling BB#" << BB->getNumber() << " '" << BB->getName() << "' **********\n"); +#ifndef NDEBUG + for (int i = 0; i < NumFactors; ++i) { + FactorCount[i] = 0; + } +#endif //!NDEBUG CurCycle = 0; + IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 
0 : UINT_MAX; NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); @@ -258,6 +312,11 @@ void ScheduleDAGRRList::Schedule() { else ListScheduleTopDown(); +#ifndef NDEBUG + for (int i = 0; i < NumFactors; ++i) { + DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n"); + } +#endif // !NDEBUG AvailableQueue->releaseState(); } @@ -295,7 +354,7 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { if (Height < MinAvailableCycle) MinAvailableCycle = Height; - if (isReady(SU)) { + if (isReady(PredSU)) { AvailableQueue->push(PredSU); } // CapturePred and others may have left the node in the pending queue, avoid @@ -383,6 +442,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { if (NextCycle <= CurCycle) return; + IssueCount = 0; AvailableQueue->setCurCycle(NextCycle); if (!HazardRec->isEnabled()) { // Bypass lots of virtual calls in case of long latency. @@ -407,6 +467,13 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { if (DisableSchedCycles) return; + // FIXME: Nodes such as CopyFromReg probably should not advance the current + // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node + // has predecessors the cycle will be advanced when they are scheduled. + // But given the crude nature of modeling latency though such nodes, we + // currently need to treat these nodes like real instructions. + // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; + unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); // Bump CurCycle to account for latency. We assume the latency of other @@ -477,6 +544,8 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { } } +static void resetVRegCycle(SUnit *SU); + /// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending /// count of its predecessors. If a predecessor pending count is zero, add it to /// the Available queue. 
@@ -486,12 +555,13 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { #ifndef NDEBUG if (CurCycle < SU->getHeight()) - DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n"); + DEBUG(dbgs() << " Height [" << SU->getHeight() + << "] pipeline stall!\n"); #endif // FIXME: Do not modify node height. It may interfere with // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the - // node it's ready cycle can aid heuristics, and after scheduling it can + // node its ready cycle can aid heuristics, and after scheduling it can // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); @@ -502,6 +572,12 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { AvailableQueue->ScheduledNode(SU); + // If HazardRec is disabled, and each inst counts as one cycle, then + // advance CurCycle before ReleasePredecessors to avoid useless pushes to + // PendingQueue for schedulers that implement HasReadyFilter. + if (!HazardRec->isEnabled() && AvgIPC < 2) + AdvanceToCycle(CurCycle + 1); + // Update liveness of predecessors before successors to avoid treating a // two-address node as a live range def. ReleasePredecessors(SU); @@ -518,16 +594,25 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { } } + resetVRegCycle(SU); + SU->isScheduled = true; // Conditions under which the scheduler should eagerly advance the cycle: // (1) No available instructions // (2) All pipelines full, so available instructions must have hazards. // - // If HazardRec is disabled, count each inst as one cycle. - if (!HazardRec->isEnabled() || HazardRec->atIssueLimit() - || AvailableQueue->empty()) - AdvanceToCycle(CurCycle + 1); + // If HazardRec is disabled, the cycle was pre-advanced before calling + // ReleasePredecessors. In that case, IssueCount should remain 0. + // + // Check AvailableQueue after ReleasePredecessors in case of zero latency. 
+ if (HazardRec->isEnabled() || AvgIPC > 1) { + if (SU->getNode() && SU->getNode()->isMachineOpcode()) + ++IssueCount; + if ((HazardRec->isEnabled() && HazardRec->atIssueLimit()) + || (!HazardRec->isEnabled() && IssueCount == AvgIPC)) + AdvanceToCycle(CurCycle + 1); + } } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -872,6 +957,15 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, AddPred(SuccSU, D); DelDeps.push_back(std::make_pair(SuccSU, *I)); } + else { + // Avoid scheduling the def-side copy before other successors. Otherwise + // we could introduce another physreg interference on the copy and + // continue inserting copies indefinitely. + SDep D(CopyFromSU, SDep::Order, /*Latency=*/0, + /*Reg=*/0, /*isNormalMemory=*/false, + /*isMustAlias=*/false, /*isArtificial=*/true); + AddPred(SuccSU, D); + } } for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); @@ -1077,13 +1171,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - // If cross copy register class is null, then it must be possible copy - // the value directly. Do not try duplicate the def. + // If cross copy register class is the same as RC, then it must be possible + // copy the value directly. Do not try duplicate the def. + // If cross copy register class is not the same as RC, then it's possible to + // copy the value but it require cross register class copies and it is + // expensive. + // If cross copy register class is null, then it's not possible to copy + // the value at all. SUnit *NewDef = 0; - if (DestRC) + if (DestRC != RC) { NewDef = CopyAndMoveSuccessors(LRDef); - else - DestRC = RC; + if (!DestRC && !NewDef) + report_fatal_error("Can't handle live physical register dependency!"); + } if (!NewDef) { // Issue copies, these can be expensive cross register class copies. 
SmallVector Copies; @@ -1139,7 +1239,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // priority. If it is not ready put it back. Schedule the node. Sequence.reserve(SUnits.size()); while (!AvailableQueue->empty()) { - DEBUG(dbgs() << "\n*** Examining Available\n"; + DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); // Pick the best node to schedule taking all constraints into @@ -1318,7 +1418,7 @@ struct src_ls_rr_sort : public queue_sort { struct hybrid_ls_rr_sort : public queue_sort { enum { IsBottomUp = true, - HasReadyFilter = true + HasReadyFilter = false }; RegReductionPQBase *SPQ; @@ -1337,7 +1437,7 @@ struct hybrid_ls_rr_sort : public queue_sort { struct ilp_ls_rr_sort : public queue_sort { enum { IsBottomUp = true, - HasReadyFilter = true + HasReadyFilter = false }; RegReductionPQBase *SPQ; @@ -1395,7 +1495,7 @@ class RegReductionPQBase : public SchedulingPriorityQueue { std::fill(RegPressure.begin(), RegPressure.end(), 0); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), E = TRI->regclass_end(); I != E; ++I) - RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF); + RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF); } } @@ -1422,6 +1522,8 @@ class RegReductionPQBase : public SchedulingPriorityQueue { unsigned getNodePriority(const SUnit *SU) const; unsigned getNodeOrdering(const SUnit *SU) const { + if (!SU->getNode()) return 0; + return scheduleDAG->DAG->GetOrdering(SU->getNode()); } @@ -1450,7 +1552,9 @@ class RegReductionPQBase : public SchedulingPriorityQueue { bool HighRegPressure(const SUnit *SU) const; - bool MayReduceRegPressure(SUnit *SU); + bool MayReduceRegPressure(SUnit *SU) const; + + int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; void ScheduledNode(SUnit *SU); @@ -1538,6 +1642,20 @@ ILPBURRPriorityQueue; // Static Node Priority for Register Pressure Reduction //===----------------------------------------------------------------------===// +// Check for special nodes 
that bypass scheduling heuristics. +// Currently this pushes TokenFactor nodes down, but may be used for other +// pseudo-ops as well. +// +// Return -1 to schedule right above left, 1 for left above right. +// Return 0 if no bias exists. +static int checkSpecialNodes(const SUnit *left, const SUnit *right) { + bool LSchedLow = left->isScheduleLow; + bool RSchedLow = right->isScheduleLow; + if (LSchedLow != RSchedLow) + return LSchedLow < RSchedLow ? 1 : -1; + return 0; +} + /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. /// Smaller number is the higher priority. static unsigned @@ -1576,17 +1694,6 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() { CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); } -void RegReductionPQBase::initNodes(std::vector &sunits) { - SUnits = &sunits; - // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); - // Reroute edges to nodes with multiple uses. - if (!TracksRegPressure) - PrescheduleNodesWithMultipleUses(); - // Calculate node priorities. - CalculateSethiUllmanNumbers(); -} - void RegReductionPQBase::addNode(const SUnit *SU) { unsigned SUSize = SethiUllmanNumbers.size(); if (SUnits->size() > SUSize) @@ -1625,7 +1732,17 @@ unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { // If SU does not have a register def, schedule it close to its uses // because it does not lengthen any live ranges. return 0; +#if 1 return SethiUllmanNumbers[SU->NodeNum]; +#else + unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; + if (SU->isCallOp) { + // FIXME: This assumes all of the defs are used as call operands. + int NP = (int)Priority - SU->getNode()->getNumValues(); + return (NP > 0) ? 
NP : 0; + } + return Priority; +#endif } //===----------------------------------------------------------------------===// @@ -1670,7 +1787,7 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { return false; } -bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { +bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { const SDNode *N = SU->getNode(); if (!N->isMachineOpcode() || !SU->NumSuccs) @@ -1688,10 +1805,60 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) { return false; } +// Compute the register pressure contribution by this instruction by count up +// for uses that are not live and down for defs. Only count register classes +// that are already under high pressure. As a side effect, compute the number of +// uses of registers that are already live. +// +// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure +// so could probably be factored. +int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { + LiveUses = 0; + int PDiff = 0; + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) + continue; + SUnit *PredSU = I->getSUnit(); + // NumRegDefsLeft is zero when enough uses of this node have been scheduled + // to cover the number of registers defined (they are all live). 
+ if (PredSU->NumRegDefsLeft == 0) { + if (PredSU->getNode()->isMachineOpcode()) + ++LiveUses; + continue; + } + for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); + RegDefPos.IsValid(); RegDefPos.Advance()) { + EVT VT = RegDefPos.GetValue(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + ++PDiff; + } + } + const SDNode *N = SU->getNode(); + + if (!N || !N->isMachineOpcode() || !SU->NumSuccs) + return PDiff; + + unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + for (unsigned i = 0; i != NumDefs; ++i) { + EVT VT = N->getValueType(i); + if (!N->hasAnyUseOfValue(i)) + continue; + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + if (RegPressure[RCId] >= RegLimit[RCId]) + --PDiff; + } + return PDiff; +} + void RegReductionPQBase::ScheduledNode(SUnit *SU) { if (!TracksRegPressure) return; + if (!SU->getNode()) + return; + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) @@ -1758,6 +1925,8 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) { return; const SDNode *N = SU->getNode(); + if (!N) return; + if (!N->isMachineOpcode()) { if (N->getOpcode() != ISD::CopyToReg) return; @@ -1871,7 +2040,29 @@ static unsigned calcMaxScratches(const SUnit *SU) { return Scratches; } -/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a +/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are +/// CopyFromReg from a virtual register. 
+static bool hasOnlyLiveInOpers(const SUnit *SU) { + bool RetVal = false; + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; + const SUnit *PredSU = I->getSUnit(); + if (PredSU->getNode() && + PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = + cast(PredSU->getNode()->getOperand(1))->getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + RetVal = true; + continue; + } + } + return false; + } + return RetVal; +} + +/// hasOnlyLiveOutUses - Return true if SU has only value successors that are /// CopyToReg to a virtual register. This SU def is probably a liveout and /// it has no other use. It should be scheduled closer to the terminator. static bool hasOnlyLiveOutUses(const SUnit *SU) { @@ -1893,20 +2084,67 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) { return RetVal; } -/// UnitsSharePred - Return true if the two scheduling units share a common -/// data predecessor. -static bool UnitsSharePred(const SUnit *left, const SUnit *right) { - SmallSet Preds; - for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); +// Set isVRegCycle for a node with only live in opers and live out uses. Also +// set isVRegCycle for its CopyFromReg operands. +// +// This is only relevant for single-block loops, in which case the VRegCycle +// node is likely an induction variable in which the operand and target virtual +// registers should be coalesced (e.g. pre/post increment values). Setting the +// isVRegCycle flag helps the scheduler prioritize other uses of the same +// CopyFromReg so that this node becomes the virtual register "kill". This +// avoids interference between the values live in and out of the block and +// eliminates a copy inside the loop. 
+static void initVRegCycle(SUnit *SU) { + if (DisableSchedVRegCycle) + return; + + if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) + return; + + DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); + + SU->isVRegCycle = true; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - if (I->isCtrl()) continue; // ignore chain preds - Preds.insert(I->getSUnit()); + if (I->isCtrl()) continue; + I->getSUnit()->isVRegCycle = true; } - for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); +} + +// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of +// CopyFromReg operands. We should no longer penalize other uses of this VReg. +static void resetVRegCycle(SUnit *SU) { + if (!SU->isVRegCycle) + return; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; // ignore chain preds - if (Preds.count(I->getSUnit())) + SUnit *PredSU = I->getSUnit(); + if (PredSU->isVRegCycle) { + assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && + "VRegCycle def must be CopyFromReg"); + I->getSUnit()->isVRegCycle = 0; + } + } +} + +// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This +// means a node that defines the VRegCycle has not been scheduled yet. +static bool hasVRegCycleUse(const SUnit *SU) { + // If this SU also defines the VReg, don't hoist it as a "use". 
+ if (SU->isVRegCycle) + return false; + + for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); + I != E; ++I) { + if (I->isCtrl()) continue; // ignore chain preds + if (I->getSUnit()->isVRegCycle && + I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { + DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); return true; + } } return false; } @@ -1926,23 +2164,12 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { // Return 0 if latency-based priority is equivalent. static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, RegReductionPQBase *SPQ) { - // If the two nodes share an operand and one of them has a single - // use that is a live out copy, favor the one that is live out. Otherwise - // it will be difficult to eliminate the copy if the instruction is a - // loop induction variable update. e.g. - // BB: - // sub r1, r3, #1 - // str r0, [r2, r3] - // mov r3, r1 - // cmp - // bne BB - bool SharePred = UnitsSharePred(left, right); - // FIXME: Only adjust if BB is a loop back edge. - // FIXME: What's the cost of a copy? - int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; - int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; - int LHeight = (int)left->getHeight() - LBonus; - int RHeight = (int)right->getHeight() - RBonus; + // Scheduling an instruction that uses a VReg whose postincrement has not yet + // been scheduled will induce a copy. Model this as an extra cycle of latency. + int LPenalty = hasVRegCycleUse(left) ? 1 : 0; + int RPenalty = hasVRegCycleUse(right) ? 
1 : 0; + int LHeight = (int)left->getHeight() + LPenalty; + int RHeight = (int)right->getHeight() + RPenalty; bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && BUHasStall(left, LHeight, SPQ); @@ -1953,45 +2180,102 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If scheduling either one of the node will cause a pipeline stall, sort // them according to their height. if (LStall) { - if (!RStall) + if (!RStall) { + DEBUG(++FactorCount[FactStall]); return 1; - if (LHeight != RHeight) + } + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactStall]); return LHeight > RHeight ? 1 : -1; - } else if (RStall) + } + } else if (RStall) { + DEBUG(++FactorCount[FactStall]); return -1; + } // If either node is scheduling for latency, sort them by height/depth // and latency. if (!checkPref || (left->SchedulingPref == Sched::Latency || right->SchedulingPref == Sched::Latency)) { if (DisableSchedCycles) { - if (LHeight != RHeight) + if (LHeight != RHeight) { + DEBUG(++FactorCount[FactHeight]); return LHeight > RHeight ? 1 : -1; + } } else { // If neither instruction stalls (!LStall && !RStall) then - // it's height is already covered so only its depth matters. We also reach + // its height is already covered so only its depth matters. We also reach // this if both stall but have the same height. - unsigned LDepth = left->getDepth(); - unsigned RDepth = right->getDepth(); + int LDepth = left->getDepth() - LPenalty; + int RDepth = right->getDepth() - RPenalty; if (LDepth != RDepth) { + DEBUG(++FactorCount[FactDepth]); DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum << ") depth " << LDepth << " vs SU (" << right->NodeNum << ") depth " << RDepth << "\n"); return LDepth < RDepth ? 1 : -1; } } - if (left->Latency != right->Latency) + if (left->Latency != right->Latency) { + DEBUG(++FactorCount[FactOther]); return left->Latency > right->Latency ? 
1 : -1; + } } return 0; } static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { + // Schedule physical register definitions close to their use. This is + // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as + // long as shortening physreg live ranges is generally good, we can defer + // creating a subtarget hook. + if (!DisableSchedPhysRegJoin) { + bool LHasPhysReg = left->hasPhysRegDefs; + bool RHasPhysReg = right->hasPhysRegDefs; + if (LHasPhysReg != RHasPhysReg) { + DEBUG(++FactorCount[FactRegUses]); + #ifndef NDEBUG + const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"}; + #endif + DEBUG(dbgs() << " SU (" << left->NodeNum << ") " + << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " + << PhysRegMsg[RHasPhysReg] << "\n"); + return LHasPhysReg < RHasPhysReg; + } + } + + // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down. unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); - if (LPriority != RPriority) + + // Be really careful about hoisting call operands above previous calls. + // Only allows it if it would reduce register pressure. + if (left->isCall && right->isCallOp) { + unsigned RNumVals = right->getNode()->getNumValues(); + RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0; + } + if (right->isCall && left->isCallOp) { + unsigned LNumVals = left->getNode()->getNumValues(); + LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0; + } + + if (LPriority != RPriority) { + DEBUG(++FactorCount[FactStatic]); return LPriority > RPriority; + } + + // One or both of the nodes are calls and their sethi-ullman numbers are the + // same, then keep source order. + if (left->isCall || right->isCall) { + unsigned LOrder = SPQ->getNodeOrdering(left); + unsigned ROrder = SPQ->getNodeOrdering(right); + + // Prefer an ordering where the lower the non-zero order number, the higher + // the preference. 
+ if ((LOrder || ROrder) && LOrder != ROrder) + return LOrder != 0 && (LOrder < ROrder || ROrder == 0); + } // Try schedule def + use closer when Sethi-Ullman numbers are the same. // e.g. @@ -2012,40 +2296,62 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { // This creates more short live intervals. unsigned LDist = closestSucc(left); unsigned RDist = closestSucc(right); - if (LDist != RDist) + if (LDist != RDist) { + DEBUG(++FactorCount[FactOther]); return LDist < RDist; + } // How many registers becomes live when the node is scheduled. unsigned LScratch = calcMaxScratches(left); unsigned RScratch = calcMaxScratches(right); - if (LScratch != RScratch) + if (LScratch != RScratch) { + DEBUG(++FactorCount[FactOther]); return LScratch > RScratch; + } - if (!DisableSchedCycles) { + // Comparing latency against a call makes little sense unless the node + // is register pressure-neutral. + if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0)) + return (left->NodeQueueId > right->NodeQueueId); + + // Do not compare latencies when one or both of the nodes are calls. 
+ if (!DisableSchedCycles && + !(left->isCall || right->isCall)) { int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); if (result != 0) return result > 0; } else { - if (left->getHeight() != right->getHeight()) + if (left->getHeight() != right->getHeight()) { + DEBUG(++FactorCount[FactHeight]); return left->getHeight() > right->getHeight(); + } - if (left->getDepth() != right->getDepth()) + if (left->getDepth() != right->getDepth()) { + DEBUG(++FactorCount[FactDepth]); return left->getDepth() < right->getDepth(); + } } assert(left->NodeQueueId && right->NodeQueueId && "NodeQueueId cannot be zero"); + DEBUG(++FactorCount[FactOther]); return (left->NodeQueueId > right->NodeQueueId); } // Bottom up bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + return BURRSort(left, right, SPQ); } // Source order, otherwise bottom up. bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + unsigned LOrder = SPQ->getNodeOrdering(left); unsigned ROrder = SPQ->getNodeOrdering(right); @@ -2077,6 +2383,9 @@ bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { // Return true if right should be scheduled with higher priority than left. bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); @@ -2086,16 +2395,18 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { // Avoid causing spills. If register pressure is high, schedule for // register pressure reduction. 
if (LHigh && !RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" << right->NodeNum << ")\n"); return true; } else if (!LHigh && RHigh) { + DEBUG(++FactorCount[FactPressureDiff]); DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" << left->NodeNum << ")\n"); return false; } - else if (!LHigh && !RHigh) { + if (!LHigh && !RHigh) { int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); if (result != 0) return result > 0; @@ -2112,34 +2423,118 @@ bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { != ScheduleHazardRecognizer::NoHazard) return false; - return SU->getHeight() <= CurCycle; + return true; } +static bool canEnableCoalescing(SUnit *SU) { + unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; + if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) + // CopyToReg should be close to its uses to facilitate coalescing and + // avoid spilling. + return true; + + if (Opc == TargetOpcode::EXTRACT_SUBREG || + Opc == TargetOpcode::SUBREG_TO_REG || + Opc == TargetOpcode::INSERT_SUBREG) + // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be + // close to their uses to facilitate coalescing. + return true; + + if (SU->NumPreds == 0 && SU->NumSuccs != 0) + // If SU does not have a register def, schedule it close to its uses + // because it does not lengthen any live ranges. + return true; + + return false; +} + +// list-ilp is currently an experimental scheduler that allows various +// heuristics to be enabled prior to the normal register reduction logic. bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res > 0; + if (left->isCall || right->isCall) // No way to compute latency of calls. return BURRSort(left, right, SPQ); - bool LHigh = SPQ->HighRegPressure(left); - bool RHigh = SPQ->HighRegPressure(right); - // Avoid causing spills. 
If register pressure is high, schedule for - // register pressure reduction. - if (LHigh && !RHigh) - return true; - else if (!LHigh && RHigh) - return false; - else if (!LHigh && !RHigh) { - // Low register pressure situation, schedule to maximize instruction level - // parallelism. - if (left->NumPreds > right->NumPreds) - return false; - else if (left->NumPreds < right->NumPreds) - return false; + unsigned LLiveUses = 0, RLiveUses = 0; + int LPDiff = 0, RPDiff = 0; + if (!DisableSchedRegPressure || !DisableSchedLiveUses) { + LPDiff = SPQ->RegPressureDiff(left, LLiveUses); + RPDiff = SPQ->RegPressureDiff(right, RLiveUses); + } + if (!DisableSchedRegPressure && LPDiff != RPDiff) { + DEBUG(++FactorCount[FactPressureDiff]); + DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff + << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); + return LPDiff > RPDiff; + } + + if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { + bool LReduce = canEnableCoalescing(left); + bool RReduce = canEnableCoalescing(right); + DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); + if (LReduce && !RReduce) return false; + if (RReduce && !LReduce) return true; + } + + if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { + DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses + << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); + DEBUG(++FactorCount[FactRegUses]); + return LLiveUses < RLiveUses; + } + + if (!DisableSchedStalls) { + bool LStall = BUHasStall(left, left->getHeight(), SPQ); + bool RStall = BUHasStall(right, right->getHeight(), SPQ); + if (LStall != RStall) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } + } + + if (!DisableSchedCriticalPath) { + int spread = (int)left->getDepth() - (int)right->getDepth(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " + << left->getDepth() << " != SU(" << 
right->NodeNum << "): " + << right->getDepth() << "\n"); + DEBUG(++FactorCount[FactDepth]); + return left->getDepth() < right->getDepth(); + } + } + + if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { + int spread = (int)left->getHeight() - (int)right->getHeight(); + if (std::abs(spread) > MaxReorderWindow) { + DEBUG(++FactorCount[FactHeight]); + return left->getHeight() > right->getHeight(); + } } return BURRSort(left, right, SPQ); } +void RegReductionPQBase::initNodes(std::vector &sunits) { + SUnits = &sunits; + // Add pseudo dependency edges for two-address nodes. + AddPseudoTwoAddrDeps(); + // Reroute edges to nodes with multiple uses. + if (!TracksRegPressure) + PrescheduleNodesWithMultipleUses(); + // Calculate node priorities. + CalculateSethiUllmanNumbers(); + + // For single block loops, mark nodes that look like canonical IV increments. + if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { + for (unsigned i = 0, e = sunits.size(); i != e; ++i) { + initVRegCycle(&sunits[i]); + } + } +} + //===----------------------------------------------------------------------===// // Preschedule for Register Pressure //===----------------------------------------------------------------------===// @@ -2417,6 +2812,9 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, // Top down bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { + if (int res = checkSpecialNodes(left, right)) + return res < 0; + unsigned LPriority = SPQ->getNodePriority(left); unsigned RPriority = SPQ->getNodePriority(right); bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 477c1ffe65d3..9f2f0121a86d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -27,12 +27,21 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" 
#include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); +// This allows latency based scheduler to notice high latency instructions +// without a target itinerary. The choice of number here has more to do with +// balancing scheduler heuristics than with the actual machine latency. +static cl::opt HighLatencyCycles( + "sched-high-latency-cycles", cl::Hidden, cl::init(10), + cl::desc("Roughly estimate the number of cycles that 'long latency'" + "instructions take for targets with no itinerary")); + ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) : ScheduleDAG(mf), InstrItins(mf.getTarget().getInstrItineraryData()) {} @@ -72,11 +81,15 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { SUnit *SU = NewSUnit(Old->getNode()); SU->OrigNode = Old->OrigNode; SU->Latency = Old->Latency; + SU->isVRegCycle = Old->isVRegCycle; SU->isCall = Old->isCall; + SU->isCallOp = Old->isCallOp; SU->isTwoAddress = Old->isTwoAddress; SU->isCommutable = Old->isCommutable; SU->hasPhysRegDefs = Old->hasPhysRegDefs; SU->hasPhysRegClobbers = Old->hasPhysRegClobbers; + SU->isScheduleHigh = Old->isScheduleHigh; + SU->isScheduleLow = Old->isScheduleLow; SU->SchedulingPref = Old->SchedulingPref; Old->isCloned = true; return SU; @@ -273,6 +286,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { Worklist.push_back(DAG->getRoot().getNode()); Visited.insert(DAG->getRoot().getNode()); + SmallVector CallSUnits; while (!Worklist.empty()) { SDNode *NI = Worklist.pop_back_val(); @@ -325,6 +339,15 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { if (!HasGlueUse) break; } + if (NodeSUnit->isCall) + CallSUnits.push_back(NodeSUnit); + + // Schedule zero-latency TokenFactor below any nodes that may increase the + // schedule height. Otherwise, ancestors of the TokenFactor may appear to + // have false stalls. 
+ if (NI->getOpcode() == ISD::TokenFactor) + NodeSUnit->isScheduleLow = true; + // If there are glue operands involved, N is now the bottom-most node // of the sequence of nodes that are glued together. // Update the SUnit. @@ -338,6 +361,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() { // Assign the Latency field of NodeSUnit using target-provided information. ComputeLatency(NodeSUnit); } + + // Find all call operands. + while (!CallSUnits.empty()) { + SUnit *SU = CallSUnits.pop_back_val(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->getOpcode() != ISD::CopyToReg) + continue; + SDNode *SrcN = SUNode->getOperand(2).getNode(); + if (isPassiveNode(SrcN)) continue; // Not scheduled. + SUnit *SrcSU = &SUnits[SrcN->getNodeId()]; + SrcSU->isCallOp = true; + } + } } void ScheduleDAGSDNodes::AddSchedEdges() { @@ -403,6 +440,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { // If this is a ctrl dep, latency is 1. unsigned OpLatency = isChain ? 1 : OpSU->Latency; + // Special-case TokenFactor chains as zero-latency. + if(isChain && OpN->getOpcode() == ISD::TokenFactor) + OpLatency = 0; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, OpLatency, PhysReg); if (!isChain && !UnitLatencies) { @@ -410,11 +451,15 @@ void ScheduleDAGSDNodes::AddSchedEdges() { ST.adjustSchedDependency(OpSU, SU, const_cast(dep)); } - if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) { + if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { // Multiple register uses are combined in the same SUnit. For example, // we could have a set of glued nodes with all their defs consumed by // another set of glued nodes. Register pressure tracking sees this as // a single use, so to keep pressure balanced we reduce the defs. + // + // We can't tell (without more book-keeping) if this results from + // glued nodes or duplicate operands. 
As long as we don't reduce + // NumRegDefsLeft to zero, we handle the common cases well. --OpSU->NumRegDefsLeft; } } @@ -437,6 +482,10 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { // Initialize NumNodeDefs for the current Node's opcode. void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() { + // Check for phys reg copy. + if (!Node) + return; + if (!Node->isMachineOpcode()) { if (Node->getOpcode() == ISD::CopyFromReg) NodeNumDefs = 1; @@ -499,6 +548,16 @@ void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) { } void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { + SDNode *N = SU->getNode(); + + // TokenFactor operands are considered zero latency, and some schedulers + // (e.g. Top-Down list) may rely on the fact that operand latency is nonzero + // whenever node latency is nonzero. + if (N && N->getOpcode() == ISD::TokenFactor) { + SU->Latency = 0; + return; + } + // Check to see if the scheduler cares about latencies. if (ForceUnitLatencies()) { SU->Latency = 1; @@ -506,7 +565,11 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { } if (!InstrItins || InstrItins->isEmpty()) { - SU->Latency = 1; + if (N && N->isMachineOpcode() && + TII->isHighLatencyDef(N->getMachineOpcode())) + SU->Latency = HighLatencyCycles; + else + SU->Latency = 1; return; } @@ -573,7 +636,7 @@ namespace { }; } -/// ProcessSDDbgValues - Process SDDbgValues assoicated with this node. +/// ProcessSDDbgValues - Process SDDbgValues associated with this node. static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, SmallVector, 32> &Orders, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index cc7310e4ca42..b5f68f3055cf 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -80,6 +80,12 @@ namespace llvm { /// flagged together nodes with a single SUnit. 
virtual void BuildSchedGraph(AliasAnalysis *AA); + /// InitVRegCycleFlag - Set isVRegCycle if this node's single use is + /// CopyToReg and its only active data operands are CopyFromReg within a + /// single block loop. + /// + void InitVRegCycleFlag(SUnit *SU); + /// InitNumRegDefsLeft - Determine the # of regs defined by this node. /// void InitNumRegDefsLeft(SUnit *SU); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 9120288921e2..c2711c8097d0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1418,9 +1418,9 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. -SDValue SelectionDAG::getShiftAmountOperand(SDValue Op) { +SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(OpTy); + MVT ShTy = TLI.getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -2482,6 +2482,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, "Vector element count mismatch!"); if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // sext(undef) = 0, because the top bits will all be the same. + return getConstant(0, VT); break; case ISD::ZERO_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2496,6 +2499,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x) return getNode(ISD::ZERO_EXTEND, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + // zext(undef) = 0, because the top bits will be zero. 
+ return getConstant(0, VT); break; case ISD::ANY_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2512,6 +2518,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + else if (OpOpcode == ISD::UNDEF) + return getUNDEF(VT); // (ext (trunx x)) -> x if (OpOpcode == ISD::TRUNCATE) { @@ -5904,7 +5912,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::UINT_TO_FP: return "uint_to_fp"; case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; - case ISD::BITCAST: return "bit_convert"; + case ISD::BITCAST: return "bitcast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -6226,6 +6234,9 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N, return; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + // Don't follow chain operands. + if (N->getOperand(i).getValueType() == MVT::Other) + continue; OS << '\n'; printrWithDepthHelper(OS, N->getOperand(i).getNode(), G, depth-1, indent+2); } @@ -6238,7 +6249,7 @@ void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G, void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const { // Don't print impossibly deep things. - printrWithDepth(OS, G, 100); + printrWithDepth(OS, G, 10); } void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { @@ -6247,7 +6258,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const { void SDNode::dumprFull(const SelectionDAG *G) const { // Don't print impossibly deep things. 
- dumprWithDepth(G, 100); + dumprWithDepth(G, 10); } static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) { @@ -6311,7 +6322,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 48d9bbb5132e..b02a7b66c496 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -50,7 +50,6 @@ #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Compiler.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -84,9 +83,7 @@ LimitFPPrecision("limit-float-precision", // %buffer = alloca [4096 x i8] // %data = load [4096 x i8]* %argPtr // store [4096 x i8] %data, [4096 x i8]* %buffer -static cl::opt -MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"), - cl::init(64), cl::Hidden); +static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, @@ -1130,15 +1127,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; - // FIXME: C calling convention requires the return type to be promoted - // to at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. 
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) + VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); @@ -1153,9 +1143,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { Flags.setInReg(); // Propagate extension type if any - if (F->paramHasAttr(0, Attribute::SExt)) + if (ExtendKind == ISD::SIGN_EXTEND) Flags.setSExt(); - else if (F->paramHasAttr(0, Attribute::ZExt)) + else if (ExtendKind == ISD::ZERO_EXTEND) Flags.setZExt(); for (unsigned i = 0; i < NumParts; ++i) { @@ -2029,9 +2019,13 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, APInt Range = ComputeRange(LEnd, RBegin); assert((Range - 2ULL).isNonNegative() && "Invalid case distance"); - double LDensity = (double)LSize.roundToDouble() / + // Use volatile double here to avoid excess precision issues on some hosts, + // e.g. that use 80-bit X87 registers. + volatile double LDensity = + (double)LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); - double RDensity = (double)RSize.roundToDouble() / + volatile double RDensity = + (double)RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); double Metric = Range.logBase2()*(LDensity+RDensity); // Should always split in some non-trivial place @@ -4039,10 +4033,6 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBB; - if (MBB != &MF.front()) - return false; - unsigned Reg = 0; if (Arg->hasByValAttr()) { // Byval arguments' frame index is recorded during argument lowering. 
@@ -4413,7 +4403,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_dispatch_setup: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - getRoot(), getValue(I.getArgOperand(0)))); + getRoot())); return 0; } @@ -4682,9 +4672,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::flt_rounds: setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); return 0; - case Intrinsic::trap: - DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + case Intrinsic::trap: { + StringRef TrapFuncName = getTrapFunctionName(); + if (TrapFuncName.empty()) { + DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot())); + return 0; + } + TargetLowering::ArgListTy Args; + std::pair Result = + TLI.LowerCallTo(getRoot(), I.getType(), + false, false, false, false, 0, CallingConv::C, + /*isTailCall=*/false, /*isReturnValueUsed=*/true, + DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), + Args, DAG, getCurDebugLoc()); + DAG.setRoot(Result.second); return 0; + } case Intrinsic::uadd_with_overflow: return implVisitAluOverflow(I, ISD::UADDO); case Intrinsic::sadd_with_overflow: @@ -4937,15 +4940,21 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), &ReturnValues[0], ReturnValues.size())); - } - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. - if (Result.second.getNode()) - DAG.setRoot(Result.second); - else + // Assign order to nodes here. If the call does not produce a result, it won't + // be mapped to a SDNode and visit() will not assign it an order number. + if (!Result.second.getNode()) { + // As a special case, a null chain means that a tail call has been emitted and + // the DAG root is already updated. 
HasTailCall = true; + ++SDNodeOrder; + AssignOrderingToNode(DAG.getRoot().getNode()); + } else { + DAG.setRoot(Result.second); + ++SDNodeOrder; + AssignOrderingToNode(Result.second.getNode()); + } if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This @@ -5211,12 +5220,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -namespace llvm { +namespace { /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. -class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : - public TargetLowering::AsmOperandInfo { +class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { public: /// CallOperand - If this is the result output operand or a clobber /// this is null, otherwise it is the incoming operand to the CallInst. @@ -5304,7 +5312,7 @@ class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : typedef SmallVector SDISelAsmOperandInfoVector; -} // end llvm namespace. +} // end anonymous namespace /// isAllocatableRegister - If the specified register is safe to allocate, /// i.e. it isn't a stack pointer or some other special register, return the @@ -5363,11 +5371,13 @@ isAllocatableRegister(unsigned Reg, MachineFunction &MF, /// OpInfo describes the operand. /// Input and OutputRegs are the set of already allocated physical registers. /// -void SelectionDAGBuilder:: -GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, - std::set &OutputRegs, - std::set &InputRegs) { - LLVMContext &Context = FuncInfo.Fn->getContext(); +static void GetRegistersForValue(SelectionDAG &DAG, + const TargetLowering &TLI, + DebugLoc DL, + SDISelAsmOperandInfo &OpInfo, + std::set &OutputRegs, + std::set &InputRegs) { + LLVMContext &Context = *DAG.getContext(); // Compute whether this value requires an input register, an output register, // or both. @@ -5413,7 +5423,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // vector types). 
EVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { @@ -5423,7 +5433,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, // machine. RegVT = EVT::getIntegerVT(Context, OpInfo.ConstraintVT.getSizeInBits()); - OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; } @@ -5694,7 +5704,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -5705,7 +5716,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(OpInfo, OutputRegs, InputRegs); + GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo, OutputRegs, + InputRegs); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6181,7 +6193,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, // For a function returning void, there is no return value. We can't create // such a node, so we just return a null return value in that case. In - // that case, nothing will actualy look at the value. + // that case, nothing will actually look at the value. 
if (ReturnValues.empty()) return std::make_pair(SDValue(), Chain); @@ -6397,7 +6409,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SDB->setValue(I, Res); // If this argument is live outside of the entry block, insert a copy from - // whereever we got it to the vreg that other BB's will reference it as. + // wherever we got it to the vreg that other BB's will reference it as. SDB->CopyToExportRegsIfNeeded(I); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 8f466d913bbb..a689b76cdc88 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -23,7 +23,6 @@ #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include -#include namespace llvm { @@ -60,7 +59,6 @@ class MDNode; class PHINode; class PtrToIntInst; class ReturnInst; -class SDISelAsmOperandInfo; class SDDbgValue; class SExtInst; class SelectInst; @@ -380,10 +378,6 @@ class SelectionDAGBuilder { assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - - void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, - std::set &OutputRegs, - std::set &InputRegs); void FindMergedConditions(const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB, MachineBasicBlock *CurBB, diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68ba966d268a..fdf3767d8c65 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -421,10 +421,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -void -SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, - BasicBlock::const_iterator End, - bool &HadTailCall) { +void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, + BasicBlock::const_iterator End, + bool &HadTailCall) { // Lower all of the non-terminator 
instructions. If a call is emitted // as a tail call, cease emitting nodes for this block. Terminators // are handled below. @@ -438,7 +437,6 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, // Final step, emit the lowered DAG as machine code. CodeGenAndEmitDAG(); - return; } void SelectionDAGISel::ComputeLiveOutVRegInfo() { @@ -489,13 +487,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; std::string BlockName; + int BlockNumber = -1; +#ifdef NDEBUG if (ViewDAGCombine1 || ViewLegalizeTypesDAGs || ViewLegalizeDAGs || ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) +#endif + { + BlockNumber = FuncInfo->MBB->getNumber(); BlockName = MF->getFunction()->getNameStr() + ":" + FuncInfo->MBB->getBasicBlock()->getNameStr(); - - DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); + } + DEBUG(dbgs() << "Initial selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); @@ -505,7 +509,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
@@ -518,7 +523,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (Changed) { if (ViewDAGCombineLT) @@ -531,8 +537,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; - CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); } { @@ -556,8 +562,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; - CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" + << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); @@ -567,7 +573,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Legalize(OptLevel); } - DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); @@ -577,7 +584,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); @@ -591,7 +599,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); + DEBUG(dbgs() << "Selected selection 
DAG: BB#" << BlockNumber + << " '" << BlockName << "'\n"; CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); @@ -632,7 +641,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { } void SelectionDAGISel::DoInstructionSelection() { - DEBUG(errs() << "===== Instruction selection begins:\n"); + DEBUG(errs() << "===== Instruction selection begins: BB#" + << FuncInfo->MBB->getNumber() + << " '" << FuncInfo->MBB->getName() << "'\n"); PreprocessISelDAG(); @@ -735,16 +746,49 @@ void SelectionDAGISel::PrepareEHLandingPad() { - +/// TryToFoldFastISelLoad - We're checking to see if we can fold the specified +/// load into the specified FoldInst. Note that we could have a sequence where +/// multiple LLVM IR instructions are folded into the same machineinstr. For +/// example we could have: +/// A: x = load i32 *P +/// B: y = icmp A, 42 +/// C: br y, ... +/// +/// In this scenario, LI is "A", and FoldInst is "C". We know about "B" (and +/// any other folded instructions) because it is between A and C. +/// +/// If we succeed in folding the load into the operation, return true. +/// bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, + const Instruction *FoldInst, FastISel *FastIS) { + // We know that the load has a single use, but don't know what it is. If it + // isn't one of the folded instructions, then we can't succeed here. Handle + // this by scanning the single-use users of the load until we get to FoldInst. + unsigned MaxUsers = 6; // Don't scan down huge single-use chains of instrs. + + const Instruction *TheUser = LI->use_back(); + while (TheUser != FoldInst && // Scan up until we find FoldInst. + // Stay in the right block. + TheUser->getParent() == FoldInst->getParent() && + --MaxUsers) { // Don't scan too far. + // If there are multiple or no uses of this instruction, then bail out. + if (!TheUser->hasOneUse()) + return false; + + TheUser = TheUser->use_back(); + } + // Don't try to fold volatile loads. 
Target has to deal with alignment // constraints. if (LI->isVolatile()) return false; - // Figure out which vreg this is going into. + // Figure out which vreg this is going into. If there is no assigned vreg yet + // then there actually was no reference to it. Perhaps the load is referenced + // by a dead instruction. unsigned LoadReg = FastIS->getRegForValue(LI); - assert(LoadReg && "Load isn't already assigned a vreg? "); + if (LoadReg == 0) + return false; // Check to see what the uses of this vreg are. If it has no uses, or more // than one use (at the machine instr level) then we can't fold it. @@ -764,7 +808,7 @@ bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI, "The only use of the vreg must be a use, we haven't emitted the def!"); MachineInstr *User = &*RI; - + // Set the insertion point properly. Folding the load can cause generation of // other random instructions (like sign extends) for addressing modes, make // sure they get inserted in a logical place before the new instruction. @@ -817,6 +861,17 @@ static void CheckLineNumbers(const MachineBasicBlock *MBB) { } #endif +/// isFoldedOrDeadInstruction - Return true if the specified instruction is +/// side-effect free and is either dead or folded into a generated instruction. +/// Return false if it needs to be emitted. +static bool isFoldedOrDeadInstruction(const Instruction *I, + FunctionLoweringInfo *FuncInfo) { + return !I->mayWriteToMemory() && // Side-effecting instructions aren't folded. + !isa(I) && // Terminators aren't folded. + !isa(I) && // Debug instructions aren't folded. + !FuncInfo->isExportedInst(I); // Exported instrs must be computed. +} + void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. 
FastISel *FastIS = 0; @@ -843,15 +898,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } if (AllPredsVisited) { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa(I); ++I) FuncInfo->ComputePHILiveOutRegInfo(cast(I)); - } } else { - for (BasicBlock::const_iterator I = LLVMBB->begin(), E = LLVMBB->end(); - I != E && isa(I); ++I) { + for (BasicBlock::const_iterator I = LLVMBB->begin(); + isa(I); ++I) FuncInfo->InvalidatePHILiveOutRegInfo(cast(I)); - } } FuncInfo->VisitedBBs.insert(LLVMBB); @@ -899,10 +952,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { const Instruction *Inst = llvm::prior(BI); // If we no longer require this instruction, skip it. - if (!Inst->mayWriteToMemory() && - !isa(Inst) && - !isa(Inst) && - !FuncInfo->isExportedInst(Inst)) + if (isFoldedOrDeadInstruction(Inst, FuncInfo)) continue; // Bottom-up: reset the insert pos at the top, after any local-value @@ -911,16 +961,20 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Try to select the instruction with FastISel. if (FastIS->SelectInstruction(Inst)) { - // If fast isel succeeded, check to see if there is a single-use - // non-volatile load right before the selected instruction, and see if - // the load is used by the instruction. If so, try to fold it. - const Instruction *BeforeInst = 0; - if (Inst != Begin) - BeforeInst = llvm::prior(llvm::prior(BI)); - if (BeforeInst && isa(BeforeInst) && - BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst && - TryToFoldFastISelLoad(cast(BeforeInst), FastIS)) - --BI; // If we succeeded, don't re-select the load. + // If fast isel succeeded, skip over all the folded instructions, and + // then see if there is a load right before the selected instructions. + // Try to fold the load if so. 
+ const Instruction *BeforeInst = Inst; + while (BeforeInst != Begin) { + BeforeInst = llvm::prior(BasicBlock::const_iterator(BeforeInst)); + if (!isFoldedOrDeadInstruction(BeforeInst, FuncInfo)) + break; + } + if (BeforeInst != Inst && isa(BeforeInst) && + BeforeInst->hasOneUse() && + TryToFoldFastISelLoad(cast(BeforeInst), Inst, FastIS)) + // If we succeeded, don't re-select the load. + BI = llvm::next(BasicBlock::const_iterator(BeforeInst)); continue; } @@ -974,11 +1028,13 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { else ++NumFastIselBlocks; - // Run SelectionDAG instruction selection on the remainder of the block - // not handled by FastISel. If FastISel is not run, this is the entire - // block. - bool HadTailCall; - SelectBasicBlock(Begin, BI, HadTailCall); + if (Begin != BI) { + // Run SelectionDAG instruction selection on the remainder of the block + // not handled by FastISel. If FastISel is not run, this is the entire + // block. + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); + } FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); @@ -2392,6 +2448,18 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, CurDAG->getRegister(RegNo, VT), (SDNode*)0)); continue; } + case OPC_EmitRegister2: { + // For targets w/ more than 256 register names, the register enum + // values are stored in two bytes in the matcher table (just like + // opcodes). + MVT::SimpleValueType VT = + (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; + unsigned RegNo = MatcherTable[MatcherIndex++]; + RegNo |= MatcherTable[MatcherIndex++] << 8; + RecordedNodes.push_back(std::pair( + CurDAG->getRegister(RegNo, VT), (SDNode*)0)); + continue; + } case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 76eb9453561e..cd1647b17b9b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -90,7 +90,8 @@ namespace llvm { /// If you want to override the dot attributes printed for a particular /// edge, override this method. template - static std::string getEdgeAttributes(const void *Node, EdgeIter EI) { + static std::string getEdgeAttributes(const void *Node, EdgeIter EI, + const SelectionDAG *Graph) { SDValue Op = EI.getNode()->getOperand(EI.getOperand()); EVT VT = Op.getValueType(); if (VT == MVT::Glue) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 35b847ccabfb..15606af787f8 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -93,6 +93,19 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::UREM_I32] = "__umodsi3"; Names[RTLIB::UREM_I64] = "__umoddi3"; Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. 
+ Names[RTLIB::SDIVREM_I8] = 0; + Names[RTLIB::SDIVREM_I16] = 0; + Names[RTLIB::SDIVREM_I32] = 0; + Names[RTLIB::SDIVREM_I64] = 0; + Names[RTLIB::SDIVREM_I128] = 0; + Names[RTLIB::UDIVREM_I8] = 0; + Names[RTLIB::UDIVREM_I16] = 0; + Names[RTLIB::UDIVREM_I32] = 0; + Names[RTLIB::UDIVREM_I64] = 0; + Names[RTLIB::UDIVREM_I128] = 0; + Names[RTLIB::NEG_I32] = "__negsi2"; Names[RTLIB::NEG_I64] = "__negdi2"; Names[RTLIB::ADD_F32] = "__addsf3"; @@ -1665,6 +1678,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, ConstantSDNode *ShAmt = dyn_cast(In.getOperand(1)); if (!ShAmt) break; + SDValue Shift = In.getOperand(1); + if (TLO.LegalTypes()) { + uint64_t ShVal = ShAmt->getZExtValue(); + Shift = + TLO.DAG.getConstant(ShVal, getShiftAmountTy(Op.getValueType())); + } + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth); HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth); @@ -1678,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), NewTrunc, - In.getOperand(1))); + Shift)); } break; } @@ -1829,7 +1849,6 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, bool foldBooleans, DAGCombinerInfo &DCI, DebugLoc dl) const { SelectionDAG &DAG = DCI.DAG; - LLVMContext &Context = *DAG.getContext(); // These setcc operations always fold. switch (Cond) { @@ -1840,12 +1859,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETTRUE2: return DAG.getConstant(1, VT); } - if (isa(N0.getNode())) { - // Ensure that the constant occurs on the RHS, and fold constant - // comparisons. + // Ensure that the constant occurs on the RHS, and fold constant + // comparisons. 
+ if (isa(N0.getNode())) return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond)); - } - + if (ConstantSDNode *N1C = dyn_cast(N1.getNode())) { const APInt &C1 = N1C->getAPIntValue(); @@ -1898,6 +1916,42 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal. } + // (zext x) == C --> x == (trunc C) + if (DCI.isBeforeLegalize() && N0->hasOneUse() && + (Cond == ISD::SETEQ || Cond == ISD::SETNE)) { + unsigned MinBits = N0.getValueSizeInBits(); + SDValue PreZExt; + if (N0->getOpcode() == ISD::ZERO_EXTEND) { + // ZExt + MinBits = N0->getOperand(0).getValueSizeInBits(); + PreZExt = N0->getOperand(0); + } else if (N0->getOpcode() == ISD::AND) { + // DAGCombine turns costly ZExts into ANDs + if (ConstantSDNode *C = dyn_cast(N0->getOperand(1))) + if ((C->getAPIntValue()+1).isPowerOf2()) { + MinBits = C->getAPIntValue().countTrailingOnes(); + PreZExt = N0->getOperand(0); + } + } else if (LoadSDNode *LN0 = dyn_cast(N0)) { + // ZEXTLOAD + if (LN0->getExtensionType() == ISD::ZEXTLOAD) { + MinBits = LN0->getMemoryVT().getSizeInBits(); + PreZExt = N0; + } + } + + // Make sure we're not loosing bits from the constant. + if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) { + EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits); + if (isTypeDesirableForOp(ISD::SETCC, MinVT)) { + // Will get folded away. + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreZExt); + SDValue C = DAG.getConstant(C1.trunc(MinBits), MinVT); + return DAG.getSetCC(dl, VT, Trunc, C, Cond); + } + } + } + // If the LHS is '(and load, const)', the RHS is 0, // the test is for equality or unsigned, and all 1 bits of the const are // in the same partial word, see if we can shorten the load. 
@@ -1936,7 +1990,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } if (bestWidth) { - EVT newVT = EVT::getIntegerVT(Context, bestWidth); + EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth); if (newVT.isRound()) { EVT PtrType = Lod->getOperand(1).getValueType(); SDValue Ptr = Lod->getBasePtr(); @@ -3174,26 +3228,39 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // FIXME: We should use a narrower constant when the upper // bits are known to be zero. - ConstantSDNode *N1C = cast(N->getOperand(1)); - APInt::mu magics = N1C->getAPIntValue().magicu(); + const APInt &N1C = cast(N->getOperand(1))->getAPIntValue(); + APInt::mu magics = N1C.magicu(); + + SDValue Q = N->getOperand(0); + + // If the divisor is even, we can avoid using the expensive fixup by shifting + // the divided value upfront. + if (magics.a != 0 && !N1C[0]) { + unsigned Shift = N1C.countTrailingZeros(); + Q = DAG.getNode(ISD::SRL, dl, VT, Q, + DAG.getConstant(Shift, getShiftAmountTy(Q.getValueType()))); + if (Created) + Created->push_back(Q.getNode()); + + // Get magic number for the shifted divisor. 
+ magics = N1C.lshr(Shift).magicu(Shift); + assert(magics.a == 0 && "Should use cheap fixup now"); + } // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - SDValue Q; if (isOperationLegalOrCustom(ISD::MULHU, VT)) - Q = DAG.getNode(ISD::MULHU, dl, VT, N->getOperand(0), - DAG.getConstant(magics.m, VT)); + Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) - Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), - N->getOperand(0), - DAG.getConstant(magics.m, VT)).getNode(), 1); + Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, + DAG.getConstant(magics.m, VT)).getNode(), 1); else return SDValue(); // No mulhu or equvialent if (Created) Created->push_back(Q.getNode()); if (magics.a == 0) { - assert(magics.s < N1C->getAPIntValue().getBitWidth() && + assert(magics.s < N1C.getBitWidth() && "We shouldn't generate an undefined shift!"); return DAG.getNode(ISD::SRL, dl, VT, Q, DAG.getConstant(magics.s, getShiftAmountTy(Q.getValueType()))); diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 7b5bca495206..160f38f69236 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -277,7 +277,7 @@ void PEI::calculateAnticAvail(MachineFunction &Fn) { // Initialize data flow sets. clearAnticAvailSets(); - // Calulate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. + // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG. 
bool changed = true; unsigned iterations = 0; while (changed) { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 2843c1a5b6d8..35b8e14ddc61 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -104,6 +104,18 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } +void SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) { + /// Joined copies are not deleted immediately, but kept in JoinedCopies. + JoinedCopies.insert(CopyMI); + + /// Mark all register operands of CopyMI as so they won't affect dead + /// code elimination. + for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), + E = CopyMI->operands_end(); I != E; ++I) + if (I->isReg()) + I->setIsUndef(true); +} + /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA /// being the source and IntB being the dest, thus this defines a value number /// in IntB. If the source value number (in IntA) is defined by a copy from B, @@ -196,15 +208,14 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, if (ValLR+1 != BLR) return false; // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(IntB.reg) && - *tri_->getSubRegisters(IntB.reg)) { - for (const unsigned* SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && IntA.overlaps(li_->getInterval(*SR))) { + // of its aliases is overlapping the live interval of the virtual register. + // If so, do not coalesce. 
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { + for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) + if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) { DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); + dbgs() << "\t\tInterfere with alias "; + li_->getInterval(*AS).print(dbgs(), tri_); }); return false; } @@ -471,7 +482,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); assert(DVNI->def == DefIdx); BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); - JoinedCopies.insert(UseMI); + markAsJoined(UseMI); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition @@ -901,6 +912,58 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } +/// shouldJoinPhys - Return true if a copy involving a physreg should be joined. +/// We need to be careful about coalescing a source physical register with a +/// virtual register. Once the coalescing is done, it cannot be broken and these +/// are not spillable! If the destination interval uses are far away, think +/// twice about coalescing them! +bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) { + bool Allocatable = li_->isAllocatable(CP.getDstReg()); + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + /// Always join simple intervals that are defined by a single copy from a + /// reserved register. This doesn't increase register pressure, so it is + /// always beneficial. + if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) + return true; + + if (DisablePhysicalJoin) { + DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); + return false; + } + + // Only coalesce to allocatable physreg, we don't want to risk modifying + // reserved registers. 
+ if (!Allocatable) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. + } + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } + + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial()) { + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold) { + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + return false; + } + } + return true; +} /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. @@ -973,27 +1036,25 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } - if (DisablePhysicalJoin && CP.isPhys()) { - DEBUG(dbgs() << "\tPhysical joins disabled.\n"); - return false; - } - - DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)); + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) + << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) + << "\n"); // Enforce policies. if (CP.isPhys()) { - DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n"); - // Only coalesce to allocatable physreg. - if (!li_->isAllocatable(CP.getDstReg())) { - DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); - return false; // Not coalescable. 
+ if (!shouldJoinPhys(CP)) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true, + CP.getDstReg(), 0, CopyMI)) + return true; + return false; } } else { - DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) - << " to " << CP.getNewRC()->getName() << "\n"); - // Avoid constraining virtual register regclass too much. if (CP.isCrossClass()) { + DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); if (DisableCrossClassJoin) { DEBUG(dbgs() << "\tCross-class joins disabled.\n"); return false; @@ -1002,8 +1063,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { mri_->getRegClass(CP.getSrcReg()), mri_->getRegClass(CP.getDstReg()), CP.getNewRC())) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << CP.getNewRC()->getName() << ".\n"); + DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); Again = true; // May be possible to coalesce later. return false; } @@ -1015,45 +1075,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { CP.flip(); } - // We need to be careful about coalescing a source physical register with a - // virtual register. Once the coalescing is done, it cannot be broken and - // these are not spillable! If the destination interval uses are far away, - // think twice about coalescing them! - // FIXME: Why are we skipping this test for partial copies? - // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. - if (!CP.isPartial() && CP.isPhys()) { - LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. 
- if (li_->hasInterval(CP.getDstReg()) && - li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } - - const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), - mri_->use_nodbg_end()) * Threshold < Length) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI)) - return true; - - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - // Okay, attempt to join these two intervals. On failure, this returns false. // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have @@ -1072,7 +1093,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (!CP.isPartial()) { if (AdjustCopiesBackFrom(CP, CopyMI) || RemoveCopyByCommutingDef(CP, CopyMI)) { - JoinedCopies.insert(CopyMI); + markAsJoined(CopyMI); DEBUG(dbgs() << "\tTrivial!\n"); return true; } @@ -1092,7 +1113,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } // Remember to delete the copy instruction. - JoinedCopies.insert(CopyMI); + markAsJoined(CopyMI); UpdateRegDefsUses(CP); @@ -1568,9 +1589,7 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, if (UseMI->isIdentityCopy()) continue; SlotIndex Idx = li_->getInstructionIndex(UseMI); - // FIXME: Should this be Idx != UseIdx? SlotIndex() will return something - // that compares higher than any other interval. 
- if (Idx >= Start && Idx < End && Idx >= UseIdx) { + if (Idx >= Start && Idx < End && (!UseIdx.isValid() || Idx >= UseIdx)) { LastUse = &Use; UseIdx = Idx.getUseIndex(); } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 56703dfa2ddd..65cf542836dd 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -68,16 +68,6 @@ namespace llvm { initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry()); } - struct InstrSlots { - enum { - LOAD = 0, - USE = 1, - DEF = 2, - STORE = 3, - NUM = 4 - }; - }; - virtual void getAnalysisUsage(AnalysisUsage &AU) const; virtual void releaseMemory(); @@ -148,6 +138,9 @@ namespace llvm { unsigned DstReg, unsigned DstSubIdx, MachineInstr *CopyMI); + /// shouldJoinPhys - Return true if a physreg copy should be joined. + bool shouldJoinPhys(CoalescerPair &CP); + /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool isWinToJoinCrossClass(unsigned SrcReg, @@ -186,6 +179,9 @@ namespace llvm { /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, unsigned Reg, SlotIndex &LastUseIdx) const; + + /// markAsJoined - Remember that CopyMI has already been joined. + void markAsJoined(MachineInstr *CopyMI); }; } // End llvm namespace diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 13e1454fa5f3..43904a76cf13 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -442,25 +442,18 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { BasicBlock *DispatchBlock = BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - // Add a call to dispatch_setup at the start of the dispatch block. This is - // expanded to any target-specific setup that needs to be done. 
- Value *SetupArg = - CastInst::Create(Instruction::BitCast, FunctionContext, - Type::getInt8PtrTy(F.getContext()), "", - DispatchBlock); - CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock); - // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we go to a block that just does an unwind (which is the - // correct action for a standard call). - BasicBlock *UnwindBlock = - BasicBlock::Create(F.getContext(), "unwindbb", &F); - Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock)); + // value. By default, we issue a trap statement. + BasicBlock *TrapBlock = + BasicBlock::Create(F.getContext(), "trapbb", &F); + CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), + "", TrapBlock); + new UnreachableInst(F.getContext(), TrapBlock); Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, DispatchBlock); SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(), + SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), DispatchBlock); // Split the entry block to insert the conditional branch for the setjmp. BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), @@ -524,6 +517,11 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "dispatch", EntryBB->getTerminator()); + + // Add a call to dispatch_setup after the setjmp call. This is expanded to any + // target-specific setup that needs to be done. + CallInst::Create(DispatchSetupFn, "", EntryBB->getTerminator()); + // check the return value of the setjmp. non-zero goes to dispatcher. Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), ICmpInst::ICMP_EQ, DispatchVal, Zero, @@ -564,7 +562,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Replace all unwinds with a branch to the unwind handler. // ??? Should this ever happen with sjlj exceptions? 
for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(UnwindBlock, Unwinds[i]); + BranchInst::Create(TrapBlock, Unwinds[i]); Unwinds[i]->eraseFromParent(); } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 6e3fa90e4341..ca79cafcf4be 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -10,47 +10,20 @@ #define DEBUG_TYPE "slotindexes" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/ManagedStatic.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; - -// Yep - these are thread safe. See the header for details. -namespace { - - - class EmptyIndexListEntry : public IndexListEntry { - public: - EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {} - }; - - class TombstoneIndexListEntry : public IndexListEntry { - public: - TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {} - }; - - // The following statics are thread safe. They're read only, and you - // can't step from them to any other list entries. 
- ManagedStatic IndexListEntryEmptyKey; - ManagedStatic IndexListEntryTombstoneKey; -} - char SlotIndexes::ID = 0; INITIALIZE_PASS(SlotIndexes, "slotindexes", "Slot index numbering", false, false) -IndexListEntry* IndexListEntry::getEmptyKeyEntry() { - return &*IndexListEntryEmptyKey; -} - -IndexListEntry* IndexListEntry::getTombstoneKeyEntry() { - return &*IndexListEntryTombstoneKey; -} - +STATISTIC(NumLocalRenum, "Number of local renumberings"); +STATISTIC(NumGlobalRenum, "Number of global renumberings"); void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { au.setPreservesAll(); @@ -59,7 +32,7 @@ void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const { void SlotIndexes::releaseMemory() { mi2iMap.clear(); - mbb2IdxMap.clear(); + MBBRanges.clear(); idx2MBBMap.clear(); clearList(); } @@ -85,13 +58,15 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { "Index list non-empty at initial numbering?"); assert(idx2MBBMap.empty() && "Index -> MBB mapping non-empty at initial numbering?"); - assert(mbb2IdxMap.empty() && + assert(MBBRanges.empty() && "MBB -> Index mapping non-empty at initial numbering?"); assert(mi2iMap.empty() && "MachineInstr -> Index mapping non-empty at initial numbering?"); functionSize = 0; unsigned index = 0; + MBBRanges.resize(mf->getNumBlockIDs()); + idx2MBBMap.reserve(mf->size()); push_back(createEntry(0, index)); @@ -103,8 +78,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { // Insert an index for the MBB start. SlotIndex blockStartIndex(back(), SlotIndex::LOAD); - index += SlotIndex::NUM; - for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { MachineInstr *mi = miItr; @@ -112,32 +85,19 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { continue; // Insert a store index for the instr. - push_back(createEntry(mi, index)); + push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. 
- mi2iMap.insert( - std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); ++functionSize; - - unsigned Slots = mi->getDesc().getNumDefs(); - if (Slots == 0) - Slots = 1; - - index += (Slots + 1) * SlotIndex::NUM; } - // We insert two blank instructions between basic blocks. - // One to represent live-out registers and one to represent live-ins. - push_back(createEntry(0, index)); - index += SlotIndex::NUM; - - push_back(createEntry(0, index)); - - SlotIndex blockEndIndex(back(), SlotIndex::LOAD); - mbb2IdxMap.insert( - std::make_pair(mbb, std::make_pair(blockStartIndex, blockEndIndex))); + // We insert one blank instruction between basic blocks. + push_back(createEntry(0, index += SlotIndex::InstrDist)); + MBBRanges[mbb->getNumber()].first = blockStartIndex; + MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } @@ -151,38 +111,41 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { } void SlotIndexes::renumberIndexes() { - // Renumber updates the index of every element of the index list. - // If all instrs in the function have been allocated an index (which has been - // placed in the index list in the order of instruction iteration) then the - // resulting numbering will match what would have been generated by the - // pass during the initial numbering of the function if the new instructions - // had been present. DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n"); + ++NumGlobalRenum; - functionSize = 0; unsigned index = 0; for (IndexListEntry *curEntry = front(); curEntry != getTail(); curEntry = curEntry->getNext()) { - curEntry->setIndex(index); - - if (curEntry->getInstr() == 0) { - // MBB start entry. Just step index by 1. 
- index += SlotIndex::NUM; - } - else { - ++functionSize; - unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs(); - if (Slots == 0) - Slots = 1; - - index += (Slots + 1) * SlotIndex::NUM; - } + index += SlotIndex::InstrDist; } } +// Renumber indexes locally after curEntry was inserted, but failed to get a new +// index. +void SlotIndexes::renumberIndexes(IndexListEntry *curEntry) { + // Number indexes with half the default spacing so we can catch up quickly. + const unsigned Space = SlotIndex::InstrDist/2; + assert((Space & 3) == 0 && "InstrDist must be a multiple of 2*NUM"); + + IndexListEntry *start = curEntry->getPrev(); + unsigned index = start->getIndex(); + IndexListEntry *tail = getTail(); + do { + curEntry->setIndex(index += Space); + curEntry = curEntry->getNext(); + // If the next index is bigger, we have caught up. + } while (curEntry != tail && curEntry->getIndex() <= index); + + DEBUG(dbgs() << "\n*** Renumbered SlotIndexes " << start->getIndex() << '-' + << index << " ***\n"); + ++NumLocalRenum; +} + + void SlotIndexes::dump() const { for (const IndexListEntry *itr = front(); itr != getTail(); itr = itr->getNext()) { @@ -195,11 +158,9 @@ void SlotIndexes::dump() const { } } - for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin(); - itr != mbb2IdxMap.end(); ++itr) { - dbgs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - [" - << itr->second.first << ", " << itr->second.second << "]\n"; - } + for (unsigned i = 0, e = MBBRanges.size(); i != e; ++i) + dbgs() << "BB#" << i << "\t[" << MBBRanges[i].first << ';' + << MBBRanges[i].second << ")\n"; } // Print a SlotIndex to a raw_ostream. diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 9c0bf1629a14..694961863261 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -67,11 +67,11 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { /// because all weights are positive. 
/// struct SpillPlacement::Node { - /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle. + /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle. /// Ideally, these two numbers should be identical, but inaccuracies in the /// block frequency estimates means that we need to normalize ingoing and /// outgoing frequencies separately so they are commensurate. - float Frequency[2]; + float Scale[2]; /// Bias - Normalized contributions from non-transparent blocks. /// A bundle connected to a MustSpill block has a huge negative bias, @@ -107,7 +107,7 @@ struct SpillPlacement::Node { /// Node - Create a blank Node. Node() { - Frequency[0] = Frequency[1] = 0; + Scale[0] = Scale[1] = 0; } /// clear - Reset per-query data, but preserve frequencies that only depend on @@ -121,7 +121,7 @@ struct SpillPlacement::Node { /// out=0 for an ingoing link, and 1 for an outgoing link. void addLink(unsigned b, float w, bool out) { // Normalize w relative to all connected blocks from that direction. - w /= Frequency[out]; + w *= Scale[out]; // There can be multiple links to the same bundle, add them up. for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) @@ -134,9 +134,10 @@ struct SpillPlacement::Node { } /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. + /// Return the change to the total number of positive biases. void addBias(float w, bool out) { // Normalize w relative to all connected blocks from that direction. - w /= Frequency[out]; + w *= Scale[out]; Bias += w; } @@ -175,13 +176,22 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { nodes = new Node[bundles->getNumBundles()]; // Compute total ingoing and outgoing block frequencies for all bundles. 
+ BlockFrequency.resize(mf.getNumBlockIDs()); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - float Freq = getBlockFrequency(I); + float Freq = LiveIntervals::getSpillWeight(true, false, + loops->getLoopDepth(I)); unsigned Num = I->getNumber(); - nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq; - nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq; + BlockFrequency[Num] = Freq; + nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; + nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; } + // Scales are reciprocal frequencies. + for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) + for (unsigned d = 0; d != 2; ++d) + if (nodes[i].Scale[d] > 0) + nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; + // We never change the function. return false; } @@ -200,31 +210,12 @@ void SpillPlacement::activate(unsigned n) { } -/// prepareNodes - Compute node biases and weights from a set of constraints. +/// addConstraints - Compute node biases and weights from a set of constraints. /// Set a bit in NodeMask for each active node. -void SpillPlacement:: -prepareNodes(const SmallVectorImpl &LiveBlocks) { - for (SmallVectorImpl::const_iterator I = LiveBlocks.begin(), +void SpillPlacement::addConstraints(ArrayRef LiveBlocks) { + for (ArrayRef::iterator I = LiveBlocks.begin(), E = LiveBlocks.end(); I != E; ++I) { - MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number); - float Freq = getBlockFrequency(MBB); - - // Is this a transparent block? Link ingoing and outgoing bundles. - if (I->Entry == DontCare && I->Exit == DontCare) { - unsigned ib = bundles->getBundle(I->Number, 0); - unsigned ob = bundles->getBundle(I->Number, 1); - - // Ignore self-loops. - if (ib == ob) - continue; - activate(ib); - activate(ob); - nodes[ib].addLink(ob, Freq, 1); - nodes[ob].addLink(ib, Freq, 0); - continue; - } - - // This block is not transparent, but it can still add bias. 
+ float Freq = getBlockFrequency(I->Number); const float Bias[] = { 0, // DontCare, 1, // PrefReg, @@ -248,10 +239,54 @@ prepareNodes(const SmallVectorImpl &LiveBlocks) { } } +void SpillPlacement::addLinks(ArrayRef Links) { + for (ArrayRef::iterator I = Links.begin(), E = Links.end(); I != E; + ++I) { + unsigned Number = *I; + unsigned ib = bundles->getBundle(Number, 0); + unsigned ob = bundles->getBundle(Number, 1); + + // Ignore self-loops. + if (ib == ob) + continue; + activate(ib); + activate(ob); + if (nodes[ib].Links.empty() && !nodes[ib].mustSpill()) + Linked.push_back(ib); + if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) + Linked.push_back(ob); + float Freq = getBlockFrequency(Number); + nodes[ib].addLink(ob, Freq, 1); + nodes[ob].addLink(ib, Freq, 0); + } +} + +bool SpillPlacement::scanActiveBundles() { + Linked.clear(); + RecentPositive.clear(); + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) { + nodes[n].update(nodes); + // A node that must spill, or a node without any links is not going to + // change its value ever again, so exclude it from iterations. + if (nodes[n].mustSpill()) + continue; + if (!nodes[n].Links.empty()) + Linked.push_back(n); + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } + return !RecentPositive.empty(); +} + /// iterate - Repeatedly update the Hopfield nodes until stability or the /// maximum number of iterations is reached. /// @param Linked - Numbers of linked nodes that need updating. -void SpillPlacement::iterate(const SmallVectorImpl &Linked) { +void SpillPlacement::iterate() { + // First update the recently positive nodes. They have likely received new + // negative bias that will turn them off. 
+ while (!RecentPositive.empty()) + nodes[RecentPositive.pop_back_val()].update(nodes); + if (Linked.empty()) return; @@ -267,10 +302,13 @@ void SpillPlacement::iterate(const SmallVectorImpl &Linked) { for (SmallVectorImpl::const_reverse_iterator I = llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; // Scan forwards, skipping the first node which was just updated. @@ -278,53 +316,37 @@ void SpillPlacement::iterate(const SmallVectorImpl &Linked) { for (SmallVectorImpl::const_iterator I = llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) { unsigned n = *I; - bool C = nodes[n].update(nodes); - Changed |= C; + if (nodes[n].update(nodes)) { + Changed = true; + if (nodes[n].preferReg()) + RecentPositive.push_back(n); + } } - if (!Changed) + if (!Changed || !RecentPositive.empty()) return; } } -bool -SpillPlacement::placeSpills(const SmallVectorImpl &LiveBlocks, - BitVector &RegBundles) { +void SpillPlacement::prepare(BitVector &RegBundles) { + Linked.clear(); + RecentPositive.clear(); // Reuse RegBundles as our ActiveNodes vector. ActiveNodes = &RegBundles; ActiveNodes->clear(); ActiveNodes->resize(bundles->getNumBundles()); +} - // Compute active nodes, links and biases. - prepareNodes(LiveBlocks); +bool +SpillPlacement::finish() { + assert(ActiveNodes && "Call prepare() first"); - // Update all active nodes, and find the ones that are actually linked to - // something so their value may change when iterating. - SmallVector Linked; - for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) { - nodes[n].update(nodes); - // A node that must spill, or a node without any links is not going to - // change its value ever again, so exclude it from iterations. 
- if (!nodes[n].Links.empty() && !nodes[n].mustSpill()) - Linked.push_back(n); - } - - // Iterate the network to convergence. - iterate(Linked); - - // Write preferences back to RegBundles. + // Write preferences back to ActiveNodes. bool Perfect = true; - for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) + for (int n = ActiveNodes->find_first(); n>=0; n = ActiveNodes->find_next(n)) if (!nodes[n].preferReg()) { - RegBundles.reset(n); + ActiveNodes->reset(n); Perfect = false; } + ActiveNodes = 0; return Perfect; } - -/// getBlockFrequency - Return our best estimate of the block frequency which is -/// the expected number of block executions per function invocation. -float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) { - // Use the unnormalized spill weight for real block frequencies. - return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB)); -} - diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h index ef2d516cdce7..6952ad800965 100644 --- a/lib/CodeGen/SpillPlacement.h +++ b/lib/CodeGen/SpillPlacement.h @@ -10,8 +10,8 @@ // This analysis computes the optimal spill code placement between basic blocks. // // The runOnMachineFunction() method only precomputes some profiling information -// about the CFG. The real work is done by placeSpills() which is called by the -// register allocator. +// about the CFG. The real work is done by prepare(), addConstraints(), and +// finish() which are called by the register allocator. 
// // Given a variable that is live across multiple basic blocks, and given // constraints on the basic blocks where the variable is live, determine which @@ -27,6 +27,8 @@ #ifndef LLVM_CODEGEN_SPILLPLACEMENT_H #define LLVM_CODEGEN_SPILLPLACEMENT_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -35,7 +37,6 @@ class BitVector; class EdgeBundles; class MachineBasicBlock; class MachineLoopInfo; -template class SmallVectorImpl; class SpillPlacement : public MachineFunctionPass { struct Node; @@ -44,10 +45,20 @@ class SpillPlacement : public MachineFunctionPass { const MachineLoopInfo *loops; Node *nodes; - // Nodes that are active in the current computation. Owned by the placeSpills + // Nodes that are active in the current computation. Owned by the prepare() // caller. BitVector *ActiveNodes; + // Nodes with active links. Populated by scanActiveBundles. + SmallVector Linked; + + // Nodes that went positive during the last call to scanActiveBundles or + // iterate. + SmallVector RecentPositive; + + // Block frequencies are computed once. Indexed by block number. + SmallVector BlockFrequency; + public: static char ID; // Pass identification, replacement for typeid. @@ -70,28 +81,53 @@ class SpillPlacement : public MachineFunctionPass { BorderConstraint Exit : 8; ///< Constraint on block exit. }; - /// placeSpills - Compute the optimal spill code placement given the - /// constraints. No MustSpill constraints will be violated, and the smallest - /// possible number of PrefX constraints will be violated, weighted by - /// expected execution frequencies. - /// @param LiveBlocks Constraints for blocks that have the variable live in or - /// live out. DontCare/DontCare means the variable is live - /// through the block. DontCare/X means the variable is live - /// out, but not live in. + /// prepare - Reset state and prepare for a new spill placement computation. 
/// @param RegBundles Bit vector to receive the edge bundles where the /// variable should be kept in a register. Each bit /// corresponds to an edge bundle, a set bit means the /// variable should be kept in a register through the /// bundle. A clear bit means the variable should be - /// spilled. + /// spilled. This vector is retained. + void prepare(BitVector &RegBundles); + + /// addConstraints - Add constraints and biases. This method may be called + /// more than once to accumulate constraints. + /// @param LiveBlocks Constraints for blocks that have the variable live in or + /// live out. + void addConstraints(ArrayRef LiveBlocks); + + /// addLinks - Add transparent blocks with the given numbers. + void addLinks(ArrayRef Links); + + /// scanActiveBundles - Perform an initial scan of all bundles activated by + /// addConstraints and addLinks, updating their state. Add all the bundles + /// that now prefer a register to RecentPositive. + /// Prepare internal data structures for iterate. + /// Return true if there are any positive nodes. + bool scanActiveBundles(); + + /// iterate - Update the network iteratively until convergence, or new bundles + /// are found. + void iterate(); + + /// getRecentPositive - Return an array of bundles that became positive during + /// the previous call to scanActiveBundles or iterate. + ArrayRef getRecentPositive() { return RecentPositive; } + + /// finish - Compute the optimal spill code placement given the + /// constraints. No MustSpill constraints will be violated, and the smallest + /// possible number of PrefX constraints will be violated, weighted by + /// expected execution frequencies. + /// The selected bundles are returned in the bitvector passed to prepare(). /// @return True if a perfect solution was found, allowing the variable to be /// in a register through all relevant bundles. 
- bool placeSpills(const SmallVectorImpl &LiveBlocks, - BitVector &RegBundles); + bool finish(); /// getBlockFrequency - Return the estimated block execution frequency per /// function invocation. - float getBlockFrequency(const MachineBasicBlock*); + float getBlockFrequency(unsigned Number) const { + return BlockFrequency[Number]; + } private: virtual bool runOnMachineFunction(MachineFunction&); @@ -99,8 +135,6 @@ class SpillPlacement : public MachineFunctionPass { virtual void releaseMemory(); void activate(unsigned); - void prepareNodes(const SmallVectorImpl&); - void iterate(const SmallVectorImpl&); }; } // end namespace llvm diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index fd385824aff9..b6bbcd7176dd 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -11,6 +11,7 @@ #include "Spiller.h" #include "VirtRegMap.h" +#include "LiveRangeEdit.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,7 +25,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -180,11 +180,9 @@ class TrivialSpiller : public SpillerBase { VirtRegMap &vrm) : SpillerBase(pass, mf, vrm) {} - void spill(LiveInterval *li, - SmallVectorImpl &newIntervals, - const SmallVectorImpl &) { + void spill(LiveRangeEdit &LRE) { // Ignore spillIs - we don't use it. - trivialSpillEverywhere(li, newIntervals); + trivialSpillEverywhere(&LRE.getParent(), *LRE.getNewVRegs()); } }; @@ -210,22 +208,22 @@ class StandardSpiller : public Spiller { vrm(&vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. 
- void spill(LiveInterval *li, - SmallVectorImpl &newIntervals, - const SmallVectorImpl &spillIs) { + void spill(LiveRangeEdit &LRE) { std::vector added = - lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); - newIntervals.insert(newIntervals.end(), added.begin(), added.end()); + lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), + loopInfo, *vrm); + LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), + added.begin(), added.end()); // Update LiveStacks. - int SS = vrm->getStackSlot(li->reg); + int SS = vrm->getStackSlot(LRE.getReg()); if (SS == VirtRegMap::NO_STACK_SLOT) return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg); + const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); LiveInterval &SI = lss->getOrCreateInterval(SS, RC); if (!SI.hasAtLeastOneValue()) SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0)); + SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); } }; diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index f017583494ed..41f1727da439 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -12,11 +12,9 @@ namespace llvm { - class LiveInterval; + class LiveRangeEdit; class MachineFunction; class MachineFunctionPass; - class SlotIndex; - template class SmallVectorImpl; class VirtRegMap; /// Spiller interface. @@ -27,16 +25,8 @@ namespace llvm { public: virtual ~Spiller() = 0; - /// spill - Spill the given live interval. The method used will depend on - /// the Spiller implementation selected. - /// - /// @param li The live interval to be spilled. - /// @param spillIs A list of intervals that are about to be spilled, - /// and so cannot be used for remat etc. - /// @param newIntervals The newly created intervals will be appended here. 
- virtual void spill(LiveInterval *li, - SmallVectorImpl &newIntervals, - const SmallVectorImpl &spillIs) = 0; + /// spill - Spill the LRE.getParent() live interval. + virtual void spill(LiveRangeEdit &LRE) = 0; }; diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index fd5d50b7ecb8..ac9d72bf62c9 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -16,12 +16,11 @@ #include "SplitKit.h" #include "LiveRangeEdit.h" #include "VirtRegMap.h" -#include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -29,9 +28,8 @@ using namespace llvm; -static cl::opt -AllowSplit("spiller-splits-edges", - cl::desc("Allow critical edge splitting during spilling")); +STATISTIC(NumFinished, "Number of splits finished"); +STATISTIC(NumSimple, "Number of splits that were simple"); //===----------------------------------------------------------------------===// // Split Analysis @@ -45,49 +43,105 @@ SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm, LIS(lis), Loops(mli), TII(*MF.getTarget().getInstrInfo()), - CurLI(0) {} + CurLI(0), + LastSplitPoint(MF.getNumBlockIDs()) {} void SplitAnalysis::clear() { UseSlots.clear(); - UsingInstrs.clear(); - UsingBlocks.clear(); - LiveBlocks.clear(); + UseBlocks.clear(); + ThroughBlocks.clear(); CurLI = 0; } -bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) { - MachineBasicBlock *T, *F; - SmallVector Cond; - return !TII.AnalyzeBranch(const_cast(*MBB), T, F, Cond); +SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) { + const MachineBasicBlock *MBB = MF.getBlockNumbered(Num); + const MachineBasicBlock *LPad = MBB->getLandingPadSuccessor(); + 
std::pair &LSP = LastSplitPoint[Num]; + + // Compute split points on the first call. The pair is independent of the + // current live interval. + if (!LSP.first.isValid()) { + MachineBasicBlock::const_iterator FirstTerm = MBB->getFirstTerminator(); + if (FirstTerm == MBB->end()) + LSP.first = LIS.getMBBEndIdx(MBB); + else + LSP.first = LIS.getInstructionIndex(FirstTerm); + + // If there is a landing pad successor, also find the call instruction. + if (!LPad) + return LSP.first; + // There may not be a call instruction (?) in which case we ignore LPad. + LSP.second = LSP.first; + for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin(); + I != E; --I) + if (I->getDesc().isCall()) { + LSP.second = LIS.getInstructionIndex(I); + break; + } + } + + // If CurLI is live into a landing pad successor, move the last split point + // back to the call that may throw. + if (LPad && LSP.second.isValid() && LIS.isLiveInToMBB(*CurLI, LPad)) + return LSP.second; + else + return LSP.first; } /// analyzeUses - Count instructions, basic blocks, and loops using CurLI. void SplitAnalysis::analyzeUses() { + assert(UseSlots.empty() && "Call clear first"); + + // First get all the defs from the interval values. This provides the correct + // slots for early clobbers. + for (LiveInterval::const_vni_iterator I = CurLI->vni_begin(), + E = CurLI->vni_end(); I != E; ++I) + if (!(*I)->isPHIDef() && !(*I)->isUnused()) + UseSlots.push_back((*I)->def); + + // Get use slots from the use-def chain. 
const MachineRegisterInfo &MRI = MF.getRegInfo(); - for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg), - E = MRI.reg_end(); I != E; ++I) { - MachineOperand &MO = I.getOperand(); - if (MO.isUse() && MO.isUndef()) - continue; - MachineInstr *MI = MO.getParent(); - if (MI->isDebugValue() || !UsingInstrs.insert(MI)) - continue; - UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex()); - MachineBasicBlock *MBB = MI->getParent(); - UsingBlocks[MBB]++; - } + for (MachineRegisterInfo::use_nodbg_iterator + I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; + ++I) + if (!I.getOperand().isUndef()) + UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + array_pod_sort(UseSlots.begin(), UseSlots.end()); - calcLiveBlockInfo(); - DEBUG(dbgs() << " counted " - << UsingInstrs.size() << " instrs, " - << UsingBlocks.size() << " blocks.\n"); + + // Remove duplicates, keeping the smaller slot for each instruction. + // That is what we want for early clobbers. + UseSlots.erase(std::unique(UseSlots.begin(), UseSlots.end(), + SlotIndex::isSameInstr), + UseSlots.end()); + + // Compute per-live block info. + if (!calcLiveBlockInfo()) { + // FIXME: calcLiveBlockInfo found inconsistencies in the live range. + // I am looking at you, SimpleRegisterCoalescing! + DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n"); + const_cast(LIS) + .shrinkToUses(const_cast(CurLI)); + UseBlocks.clear(); + ThroughBlocks.clear(); + bool fixed = calcLiveBlockInfo(); + (void)fixed; + assert(fixed && "Couldn't fix broken live interval"); + } + + DEBUG(dbgs() << "Analyze counted " + << UseSlots.size() << " instrs in " + << UseBlocks.size() << " blocks, through " + << NumThroughBlocks << " blocks.\n"); } /// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks /// where CurLI is live. 
-void SplitAnalysis::calcLiveBlockInfo() { +bool SplitAnalysis::calcLiveBlockInfo() { + ThroughBlocks.resize(MF.getNumBlockIDs()); + NumThroughBlocks = 0; if (CurLI->empty()) - return; + return true; LiveInterval::const_iterator LVI = CurLI->begin(); LiveInterval::const_iterator LVE = CurLI->end(); @@ -104,24 +158,14 @@ void SplitAnalysis::calcLiveBlockInfo() { SlotIndex Start, Stop; tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB); - // The last split point is the latest possible insertion point that dominates - // all successor blocks. If interference reaches LastSplitPoint, it is not - // possible to insert a split or reload that makes CurLI live in the - // outgoing bundle. - MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB); - if (LSP == BI.MBB->end()) - BI.LastSplitPoint = Stop; - else - BI.LastSplitPoint = LIS.getInstructionIndex(LSP); - // LVI is the first live segment overlapping MBB. BI.LiveIn = LVI->start <= Start; if (!BI.LiveIn) BI.Def = LVI->start; // Find the first and last uses in the block. - BI.Uses = hasUses(MFI); - if (BI.Uses && UseI != UseE) { + bool Uses = UseI != UseE && *UseI < Stop; + if (Uses) { BI.FirstUse = *UseI; assert(BI.FirstUse >= Start); do ++UseI; @@ -149,7 +193,16 @@ void SplitAnalysis::calcLiveBlockInfo() { // Don't set LiveThrough when the block has a gap. BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut; - LiveBlocks.push_back(BI); + if (Uses) + UseBlocks.push_back(BI); + else { + ++NumThroughBlocks; + ThroughBlocks.set(BI.MBB->getNumber()); + } + // FIXME: This should never happen. The live range stops or starts without a + // corresponding use. An earlier pass did something wrong. + if (!BI.LiveThrough && !Uses) + return false; // LVI is now at LVE or LVI->end >= Stop. 
if (LVI == LVE) @@ -165,6 +218,30 @@ void SplitAnalysis::calcLiveBlockInfo() { else MFI = LIS.getMBBFromIndex(LVI->start); } + return true; +} + +unsigned SplitAnalysis::countLiveBlocks(const LiveInterval *cli) const { + if (cli->empty()) + return 0; + LiveInterval *li = const_cast(cli); + LiveInterval::iterator LVI = li->begin(); + LiveInterval::iterator LVE = li->end(); + unsigned Count = 0; + + // Loop over basic blocks where li is live. + MachineFunction::const_iterator MFI = LIS.getMBBFromIndex(LVI->start); + SlotIndex Stop = LIS.getMBBEndIdx(MFI); + for (;;) { + ++Count; + LVI = li->advanceTo(LVI, Stop); + if (LVI == LVE) + return Count; + do { + ++MFI; + Stop = LIS.getMBBEndIdx(MFI); + } while (Stop <= LVI->start); + } } bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { @@ -181,15 +258,6 @@ bool SplitAnalysis::isOriginalEndpoint(SlotIndex Idx) const { return I != Orig.begin() && (--I)->end == Idx; } -void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const { - for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) { - unsigned count = UsingBlocks.lookup(*I); - OS << " BB#" << (*I)->getNumber(); - if (count) - OS << '(' << count << ')'; - } -} - void SplitAnalysis::analyze(const LiveInterval *li) { clear(); CurLI = li; @@ -198,171 +266,242 @@ void SplitAnalysis::analyze(const LiveInterval *li) { //===----------------------------------------------------------------------===// -// LiveIntervalMap +// Split Editor //===----------------------------------------------------------------------===// -// Work around the fact that the std::pair constructors are broken for pointer -// pairs in some implementations. makeVV(x, 0) works. -static inline std::pair -makeVV(const VNInfo *a, VNInfo *b) { - return std::make_pair(a, b); -} +/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. 
+SplitEditor::SplitEditor(SplitAnalysis &sa, + LiveIntervals &lis, + VirtRegMap &vrm, + MachineDominatorTree &mdt) + : SA(sa), LIS(lis), VRM(vrm), + MRI(vrm.getMachineFunction().getRegInfo()), + MDT(mdt), + TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), + TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + Edit(0), + OpenIdx(0), + RegAssign(Allocator) +{} -void LiveIntervalMap::reset(LiveInterval *li) { - LI = li; +void SplitEditor::reset(LiveRangeEdit &lre) { + Edit = &lre; + OpenIdx = 0; + RegAssign.clear(); Values.clear(); - LiveOutCache.clear(); + + // We don't need to clear LiveOutCache, only LiveOutSeen entries are read. + LiveOutSeen.clear(); + + // We don't need an AliasAnalysis since we will only be performing + // cheap-as-a-copy remats anyway. + Edit->anyRematerializable(LIS, TII, 0); } -bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const { - ValueMap::const_iterator i = Values.find(ParentVNI); - return i != Values.end() && i->second == 0; +void SplitEditor::dump() const { + if (RegAssign.empty()) { + dbgs() << " empty\n"; + return; + } + + for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) + dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); + dbgs() << '\n'; } -// defValue - Introduce a LI def for ParentVNI that could be later than -// ParentVNI->def. -VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) { - assert(LI && "call reset first"); +VNInfo *SplitEditor::defValue(unsigned RegIdx, + const VNInfo *ParentVNI, + SlotIndex Idx) { assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); - assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); + assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); + LiveInterval *LI = Edit->get(RegIdx); // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - // Preserve the PHIDef bit. 
- if (ParentVNI->isPHIDef() && Idx == ParentVNI->def) - VNI->setIsPHIDef(true); - // Use insert for lookup, so we can add missing values with a second lookup. - std::pair InsP = - Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0)); + std::pair InsP = + Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), VNI)); - // This is now a complex def. Mark with a NULL in valueMap. - if (!InsP.second) + // This was the first time (RegIdx, ParentVNI) was mapped. + // Keep it as a simple def without any liveness. + if (InsP.second) + return VNI; + + // If the previous value was a simple mapping, add liveness for it now. + if (VNInfo *OldVNI = InsP.first->second) { + SlotIndex Def = OldVNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + // No longer a simple mapping. InsP.first->second = 0; + } + + // This is a complex mapping, add liveness for VNI + SlotIndex Def = VNI->def; + LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); return VNI; } - -// mapValue - Find the mapped value for ParentVNI at Idx. -// Potentially create phi-def values. -VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, - bool *simple) { - assert(LI && "call reset first"); +void SplitEditor::markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI) { assert(ParentVNI && "Mapping NULL value"); + VNInfo *&VNI = Values[std::make_pair(RegIdx, ParentVNI->id)]; + + // ParentVNI was either unmapped or already complex mapped. Either way. + if (!VNI) + return; + + // This was previously a single mapping. Make sure the old def is represented + // by a trivial live range. + SlotIndex Def = VNI->def; + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + VNI = 0; +} + +// extendRange - Extend the live range to reach Idx. +// Potentially create phi-def values. 
+void SplitEditor::extendRange(unsigned RegIdx, SlotIndex Idx) { assert(Idx.isValid() && "Invalid SlotIndex"); - assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI"); - - // Use insert for lookup, so we can add missing values with a second lookup. - std::pair InsP = - Values.insert(makeVV(ParentVNI, 0)); - - // This was an unknown value. Create a simple mapping. - if (InsP.second) { - if (simple) *simple = true; - return InsP.first->second = LI->createValueCopy(ParentVNI, - LIS.getVNInfoAllocator()); - } - - // This was a simple mapped value. - if (InsP.first->second) { - if (simple) *simple = true; - return InsP.first->second; - } - - // This is a complex mapped value. There may be multiple defs, and we may need - // to create phi-defs. - if (simple) *simple = false; MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx); assert(IdxMBB && "No MBB at Idx"); + LiveInterval *LI = Edit->get(RegIdx); // Is there a def in the same MBB we can extend? - if (VNInfo *VNI = extendTo(IdxMBB, Idx)) - return VNI; + if (LI->extendInBlock(LIS.getMBBStartIdx(IdxMBB), Idx)) + return; // Now for the fun part. We know that ParentVNI potentially has multiple defs, // and we may need to create even more phi-defs to preserve VNInfo SSA form. // Perform a search for all predecessor blocks where we know the dominating - // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB. - DEBUG(dbgs() << "\n Reaching defs for BB#" << IdxMBB->getNumber() - << " at " << Idx << " in " << *LI << '\n'); + // VNInfo. + VNInfo *VNI = findReachingDefs(LI, IdxMBB, Idx.getNextSlot()); + + // When there were multiple different values, we may need new PHIs. + if (!VNI) + return updateSSA(); + + // Poor man's SSA update for the single-value case. 
+ LiveOutPair LOP(VNI, MDT[LIS.getMBBFromIndex(VNI->def)]); + for (SmallVectorImpl::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineBasicBlock *MBB = I->DomNode->getBlock(); + SlotIndex Start = LIS.getMBBStartIdx(MBB); + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { + LiveOutCache[MBB] = LOP; + LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + } + } +} + +/// findReachingDefs - Search the CFG for known live-out values. +/// Add required live-in blocks to LiveInBlocks. +VNInfo *SplitEditor::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill) { + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } // Blocks where LI should be live-in. - SmallVector LiveIn; - LiveIn.push_back(MDT[IdxMBB]); + SmallVector WorkList(1, KillMBB); + + // Remember if we have seen more than one value. + bool UniqueVNI = true; + VNInfo *TheVNI = 0; // Using LiveOutCache as a visited set, perform a BFS for all reaching defs. - for (unsigned i = 0; i != LiveIn.size(); ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (unsigned i = 0; i != WorkList.size(); ++i) { + MachineBasicBlock *MBB = WorkList[i]; + assert(!MBB->pred_empty() && "Value live-in to entry block?"); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; + LiveOutPair &LOP = LiveOutCache[Pred]; + // Is this a known live-out block? - std::pair LOIP = - LiveOutCache.insert(std::make_pair(Pred, LiveOutPair())); - // Yes, we have been here before. 
- if (!LOIP.second) { - DEBUG(if (VNInfo *VNI = LOIP.first->second.first) - dbgs() << " known valno #" << VNI->id - << " at BB#" << Pred->getNumber() << '\n'); + if (LiveOutSeen.test(Pred->getNumber())) { + if (VNInfo *VNI = LOP.first) { + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; + } continue; } + // First time. LOP is garbage and must be cleared below. + LiveOutSeen.set(Pred->getNumber()); + // Does Pred provide a live-out value? - SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot(); - if (VNInfo *VNI = extendTo(Pred, Last)) { - MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def); - DEBUG(dbgs() << " found valno #" << VNI->id - << " from BB#" << DefMBB->getNumber() - << " at BB#" << Pred->getNumber() << '\n'); - LiveOutPair &LOP = LOIP.first->second; - LOP.first = VNI; - LOP.second = MDT[DefMBB]; + SlotIndex Start, Last; + tie(Start, Last) = LIS.getSlotIndexes()->getMBBRange(Pred); + Last = Last.getPrevSlot(); + VNInfo *VNI = LI->extendInBlock(Start, Last); + LOP.first = VNI; + if (VNI) { + LOP.second = MDT[LIS.getMBBFromIndex(VNI->def)]; + if (TheVNI && TheVNI != VNI) + UniqueVNI = false; + TheVNI = VNI; continue; } + LOP.second = 0; + // No, we need a live-in value for Pred as well - if (Pred != IdxMBB) - LiveIn.push_back(MDT[Pred]); + if (Pred != KillMBB) + WorkList.push_back(Pred); + else + // Loopback to KillMBB, so value is really live through. + Kill = SlotIndex(); } } - // We may need to add phi-def values to preserve the SSA form. + // Transfer WorkList to LiveInBlocks in reverse order. + // This ordering works best with updateSSA(). + LiveInBlocks.clear(); + LiveInBlocks.reserve(WorkList.size()); + while(!WorkList.empty()) + LiveInBlocks.push_back(MDT[WorkList.pop_back_val()]); + + // The kill block may not be live-through. + assert(LiveInBlocks.back().DomNode->getBlock() == KillMBB); + LiveInBlocks.back().Kill = Kill; + + return UniqueVNI ? 
TheVNI : 0; +} + +void SplitEditor::updateSSA() { // This is essentially the same iterative algorithm that SSAUpdater uses, // except we already have a dominator tree, so we don't have to recompute it. - VNInfo *IdxVNI = 0; unsigned Changes; do { Changes = 0; - DEBUG(dbgs() << " Iterating over " << LiveIn.size() << " blocks.\n"); - // Propagate live-out values down the dominator tree, inserting phi-defs when - // necessary. Since LiveIn was created by a BFS, going backwards makes it more - // likely for us to visit immediate dominators before their children. - for (unsigned i = LiveIn.size(); i; --i) { - MachineDomTreeNode *Node = LiveIn[i-1]; + // Propagate live-out values down the dominator tree, inserting phi-defs + // when necessary. + for (SmallVectorImpl::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + MachineDomTreeNode *Node = I->DomNode; + // Skip block if the live-in value has already been determined. + if (!Node) + continue; MachineBasicBlock *MBB = Node->getBlock(); MachineDomTreeNode *IDom = Node->getIDom(); LiveOutPair IDomValue; + // We need a live-in value to a block with no immediate dominator? // This is probably an unreachable block that has survived somehow. - bool needPHI = !IDom; + bool needPHI = !IDom || !LiveOutSeen.test(IDom->getBlock()->getNumber()); - // Get the IDom live-out value. - if (!needPHI) { - LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock()); - if (I != LiveOutCache.end()) - IDomValue = I->second; - else - // If IDom is outside our set of live-out blocks, there must be new - // defs, and we need a phi-def here. - needPHI = true; - } - - // IDom dominates all of our predecessors, but it may not be the immediate - // dominator. Check if any of them have live-out values that are properly - // dominated by IDom. If so, we need a phi-def here. + // IDom dominates all of our predecessors, but it may not be their + // immediate dominator. 
Check if any of them have live-out values that are + // properly dominated by IDom. If so, we need a phi-def here. if (!needPHI) { + IDomValue = LiveOutCache[IDom->getBlock()]; for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { LiveOutPair Value = LiveOutCache[*PI]; @@ -378,215 +517,57 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx, } } + // The value may be live-through even if Kill is set, as can happen when + // we are called from extendRange. In that case LiveOutSeen is true, and + // LiveOutCache indicates a foreign or missing value. + LiveOutPair &LOP = LiveOutCache[MBB]; + // Create a phi-def if required. if (needPHI) { ++Changes; SlotIndex Start = LIS.getMBBStartIdx(MBB); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator()); VNI->setIsPHIDef(true); - DEBUG(dbgs() << " - BB#" << MBB->getNumber() - << " phi-def #" << VNI->id << " at " << Start << '\n'); - // We no longer need LI to be live-in. - LiveIn.erase(LiveIn.begin()+(i-1)); - // Blocks in LiveIn are either IdxMBB, or have a value live-through. - if (MBB == IdxMBB) - IdxVNI = VNI; - // Check if we need to update live-out info. - LiveOutMap::iterator I = LiveOutCache.find(MBB); - if (I == LiveOutCache.end() || I->second.second == Node) { - // We already have a live-out defined in MBB, so this must be IdxMBB. - assert(MBB == IdxMBB && "Adding phi-def to known live-out"); - LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI)); - } else { - // This phi-def is also live-out, so color the whole block. + I->Value = VNI; + // This block is done, we know the final value. 
+ I->DomNode = 0; + if (I->Kill.isValid()) + LI->addRange(LiveRange(Start, I->Kill, VNI)); + else { LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - I->second = LiveOutPair(VNI, Node); + LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { - // No phi-def here. Remember incoming value for IdxMBB. - if (MBB == IdxMBB) - IdxVNI = IDomValue.first; + // No phi-def here. Remember incoming value. + I->Value = IDomValue.first; + if (I->Kill.isValid()) + continue; // Propagate IDomValue if needed: // MBB is live-out and doesn't define its own value. - LiveOutMap::iterator I = LiveOutCache.find(MBB); - if (I != LiveOutCache.end() && I->second.second != Node && - I->second.first != IDomValue.first) { + if (LOP.second != Node && LOP.first != IDomValue.first) { ++Changes; - I->second = IDomValue; - DEBUG(dbgs() << " - BB#" << MBB->getNumber() - << " idom valno #" << IDomValue.first->id - << " from BB#" << IDom->getBlock()->getNumber() << '\n'); + LOP = IDomValue; } } } - DEBUG(dbgs() << " - made " << Changes << " changes.\n"); } while (Changes); - assert(IdxVNI && "Didn't find value for Idx"); - -#ifndef NDEBUG - // Check the LiveOutCache invariants. - for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); - I != E; ++I) { - assert(I->first && "Null MBB entry in cache"); - assert(I->second.first && "Null VNInfo in cache"); - assert(I->second.second && "Null DomTreeNode in cache"); - if (I->second.second->getBlock() == I->first) - continue; - for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), - PE = I->first->pred_end(); PI != PE; ++PI) - assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant"); - } -#endif - - // Since we went through the trouble of a full BFS visiting all reaching defs, - // the values in LiveIn are now accurate. No more phi-defs are needed + // The values in LiveInBlocks are now accurate. No more phi-defs are needed // for these blocks, so we can color the live ranges. 
- // This makes the next mapValue call much faster. - for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) { - MachineBasicBlock *MBB = LiveIn[i]->getBlock(); + for (SmallVectorImpl::iterator I = LiveInBlocks.begin(), + E = LiveInBlocks.end(); I != E; ++I) { + if (!I->DomNode) + continue; + assert(I->Value && "No live-in value found"); + MachineBasicBlock *MBB = I->DomNode->getBlock(); SlotIndex Start = LIS.getMBBStartIdx(MBB); - VNInfo *VNI = LiveOutCache.lookup(MBB).first; - - // Anything in LiveIn other than IdxMBB is live-through. - // In IdxMBB, we should stop at Idx unless the same value is live-out. - if (MBB == IdxMBB && IdxVNI != VNI) - LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI)); - else - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); + unsigned RegIdx = RegAssign.lookup(Start); + LiveInterval *LI = Edit->get(RegIdx); + LI->addRange(LiveRange(Start, I->Kill.isValid() ? + I->Kill : LIS.getMBBEndIdx(MBB), I->Value)); } - - return IdxVNI; -} - -#ifndef NDEBUG -void LiveIntervalMap::dumpCache() { - for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end(); - I != E; ++I) { - assert(I->first && "Null MBB entry in cache"); - assert(I->second.first && "Null VNInfo in cache"); - assert(I->second.second && "Null DomTreeNode in cache"); - dbgs() << " cache: BB#" << I->first->getNumber() - << " has valno #" << I->second.first->id << " from BB#" - << I->second.second->getBlock()->getNumber() << ", preds"; - for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(), - PE = I->first->pred_end(); PI != PE; ++PI) - dbgs() << " BB#" << (*PI)->getNumber(); - dbgs() << '\n'; - } - dbgs() << " cache: " << LiveOutCache.size() << " entries.\n"; -} -#endif - -// extendTo - Find the last LI value defined in MBB at or before Idx. The -// ParentLI is assumed to be live at Idx. Extend the live range to Idx. -// Return the found VNInfo, or NULL. 
-VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) { - assert(LI && "call reset first"); - LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx); - if (I == LI->begin()) - return 0; - --I; - if (I->end <= LIS.getMBBStartIdx(MBB)) - return 0; - if (I->end <= Idx) - I->end = Idx.getNextSlot(); - return I->valno; -} - -// addSimpleRange - Add a simple range from ParentLI to LI. -// ParentVNI must be live in the [Start;End) interval. -void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End, - const VNInfo *ParentVNI) { - assert(LI && "call reset first"); - bool simple; - VNInfo *VNI = mapValue(ParentVNI, Start, &simple); - // A simple mapping is easy. - if (simple) { - LI->addRange(LiveRange(Start, End, VNI)); - return; - } - - // ParentVNI is a complex value. We must map per MBB. - MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); - MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot()); - - if (MBB == MBBE) { - LI->addRange(LiveRange(Start, End, VNI)); - return; - } - - // First block. - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI)); - - // Run sequence of full blocks. - for (++MBB; MBB != MBBE; ++MBB) { - Start = LIS.getMBBStartIdx(MBB); - LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), - mapValue(ParentVNI, Start))); - } - - // Final block. - Start = LIS.getMBBStartIdx(MBB); - if (Start != End) - LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start))); -} - -/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. -/// All needed values whose def is not inside [Start;End) must be defined -/// beforehand so mapValue will work. -void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) { - assert(LI && "call reset first"); - LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end(); - LiveInterval::const_iterator I = std::lower_bound(B, E, Start); - - // Check if --I begins before Start and overlaps. 
- if (I != B) { - --I; - if (I->end > Start) - addSimpleRange(Start, std::min(End, I->end), I->valno); - ++I; - } - - // The remaining ranges begin after Start. - for (;I != E && I->start < End; ++I) - addSimpleRange(I->start, std::min(End, I->end), I->valno); -} - - -//===----------------------------------------------------------------------===// -// Split Editor -//===----------------------------------------------------------------------===// - -/// Create a new SplitEditor for editing the LiveInterval analyzed by SA. -SplitEditor::SplitEditor(SplitAnalysis &sa, - LiveIntervals &lis, - VirtRegMap &vrm, - MachineDominatorTree &mdt, - LiveRangeEdit &edit) - : SA(sa), LIS(lis), VRM(vrm), - MRI(vrm.getMachineFunction().getRegInfo()), - MDT(mdt), - TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), - TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), - Edit(edit), - OpenIdx(0), - RegAssign(Allocator) -{ - // We don't need an AliasAnalysis since we will only be performing - // cheap-as-a-copy remats anyway. - Edit.anyRematerializable(LIS, TII, 0); -} - -void SplitEditor::dump() const { - if (RegAssign.empty()) { - dbgs() << " empty\n"; - return; - } - - for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I) - dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value(); - dbgs() << '\n'; } VNInfo *SplitEditor::defFromParent(unsigned RegIdx, @@ -596,51 +577,53 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, MachineBasicBlock::iterator I) { MachineInstr *CopyMI = 0; SlotIndex Def; - LiveInterval *LI = Edit.get(RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // We may be trying to avoid interference that ends at a deleted instruction, + // so always begin RegIdx 0 early and all others late. + bool Late = RegIdx != 0; // Attempt cheap-as-a-copy rematerialization. 
LiveRangeEdit::Remat RM(ParentVNI); - if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) { - Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI); + if (Edit->canRematerializeAt(RM, UseIdx, true, LIS)) { + Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI, Late); } else { // Can't remat, just insert a copy from parent. CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) - .addReg(Edit.getReg()); - Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex(); + .addReg(Edit->getReg()); + Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) + .getDefIndex(); } // Define the value in Reg. - VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def); + VNInfo *VNI = defValue(RegIdx, ParentVNI, Def); VNI->setCopy(CopyMI); - - // Add minimal liveness for the new value. - Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); return VNI; } /// Create a new virtual register and live interval. -void SplitEditor::openIntv() { - assert(!OpenIdx && "Previous LI not closed before openIntv"); - +unsigned SplitEditor::openIntv() { // Create the complement as index 0. - if (Edit.empty()) { - Edit.create(MRI, LIS, VRM); - LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); - LIMappers.back().reset(Edit.get(0)); - } + if (Edit->empty()) + Edit->create(LIS, VRM); // Create the open interval. 
- OpenIdx = Edit.size(); - Edit.create(MRI, LIS, VRM); - LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent())); - LIMappers[OpenIdx].reset(Edit.get(OpenIdx)); + OpenIdx = Edit->size(); + Edit->create(LIS, VRM); + return OpenIdx; +} + +void SplitEditor::selectIntv(unsigned Idx) { + assert(Idx != 0 && "Cannot select the complement interval"); + assert(Idx < Edit->size() && "Can only select previously opened interval"); + OpenIdx = Idx; } SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) { assert(OpenIdx && "openIntv not called before enterIntvBefore"); DEBUG(dbgs() << " enterIntvBefore " << Idx); Idx = Idx.getBaseIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx; @@ -658,14 +641,14 @@ SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) { SlotIndex End = LIS.getMBBEndIdx(&MBB); SlotIndex Last = End.getPrevSlot(); DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Last); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return End; } DEBUG(dbgs() << ": valno " << ParentVNI->id); VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB, - LIS.getLastSplitPoint(Edit.getParent(), &MBB)); + LIS.getLastSplitPoint(Edit->getParent(), &MBB)); RegAssign.insert(VNI->def, End, OpenIdx); DEBUG(dump()); return VNI->def; @@ -689,7 +672,7 @@ SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) { // The interval must be live beyond the instruction at Idx. 
Idx = Idx.getBoundaryIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx.getNextSlot(); @@ -709,7 +692,7 @@ SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) { // The interval must be live into the instruction at Idx. Idx = Idx.getBoundaryIndex(); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Idx.getNextSlot(); @@ -727,7 +710,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { SlotIndex Start = LIS.getMBBStartIdx(&MBB); DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start); - VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start); + VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); if (!ParentVNI) { DEBUG(dbgs() << ": not live\n"); return Start; @@ -742,30 +725,169 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); - assert(Edit.getParent().getVNInfoAt(Start) == - Edit.getParent().getVNInfoAt(End.getPrevSlot()) && + const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); + assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && "Parent changes value in extended range"); - assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); - // Treat this as useIntv() for now. The complement interval will be extended - // as needed by mapValue(). + // The complement interval will be extended as needed by extendRange(). 
+ if (ParentVNI) + markComplexMapped(0, ParentVNI); DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):"); RegAssign.insert(Start, End, OpenIdx); DEBUG(dump()); } -/// closeIntv - Indicate that we are done editing the currently open -/// LiveInterval, and ranges can be trimmed. -void SplitEditor::closeIntv() { - assert(OpenIdx && "openIntv not called before closeIntv"); - OpenIdx = 0; +/// transferValues - Transfer all possible values to the new live ranges. +/// Values that were rematerialized are left alone, they need extendRange(). +bool SplitEditor::transferValues() { + bool Skipped = false; + LiveInBlocks.clear(); + RegAssignMap::const_iterator AssignI = RegAssign.begin(); + for (LiveInterval::const_iterator ParentI = Edit->getParent().begin(), + ParentE = Edit->getParent().end(); ParentI != ParentE; ++ParentI) { + DEBUG(dbgs() << " blit " << *ParentI << ':'); + VNInfo *ParentVNI = ParentI->valno; + // RegAssign has holes where RegIdx 0 should be used. + SlotIndex Start = ParentI->start; + AssignI.advanceTo(Start); + do { + unsigned RegIdx; + SlotIndex End = ParentI->end; + if (!AssignI.valid()) { + RegIdx = 0; + } else if (AssignI.start() <= Start) { + RegIdx = AssignI.value(); + if (AssignI.stop() < End) { + End = AssignI.stop(); + ++AssignI; + } + } else { + RegIdx = 0; + End = std::min(End, AssignI.start()); + } + + // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. + DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); + LiveInterval *LI = Edit->get(RegIdx); + + // Check for a simply defined value that can be blitted directly. + if (VNInfo *VNI = Values.lookup(std::make_pair(RegIdx, ParentVNI->id))) { + DEBUG(dbgs() << ':' << VNI->id); + LI->addRange(LiveRange(Start, End, VNI)); + Start = End; + continue; + } + + // Skip rematerialized values, we need to use extendRange() and + // extendPHIKillRanges() to completely recompute the live ranges. 
+ if (Edit->didRematerialize(ParentVNI)) { + DEBUG(dbgs() << "(remat)"); + Skipped = true; + Start = End; + continue; + } + + // Initialize the live-out cache the first time it is needed. + if (LiveOutSeen.empty()) { + unsigned N = VRM.getMachineFunction().getNumBlockIDs(); + LiveOutSeen.resize(N); + LiveOutCache.resize(N); + } + + // This value has multiple defs in RegIdx, but it wasn't rematerialized, + // so the live range is accurate. Add live-in blocks in [Start;End) to the + // LiveInBlocks. + MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start); + SlotIndex BlockStart, BlockEnd; + tie(BlockStart, BlockEnd) = LIS.getSlotIndexes()->getMBBRange(MBB); + + // The first block may be live-in, or it may have its own def. + if (Start != BlockStart) { + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped value"); + DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); + // MBB has its own def. Is it also live-out? + if (BlockEnd <= End) { + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + // Skip to the next block for live-in. + ++MBB; + BlockStart = BlockEnd; + } + + // Handle the live-in blocks covered by [Start;End). + assert(Start <= BlockStart && "Expected live-in block"); + while (BlockStart < End) { + DEBUG(dbgs() << ">BB#" << MBB->getNumber()); + BlockEnd = LIS.getMBBEndIdx(MBB); + if (BlockStart == ParentVNI->def) { + // This block has the def of a parent PHI, so it isn't live-in. + assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); + VNInfo *VNI = LI->extendInBlock(BlockStart, + std::min(BlockEnd, End).getPrevSlot()); + assert(VNI && "Missing def for complex mapped parent PHI"); + if (End >= BlockEnd) { + // Live-out as well. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair(VNI, MDT[MBB]); + } + } else { + // This block needs a live-in value. 
+ LiveInBlocks.push_back(MDT[MBB]); + // The last block covered may not be live-out. + if (End < BlockEnd) + LiveInBlocks.back().Kill = End; + else { + // Live-out, but we need updateSSA to tell us the value. + LiveOutSeen.set(MBB->getNumber()); + LiveOutCache[MBB] = LiveOutPair((VNInfo*)0, + (MachineDomTreeNode*)0); + } + } + BlockStart = BlockEnd; + ++MBB; + } + Start = End; + } while (Start != ParentI->end); + DEBUG(dbgs() << '\n'); + } + + if (!LiveInBlocks.empty()) + updateSSA(); + + return Skipped; } -/// rewriteAssigned - Rewrite all uses of Edit.getReg(). -void SplitEditor::rewriteAssigned() { - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()), +void SplitEditor::extendPHIKillRanges() { + // Extend live ranges to be live-out for successor PHI values. + for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), + E = Edit->getParent().vni_end(); I != E; ++I) { + const VNInfo *PHIVNI = *I; + if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) + continue; + unsigned RegIdx = RegAssign.lookup(PHIVNI->def); + MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); + for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), + PE = MBB->pred_end(); PI != PE; ++PI) { + SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); + // The predecessor may not have a live-out value. That is OK, like an + // undef PHI operand. + if (Edit->getParent().liveAt(End)) { + assert(RegAssign.lookup(End) == RegIdx && + "Different register assignment in phi predecessor"); + extendRange(RegIdx, End); + } + } + } +} + +/// rewriteAssigned - Rewrite all uses of Edit->getReg(). 
+void SplitEditor::rewriteAssigned(bool ExtendRanges) { + for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()), RE = MRI.reg_end(); RI != RE;) { MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); @@ -780,147 +902,145 @@ void SplitEditor::rewriteAssigned() { // operands don't really read the register, so just assign them to // the complement. if (MO.isUse() && MO.isUndef()) { - MO.setReg(Edit.get(0)->reg); + MO.setReg(Edit->get(0)->reg); continue; } SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex(); + if (MO.isDef()) + Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - MO.setReg(Edit.get(RegIdx)->reg); + MO.setReg(Edit->get(RegIdx)->reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); - // Extend liveness to Idx. - const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx); - LIMappers[RegIdx].mapValue(ParentVNI, Idx); - } -} - -/// rewriteSplit - Rewrite uses of Intvs[0] according to the ConEQ mapping. -void SplitEditor::rewriteComponents(const SmallVectorImpl &Intvs, - const ConnectedVNInfoEqClasses &ConEq) { - for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg), - RE = MRI.reg_end(); RI != RE;) { - MachineOperand &MO = RI.getOperand(); - MachineInstr *MI = MO.getParent(); - ++RI; - if (MO.isUse() && MO.isUndef()) + // Extend liveness to Idx if the instruction reads reg. + if (!ExtendRanges) continue; - // DBG_VALUE instructions should have been eliminated earlier. - SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? 
Idx.getUseIndex() : Idx.getDefIndex(); - DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' - << Idx << ':'); - const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx); - assert(VNI && "Interval not live at use."); - MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg); - DEBUG(dbgs() << VNI->id << '\t' << *MI); + + // Skip instructions that don't read Reg. + if (MO.isDef()) { + if (!MO.getSubReg() && !MO.isEarlyClobber()) + continue; + // We may wan't to extend a live range for a partial redef, or for a use + // tied to an early clobber. + Idx = Idx.getPrevSlot(); + if (!Edit->getParent().liveAt(Idx)) + continue; + } else + Idx = Idx.getUseIndex(); + + extendRange(RegIdx, Idx); } } -void SplitEditor::finish() { - assert(OpenIdx == 0 && "Previous LI not closed before rewrite"); +void SplitEditor::deleteRematVictims() { + SmallVector Dead; + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ + LiveInterval *LI = *I; + for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); + LII != LIE; ++LII) { + // Dead defs end at the store slot. + if (LII->end != LII->valno->def.getNextSlot()) + continue; + MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); + assert(MI && "Missing instruction for dead def"); + MI->addRegisterDead(LI->reg, &TRI); + + if (!MI->allDefsAreDead()) + continue; + + DEBUG(dbgs() << "All defs dead: " << *MI); + Dead.push_back(MI); + } + } + + if (Dead.empty()) + return; + + Edit->eliminateDeadDefs(Dead, LIS, VRM, TII); +} + +void SplitEditor::finish(SmallVectorImpl *LRMap) { + ++NumFinished; // At this point, the live intervals in Edit contain VNInfos corresponding to // the inserted copies. // Add the original defs from the parent interval. 
- for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), - E = Edit.getParent().vni_end(); I != E; ++I) { + for (LiveInterval::const_vni_iterator I = Edit->getParent().vni_begin(), + E = Edit->getParent().vni_end(); I != E; ++I) { const VNInfo *ParentVNI = *I; if (ParentVNI->isUnused()) continue; - LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)]; - VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def); - LIM.getLI()->addRange(LiveRange(ParentVNI->def, - ParentVNI->def.getNextSlot(), VNI)); - // Mark all values as complex to force liveness computation. - // This should really only be necessary for remat victims, but we are lazy. - LIM.markComplexMapped(ParentVNI); + unsigned RegIdx = RegAssign.lookup(ParentVNI->def); + VNInfo *VNI = defValue(RegIdx, ParentVNI, ParentVNI->def); + VNI->setIsPHIDef(ParentVNI->isPHIDef()); + VNI->setCopy(ParentVNI->getCopy()); + + // Mark rematted values as complex everywhere to force liveness computation. + // The new live ranges may be truncated. + if (Edit->didRematerialize(ParentVNI)) + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + markComplexMapped(i, ParentVNI); } #ifndef NDEBUG // Every new interval must have a def by now, otherwise the split is bogus. - for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) assert((*I)->hasAtLeastOneValue() && "Split interval has no value"); #endif - // FIXME: Don't recompute the liveness of all values, infer it from the - // overlaps between the parent live interval and RegAssign. - // The mapValue algorithm is only necessary when: - // - The parent value maps to multiple defs, and new phis are needed, or - // - The value has been rematerialized before some uses, and we want to - // minimize the live range so it only reaches the remaining uses. 
- // All other values have simple liveness that can be computed from RegAssign - // and the parent live interval. + // Transfer the simply mapped values, check if any are skipped. + bool Skipped = transferValues(); + if (Skipped) + extendPHIKillRanges(); + else + ++NumSimple; - // Extend live ranges to be live-out for successor PHI values. - for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(), - E = Edit.getParent().vni_end(); I != E; ++I) { - const VNInfo *PHIVNI = *I; - if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) - continue; - unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveIntervalMap &LIM = LIMappers[RegIdx]; - MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); - DEBUG(dbgs() << " map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def - << " -> " << RegIdx << '\n'); - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot(); - DEBUG(dbgs() << " pred BB#" << (*PI)->getNumber() << '@' << End); - // The predecessor may not have a live-out value. That is OK, like an - // undef PHI operand. - if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) { - DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n"); - assert(RegAssign.lookup(End) == RegIdx && - "Different register assignment in phi predecessor"); - LIM.mapValue(VNI, End); - } - else - DEBUG(dbgs() << " is not live-out\n"); - } - DEBUG(dbgs() << " " << *LIM.getLI() << '\n'); - } + // Rewrite virtual registers, possibly extending ranges. + rewriteAssigned(Skipped); - // Rewrite instructions. - rewriteAssigned(); - - // FIXME: Delete defs that were rematted everywhere. + // Delete defs that were rematted everywhere. + if (Skipped) + deleteRematVictims(); // Get rid of unused values and set phi-kill flags. 
- for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I) + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) (*I)->RenumberValues(LIS); + // Provide a reverse mapping from original indices to Edit ranges. + if (LRMap) { + LRMap->clear(); + for (unsigned i = 0, e = Edit->size(); i != e; ++i) + LRMap->push_back(i); + } + // Now check if any registers were separated into multiple components. ConnectedVNInfoEqClasses ConEQ(LIS); - for (unsigned i = 0, e = Edit.size(); i != e; ++i) { + for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = Edit.get(i); + LiveInterval *li = Edit->get(i); unsigned NumComp = ConEQ.Classify(li); if (NumComp <= 1) continue; DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n'); SmallVector dups; dups.push_back(li); - for (unsigned i = 1; i != NumComp; ++i) - dups.push_back(&Edit.create(MRI, LIS, VRM)); - rewriteComponents(dups, ConEQ); - ConEQ.Distribute(&dups[0]); + for (unsigned j = 1; j != NumComp; ++j) + dups.push_back(&Edit->create(LIS, VRM)); + ConEQ.Distribute(&dups[0], MRI); + // The new intervals all map back to i. + if (LRMap) + LRMap->resize(Edit->size(), i); } // Calculate spill weight and allocation hints for new intervals. - VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops); - for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){ - LiveInterval &li = **I; - vrai.CalculateRegClass(li.reg); - vrai.CalculateWeightAndHint(li); - DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName() - << ":" << li << '\n'); - } + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), LIS, SA.Loops); + + assert(!LRMap || LRMap->size() == Edit->size()); } @@ -932,113 +1052,42 @@ void SplitEditor::finish() { /// may be an advantage to split CurLI for the duration of the block. 
bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) { // If CurLI is local to one block, there is no point to splitting it. - if (LiveBlocks.size() <= 1) + if (UseBlocks.size() <= 1) return false; // Add blocks with multiple uses. - for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) { - const BlockInfo &BI = LiveBlocks[i]; - if (!BI.Uses) - continue; - unsigned Instrs = UsingBlocks.lookup(BI.MBB); - if (Instrs <= 1) - continue; - if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough) + for (unsigned i = 0, e = UseBlocks.size(); i != e; ++i) { + const BlockInfo &BI = UseBlocks[i]; + if (BI.FirstUse == BI.LastUse) continue; Blocks.insert(BI.MBB); } return !Blocks.empty(); } +void SplitEditor::splitSingleBlock(const SplitAnalysis::BlockInfo &BI) { + openIntv(); + SlotIndex LastSplitPoint = SA.getLastSplitPoint(BI.MBB->getNumber()); + SlotIndex SegStart = enterIntvBefore(std::min(BI.FirstUse, + LastSplitPoint)); + if (!BI.LiveOut || BI.LastUse < LastSplitPoint) { + useIntv(SegStart, leaveIntvAfter(BI.LastUse)); + } else { + // The last use is after the last valid split point. + SlotIndex SegStop = leaveIntvBefore(LastSplitPoint); + useIntv(SegStart, SegStop); + overlapIntv(SegStop, BI.LastUse); + } +} + /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) { DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n"); - - for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) { - const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i]; - if (!BI.Uses || !Blocks.count(BI.MBB)) - continue; - - openIntv(); - SlotIndex SegStart = enterIntvBefore(BI.FirstUse); - if (!BI.LiveOut || BI.LastUse < BI.LastSplitPoint) { - useIntv(SegStart, leaveIntvAfter(BI.LastUse)); - } else { - // The last use is after the last valid split point. 
- SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint); - useIntv(SegStart, SegStop); - overlapIntv(SegStop, BI.LastUse); - } - closeIntv(); + ArrayRef UseBlocks = SA.getUseBlocks(); + for (unsigned i = 0; i != UseBlocks.size(); ++i) { + const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; + if (Blocks.count(BI.MBB)) + splitSingleBlock(BI); } finish(); } - - -//===----------------------------------------------------------------------===// -// Sub Block Splitting -//===----------------------------------------------------------------------===// - -/// getBlockForInsideSplit - If CurLI is contained inside a single basic block, -/// and it wou pay to subdivide the interval inside that block, return it. -/// Otherwise return NULL. The returned block can be passed to -/// SplitEditor::splitInsideBlock. -const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() { - // The interval must be exclusive to one block. - if (UsingBlocks.size() != 1) - return 0; - // Don't to this for less than 4 instructions. We want to be sure that - // splitting actually reduces the instruction count per interval. - if (UsingInstrs.size() < 4) - return 0; - return UsingBlocks.begin()->first; -} - -/// splitInsideBlock - Split CurLI into multiple intervals inside MBB. -void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) { - SmallVector Uses; - Uses.reserve(SA.UsingInstrs.size()); - for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(), - E = SA.UsingInstrs.end(); I != E; ++I) - if ((*I)->getParent() == MBB) - Uses.push_back(LIS.getInstructionIndex(*I)); - DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for " - << Uses.size() << " instructions.\n"); - assert(Uses.size() >= 3 && "Need at least 3 instructions"); - array_pod_sort(Uses.begin(), Uses.end()); - - // Simple algorithm: Find the largest gap between uses as determined by slot - // indices. Create new intervals for instructions before the gap and after the - // gap. 
- unsigned bestPos = 0; - int bestGap = 0; - DEBUG(dbgs() << " dist (" << Uses[0]); - for (unsigned i = 1, e = Uses.size(); i != e; ++i) { - int g = Uses[i-1].distance(Uses[i]); - DEBUG(dbgs() << ") -" << g << "- (" << Uses[i]); - if (g > bestGap) - bestPos = i, bestGap = g; - } - DEBUG(dbgs() << "), best: -" << bestGap << "-\n"); - - // bestPos points to the first use after the best gap. - assert(bestPos > 0 && "Invalid gap"); - - // FIXME: Don't create intervals for low densities. - - // First interval before the gap. Don't create single-instr intervals. - if (bestPos > 1) { - openIntv(); - useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1])); - closeIntv(); - } - - // Second interval after the gap. - if (bestPos < Uses.size()-1) { - openIntv(); - useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back())); - closeIntv(); - } - - finish(); -} diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h index e02e6297035d..2ae760a58da5 100644 --- a/lib/CodeGen/SplitKit.h +++ b/lib/CodeGen/SplitKit.h @@ -12,7 +12,13 @@ // //===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_SPLITKIT_H +#define LLVM_CODEGEN_SPLITKIT_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -48,17 +54,9 @@ class SplitAnalysis { const MachineLoopInfo &Loops; const TargetInstrInfo &TII; - // Instructions using the the current register. - typedef SmallPtrSet InstrPtrSet; - InstrPtrSet UsingInstrs; - // Sorted slot indexes of using instructions. SmallVector UseSlots; - // The number of instructions using CurLI in each basic block. - typedef DenseMap BlockCountMap; - BlockCountMap UsingBlocks; - /// Additional information about basic blocks where the current variable is /// live. 
Such a block will look like one of these templates: /// @@ -75,35 +73,37 @@ class SplitAnalysis { SlotIndex LastUse; ///< Last instr using current reg. SlotIndex Kill; ///< Interval end point inside block. SlotIndex Def; ///< Interval start point inside block. - /// Last possible point for splitting live ranges. - SlotIndex LastSplitPoint; - bool Uses; ///< Current reg has uses or defs in block. bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above). bool LiveIn; ///< Current reg is live in. bool LiveOut; ///< Current reg is live out. - - // Per-interference pattern scratch data. - bool OverlapEntry; ///< Interference overlaps entering interval. - bool OverlapExit; ///< Interference overlaps exiting interval. }; - /// Basic blocks where var is live. This array is parallel to - /// SpillConstraints. - SmallVector LiveBlocks; - private: // Current live interval. const LiveInterval *CurLI; + /// LastSplitPoint - Last legal split point in each basic block in the current + /// function. The first entry is the first terminator, the second entry is the + /// last valid split point for a variable that is live in to a landing pad + /// successor. + SmallVector, 8> LastSplitPoint; + + /// UseBlocks - Blocks where CurLI has uses. + SmallVector UseBlocks; + + /// ThroughBlocks - Block numbers where CurLI is live through without uses. + BitVector ThroughBlocks; + + /// NumThroughBlocks - Number of live-through blocks. + unsigned NumThroughBlocks; + + SlotIndex computeLastSplitPoint(unsigned Num); + // Sumarize statistics by counting instructions using CurLI. void analyzeUses(); /// calcLiveBlockInfo - Compute per-block information about CurLI. - void calcLiveBlockInfo(); - - /// canAnalyzeBranch - Return true if MBB ends in a branch that can be - /// analyzed. 
- bool canAnalyzeBranch(const MachineBasicBlock *MBB); + bool calcLiveBlockInfo(); public: SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis, @@ -120,9 +120,14 @@ class SplitAnalysis { /// getParent - Return the last analyzed interval. const LiveInterval &getParent() const { return *CurLI; } - /// hasUses - Return true if MBB has any uses of CurLI. - bool hasUses(const MachineBasicBlock *MBB) const { - return UsingBlocks.lookup(MBB); + /// getLastSplitPoint - Return that base index of the last valid split point + /// in the basic block numbered Num. + SlotIndex getLastSplitPoint(unsigned Num) { + // Inline the common simple case. + if (LastSplitPoint[Num].first.isValid() && + !LastSplitPoint[Num].second.isValid()) + return LastSplitPoint[Num].first; + return computeLastSplitPoint(Num); } /// isOriginalEndpoint - Return true if the original live range was killed or @@ -132,127 +137,30 @@ class SplitAnalysis { /// splitting. bool isOriginalEndpoint(SlotIndex Idx) const; - typedef SmallPtrSet BlockPtrSet; + /// getUseBlocks - Return an array of BlockInfo objects for the basic blocks + /// where CurLI has uses. + ArrayRef getUseBlocks() { return UseBlocks; } - // Print a set of blocks with use counts. - void print(const BlockPtrSet&, raw_ostream&) const; + /// getNumThroughBlocks - Return the number of through blocks. + unsigned getNumThroughBlocks() const { return NumThroughBlocks; } + + /// isThroughBlock - Return true if CurLI is live through MBB without uses. + bool isThroughBlock(unsigned MBB) const { return ThroughBlocks.test(MBB); } + + /// getThroughBlocks - Return the set of through blocks. + const BitVector &getThroughBlocks() const { return ThroughBlocks; } + + /// countLiveBlocks - Return the number of blocks where li is live. + /// This is guaranteed to return the same number as getNumThroughBlocks() + + /// getUseBlocks().size() after calling analyze(li). 
+ unsigned countLiveBlocks(const LiveInterval *li) const; + + typedef SmallPtrSet BlockPtrSet; /// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from /// having CurLI split to a new live interval. Return true if Blocks can be /// passed to SplitEditor::splitSingleBlocks. bool getMultiUseBlocks(BlockPtrSet &Blocks); - - /// getBlockForInsideSplit - If CurLI is contained inside a single basic - /// block, and it would pay to subdivide the interval inside that block, - /// return it. Otherwise return NULL. The returned block can be passed to - /// SplitEditor::splitInsideBlock. - const MachineBasicBlock *getBlockForInsideSplit(); -}; - - -/// LiveIntervalMap - Map values from a large LiveInterval into a small -/// interval that is a subset. Insert phi-def values as needed. This class is -/// used by SplitEditor to create new smaller LiveIntervals. -/// -/// ParentLI is the larger interval, LI is the subset interval. Every value -/// in LI corresponds to exactly one value in ParentLI, and the live range -/// of the value is contained within the live range of the ParentLI value. -/// Values in ParentLI may map to any number of OpenLI values, including 0. -class LiveIntervalMap { - LiveIntervals &LIS; - MachineDominatorTree &MDT; - - // The parent interval is never changed. - const LiveInterval &ParentLI; - - // The child interval's values are fully contained inside ParentLI values. - LiveInterval *LI; - - typedef DenseMap ValueMap; - - // Map ParentLI values to simple values in LI that are defined at the same - // SlotIndex, or NULL for ParentLI values that have complex LI defs. - // Note there is a difference between values mapping to NULL (complex), and - // values not present (unknown/unmapped). - ValueMap Values; - - typedef std::pair LiveOutPair; - typedef DenseMap LiveOutMap; - - // LiveOutCache - Map each basic block where LI is live out to the live-out - // value and its defining block. One of these conditions shall be true: - // - // 1. 
!LiveOutCache.count(MBB) - // 2. LiveOutCache[MBB].second.getNode() == MBB - // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB] - // - // This is only a cache, the values can be computed as: - // - // VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB)) - // Node = mbt_[LIS.getMBBFromIndex(VNI->def)] - // - // The cache is also used as a visiteed set by mapValue(). - LiveOutMap LiveOutCache; - - // Dump the live-out cache to dbgs(). - void dumpCache(); - -public: - LiveIntervalMap(LiveIntervals &lis, - MachineDominatorTree &mdt, - const LiveInterval &parentli) - : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {} - - /// reset - clear all data structures and start a new live interval. - void reset(LiveInterval *); - - /// getLI - return the current live interval. - LiveInterval *getLI() const { return LI; } - - /// defValue - define a value in LI from the ParentLI value VNI and Idx. - /// Idx does not have to be ParentVNI->def, but it must be contained within - /// ParentVNI's live range in ParentLI. - /// Return the new LI value. - VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx); - - /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is - /// assumed that ParentVNI is live at Idx. - /// If ParentVNI has not been defined by defValue, it is assumed that - /// ParentVNI->def dominates Idx. - /// If ParentVNI has been defined by defValue one or more times, a value that - /// dominates Idx will be returned. This may require creating extra phi-def - /// values and adding live ranges to LI. - /// If simple is not NULL, *simple will indicate if ParentVNI is a simply - /// mapped value. - VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0); - - // extendTo - Find the last LI value defined in MBB at or before Idx. The - // parentli is assumed to be live at Idx. Extend the live range to include - // Idx. Return the found VNInfo, or NULL. 
- VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx); - - /// isMapped - Return true is ParentVNI is a known mapped value. It may be a - /// simple 1-1 mapping or a complex mapping to later defs. - bool isMapped(const VNInfo *ParentVNI) const { - return Values.count(ParentVNI); - } - - /// isComplexMapped - Return true if ParentVNI has received new definitions - /// with defValue. - bool isComplexMapped(const VNInfo *ParentVNI) const; - - /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the - /// number of definitions. - void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; } - - // addSimpleRange - Add a simple range from ParentLI to LI. - // ParentVNI must be live in the [Start;End) interval. - void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI); - - /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI. - /// All needed values whose def is not inside [Start;End) must be defined - /// beforehand so mapValue will work. - void addRange(SlotIndex Start, SlotIndex End); }; @@ -277,7 +185,7 @@ class SplitEditor { const TargetRegisterInfo &TRI; /// Edit - The current parent register and new intervals created. - LiveRangeEdit &Edit; + LiveRangeEdit *Edit; /// Index into Edit of the currently open interval. /// The index 0 is used for the complement, so the first interval started by @@ -295,8 +203,76 @@ class SplitEditor { /// Idx. RegAssignMap RegAssign; - /// LIMappers - One LiveIntervalMap or each interval in Edit. - SmallVector LIMappers; + typedef DenseMap, VNInfo*> ValueMap; + + /// Values - keep track of the mapping from parent values to values in the new + /// intervals. Given a pair (RegIdx, ParentVNI->id), Values contains: + /// + /// 1. No entry - the value is not mapped to Edit.get(RegIdx). + /// 2. Null - the value is mapped to multiple values in Edit.get(RegIdx). + /// Each value is represented by a minimal live range at its def. + /// 3. 
A non-null VNInfo - the value is mapped to a single new value. + /// The new value has no live ranges anywhere. + ValueMap Values; + + typedef std::pair LiveOutPair; + typedef IndexedMap LiveOutMap; + + // LiveOutCache - Map each basic block where a new register is live out to the + // live-out value and its defining block. + // One of these conditions shall be true: + // + // 1. !LiveOutCache.count(MBB) + // 2. LiveOutCache[MBB].second.getNode() == MBB + // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB] + // + // This is only a cache, the values can be computed as: + // + // VNI = Edit.get(RegIdx)->getVNInfoAt(LIS.getMBBEndIdx(MBB)) + // Node = mbt_[LIS.getMBBFromIndex(VNI->def)] + // + // The cache is also used as a visited set by extendRange(). It can be shared + // by all the new registers because at most one is live out of each block. + LiveOutMap LiveOutCache; + + // LiveOutSeen - Indexed by MBB->getNumber(), a bit is set for each valid + // entry in LiveOutCache. + BitVector LiveOutSeen; + + /// LiveInBlock - Info for updateSSA() about a block where a register is + /// live-in. + /// The updateSSA caller provides DomNode and Kill inside MBB, updateSSA() + /// adds the computed live-in value. + struct LiveInBlock { + // Dominator tree node for the block. + // Cleared by updateSSA when the final value has been determined. + MachineDomTreeNode *DomNode; + + // Live-in value filled in by updateSSA once it is known. + VNInfo *Value; + + // Position in block where the live-in range ends, or SlotIndex() if the + // range passes through the block. + SlotIndex Kill; + + LiveInBlock(MachineDomTreeNode *node) : DomNode(node), Value(0) {} + }; + + /// LiveInBlocks - List of live-in blocks used by findReachingDefs() and + /// updateSSA(). This list is usually empty, it exists here to avoid frequent + /// reallocations. + SmallVector LiveInBlocks; + + /// defValue - define a value in RegIdx from ParentVNI at Idx. 
+ /// Idx does not have to be ParentVNI->def, but it must be contained within + /// ParentVNI's live range in ParentLI. The new value is added to the value + /// map. + /// Return the new LI value. + VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx); + + /// markComplexMapped - Mark ParentVNI as complex mapped in RegIdx regardless + /// of the number of defs. + void markComplexMapped(unsigned RegIdx, const VNInfo *ParentVNI); /// defFromParent - Define Reg from ParentVNI at UseIdx using either /// rematerialization or a COPY from parent. Return the new value. @@ -306,27 +282,56 @@ class SplitEditor { MachineBasicBlock &MBB, MachineBasicBlock::iterator I); - /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers. - void rewriteAssigned(); + /// extendRange - Extend the live range of Edit.get(RegIdx) so it reaches Idx. + /// Insert PHIDefs as needed to preserve SSA form. + void extendRange(unsigned RegIdx, SlotIndex Idx); - /// rewriteComponents - Rewrite all uses of Intv[0] according to the eq - /// classes in ConEQ. - /// This must be done when Intvs[0] is styill live at all uses, before calling - /// ConEq.Distribute(). - void rewriteComponents(const SmallVectorImpl &Intvs, - const ConnectedVNInfoEqClasses &ConEq); + /// findReachingDefs - Starting from MBB, add blocks to LiveInBlocks until all + /// reaching defs for LI are found. + /// @param LI Live interval whose value is needed. + /// @param MBB Block where LI should be live-in. + /// @param Kill Kill point in MBB. + /// @return Unique value seen, or NULL. + VNInfo *findReachingDefs(LiveInterval *LI, MachineBasicBlock *MBB, + SlotIndex Kill); + + /// updateSSA - Compute and insert PHIDefs such that all blocks in + // LiveInBlocks get a known live-in value. Add live ranges to the blocks. + void updateSSA(); + + /// transferValues - Transfer values to the new ranges. + /// Return true if any ranges were skipped. 
+ bool transferValues(); + + /// extendPHIKillRanges - Extend the ranges of all values killed by original + /// parent PHIDefs. + void extendPHIKillRanges(); + + /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers. + void rewriteAssigned(bool ExtendRanges); + + /// deleteRematVictims - Delete defs that are dead after rematerializing. + void deleteRematVictims(); public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - MachineDominatorTree&, LiveRangeEdit&); + MachineDominatorTree&); - /// getAnalysis - Get the corresponding analysis. - SplitAnalysis &getAnalysis() { return SA; } + /// reset - Prepare for a new split. + void reset(LiveRangeEdit&); /// Create a new virtual register and live interval. - void openIntv(); + /// Return the interval index, starting from 1. Interval index 0 is the + /// implicit complement interval. + unsigned openIntv(); + + /// currentIntv - Return the current interval index. + unsigned currentIntv() const { return OpenIdx; } + + /// selectIntv - Select a previously opened interval index. + void selectIntv(unsigned Idx); /// enterIntvBefore - Enter the open interval before the instruction at Idx. /// If the parent interval is not live before Idx, a COPY is not inserted. @@ -369,25 +374,28 @@ class SplitEditor { /// void overlapIntv(SlotIndex Start, SlotIndex End); - /// closeIntv - Indicate that we are done editing the currently open - /// LiveInterval, and ranges can be trimmed. - void closeIntv(); - /// finish - after all the new live ranges have been created, compute the /// remaining live range, and rewrite instructions to use the new registers. - void finish(); + /// @param LRMap When not null, this vector will map each live range in Edit + /// back to the indices returned by openIntv. + /// There may be extra indices created by dead code elimination. 
+ void finish(SmallVectorImpl *LRMap = 0); /// dump - print the current interval maping to dbgs(). void dump() const; // ===--- High level methods ---=== + /// splitSingleBlock - Split CurLI into a separate live interval around the + /// uses in a single block. This is intended to be used as part of a larger + /// split, and doesn't call finish(). + void splitSingleBlock(const SplitAnalysis::BlockInfo &BI); + /// splitSingleBlocks - Split CurLI into a separate live interval inside each /// basic block in Blocks. void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks); - - /// splitInsideBlock - Split CurLI into multiple intervals inside MBB. - void splitInsideBlock(const MachineBasicBlock *); }; } + +#endif diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index fcaee4208ba3..f0a44abaf5cd 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -153,7 +153,6 @@ bool StackProtector::InsertStackProtectors() { for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { BasicBlock *BB = I++; - ReturnInst *RI = dyn_cast(BB->getTerminator()); if (!RI) continue; @@ -191,8 +190,6 @@ bool StackProtector::InsertStackProtectors() { // Create the basic block to jump to when the guard check fails. FailBB = CreateFailBB(); - if (DT) - FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0; } // For each block with a return instruction, convert this: @@ -219,9 +216,10 @@ bool StackProtector::InsertStackProtectors() { // Split the basic block before the return instruction. BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); - if (DT) { - DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0); - FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB); + + if (DT && DT->isReachableFromEntry(BB)) { + DT->addNewBlock(NewBB, BB); + FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB; } // Remove default branch instruction to the new BB. 
@@ -242,7 +240,7 @@ bool StackProtector::InsertStackProtectors() { // statements in the function. if (!FailBB) return false; - if (DT) + if (DT && FailBBDom) DT->addNewBlock(FailBB, FailBBDom); return true; diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index ec7829ec39fe..227eb47e6827 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -587,7 +587,7 @@ StrongPHIElimination::SplitInterferencesForBasicBlock( } // We now walk the PHIs in successor blocks and check for interferences. This - // is necesary because the use of a PHI's operands are logically contained in + // is necessary because the use of a PHI's operands are logically contained in // the predecessor block. The def of a PHI's destination register is processed // along with the other defs in a basic block. diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 15340a3f1084..b9fcd3804d7f 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -388,11 +388,6 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, if (MO.isDef() != (i == 0)) return false; - // For the def, it should be the only def of that register. - if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() || - MRI.isLiveIn(Reg))) - return false; - // Don't allow any virtual-register uses. Rematting an instruction with // virtual register uses would length the live ranges of the uses, which // is not necessarily a good idea, certainly not "trivial". 
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index fa311dc5d66c..6ed91b09966e 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -23,6 +23,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" @@ -176,6 +177,52 @@ const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const { SectionKind::getDataRel()); } +MCSymbol * +TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV, + Mangler *Mang, + MachineModuleInfo *MMI) const { + unsigned Encoding = getPersonalityEncoding(); + switch (Encoding & 0x70) { + default: + report_fatal_error("We do not support this DWARF encoding yet!"); + case dwarf::DW_EH_PE_absptr: + return Mang->getSymbol(GV); + break; + case dwarf::DW_EH_PE_pcrel: { + Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName(); + return getContext().GetOrCreateSymbol(FullName); + break; + } + } +} + +void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const { + Twine FullName = StringRef("DW.ref.") + Sym->getName(); + MCSymbol *Label = getContext().GetOrCreateSymbol(FullName); + Streamer.EmitSymbolAttribute(Label, MCSA_Hidden); + Streamer.EmitSymbolAttribute(Label, MCSA_Weak); + Twine SectionName = StringRef(".data.") + Label->getName(); + SmallString<64> NameData; + SectionName.toVector(NameData); + unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP; + const MCSection *Sec = getContext().getELFSection(NameData, + ELF::SHT_PROGBITS, + Flags, + SectionKind::getDataRel(), + 0, Label->getName()); + Streamer.SwitchSection(Sec); + Streamer.EmitValueToAlignment(8); + Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject); + 
const MCExpr *E = MCConstantExpr::Create(8, getContext()); + Streamer.EmitELFSize(Label, E); + Streamer.EmitLabel(Label); + + unsigned Size = TM.getTargetData()->getPointerSize(); + Streamer.EmitSymbolValue(Sym, Size); +} + static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { // FIXME: Why is this here? Codegen is should not be in the business @@ -424,8 +471,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: @@ -446,18 +492,10 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; + // .comm doesn't support alignment before Leopard. Triple T(((LLVMTargetMachine&)TM).getTargetTriple()); - if (T.getOS() == Triple::Darwin) { - switch (T.getDarwinMajorNumber()) { - case 7: // 10.3 Panther. - case 8: // 10.4 Tiger. - CommDirectiveSupportsAlignment = false; - break; - case 9: // 10.5 Leopard. - case 10: // 10.6 SnowLeopard. - break; - } - } + if (T.isMacOSX() && T.isMacOSXVersionLT(10, 5)) + CommDirectiveSupportsAlignment = false; TargetLoweringObjectFile::Initialize(Ctx, TM); @@ -641,10 +679,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { // Parse the section specifier and create it if valid. StringRef Segment, Section; - unsigned TAA = (unsigned)MCSectionMachO::SECTION_ATTRIBUTES, StubSize = 0; + unsigned TAA = 0, StubSize = 0; + bool TAAParsed; std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, - TAA, StubSize); + TAA, TAAParsed, StubSize); if (!ErrorCode.empty()) { // If invalid, report the error with report_fatal_error. 
report_fatal_error("Global variable '" + GV->getNameStr() + @@ -654,17 +693,13 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, return DataSection; } - bool TAAWasSet = (TAA != MCSectionMachO::SECTION_ATTRIBUTES); - if (!TAAWasSet) - TAA = 0; // Sensible default if this is a new section. - // Get the section. const MCSectionMachO *S = getContext().getMachOSection(Segment, Section, TAA, StubSize, Kind); // If TAA wasn't set by ParseSectionSpecifier() above, // use the value returned by getMachOSection() as a default. - if (!TAAWasSet) + if (!TAAParsed) TAA = S->getTypeAndAttributes(); // Okay, now that we got the section, verify that the TAA & StubSize agree. @@ -806,14 +841,36 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Mang, MMI, - Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +MCSymbol *TargetLoweringObjectFileMachO:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + // The mach-o version of this method defaults to returning a stub reference. + MachineModuleInfoMachO &MachOMMI = + MMI->getObjFileInfo(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += "$non_lazy_ptr"; + + // Add information about the stub reference to MachOMMI so that the stub + // gets emitted by the asmprinter. 
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return SSym; +} + unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const { return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; } @@ -822,7 +879,7 @@ unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const { return DW_EH_PE_pcrel; } -unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const { +unsigned TargetLoweringObjectFileMachO::getFDEEncoding(bool CFI) const { return DW_EH_PE_pcrel; } diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index b3120b8be1ab..52ea87231ccd 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -105,7 +105,7 @@ namespace { MachineFunction::iterator &mbbi, unsigned RegB, unsigned RegC, unsigned Dist); - bool isProfitableToConv3Addr(unsigned RegA); + bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB); bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -124,7 +124,11 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist); + unsigned Dist, + SmallPtrSet &Processed); + + void ScanUses(unsigned DstReg, MachineBasicBlock *MBB, + SmallPtrSet &Processed); void ProcessCopy(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &Processed); @@ -615,16 +619,18 @@ TwoAddressInstructionPass::CommuteInstruction(MachineBasicBlock::iterator &mi, /// isProfitableToConv3Addr - Return true if it is profitable to convert the /// given 2-address instruction to a 3-address one. 
bool -TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA) { +TwoAddressInstructionPass::isProfitableToConv3Addr(unsigned RegA,unsigned RegB){ // Look for situations like this: // %reg1024 = MOV r1 // %reg1025 = MOV r0 // %reg1026 = ADD %reg1024, %reg1025 // r2 = MOV %reg1026 // Turn ADD into a 3-address instruction to avoid a copy. - unsigned FromRegA = getMappedReg(RegA, SrcRegMap); + unsigned FromRegB = getMappedReg(RegB, SrcRegMap); + if (!FromRegB) + return false; unsigned ToRegA = getMappedReg(RegA, DstRegMap); - return (FromRegA && ToRegA && !regsAreCompatible(FromRegA, ToRegA, TRI)); + return (ToRegA && !regsAreCompatible(FromRegB, ToRegA, TRI)); } /// ConvertInstTo3Addr - Convert the specified two-address instruction into a @@ -664,6 +670,54 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi, return false; } +/// ScanUses - Scan forward recursively for only uses, update maps if the use +/// is a copy or a two-address instruction. +void +TwoAddressInstructionPass::ScanUses(unsigned DstReg, MachineBasicBlock *MBB, + SmallPtrSet &Processed) { + SmallVector VirtRegPairs; + bool IsDstPhys; + bool IsCopy = false; + unsigned NewReg = 0; + unsigned Reg = DstReg; + while (MachineInstr *UseMI = findOnlyInterestingUse(Reg, MBB, MRI, TII,IsCopy, + NewReg, IsDstPhys)) { + if (IsCopy && !Processed.insert(UseMI)) + break; + + DenseMap::iterator DI = DistanceMap.find(UseMI); + if (DI != DistanceMap.end()) + // Earlier in the same MBB.Reached via a back edge. 
+ break; + + if (IsDstPhys) { + VirtRegPairs.push_back(NewReg); + break; + } + bool isNew = SrcRegMap.insert(std::make_pair(NewReg, Reg)).second; + if (!isNew) + assert(SrcRegMap[NewReg] == Reg && "Can't map to two src registers!"); + VirtRegPairs.push_back(NewReg); + Reg = NewReg; + } + + if (!VirtRegPairs.empty()) { + unsigned ToReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + while (!VirtRegPairs.empty()) { + unsigned FromReg = VirtRegPairs.back(); + VirtRegPairs.pop_back(); + bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[FromReg] == ToReg &&"Can't map to two dst registers!"); + ToReg = FromReg; + } + bool isNew = DstRegMap.insert(std::make_pair(DstReg, ToReg)).second; + if (!isNew) + assert(DstRegMap[DstReg] == ToReg && "Can't map to two dst registers!"); + } +} + /// ProcessCopy - If the specified instruction is not yet processed, process it /// if it's a copy. For a copy instruction, we find the physical registers the /// source and destination registers might be mapped to. These are kept in @@ -695,49 +749,11 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, assert(SrcRegMap[DstReg] == SrcReg && "Can't map to two src physical registers!"); - SmallVector VirtRegPairs; - bool IsCopy = false; - unsigned NewReg = 0; - while (MachineInstr *UseMI = findOnlyInterestingUse(DstReg, MBB, MRI,TII, - IsCopy, NewReg, IsDstPhys)) { - if (IsCopy) { - if (!Processed.insert(UseMI)) - break; - } - - DenseMap::iterator DI = DistanceMap.find(UseMI); - if (DI != DistanceMap.end()) - // Earlier in the same MBB.Reached via a back edge. 
- break; - - if (IsDstPhys) { - VirtRegPairs.push_back(NewReg); - break; - } - bool isNew = SrcRegMap.insert(std::make_pair(NewReg, DstReg)).second; - if (!isNew) - assert(SrcRegMap[NewReg] == DstReg && - "Can't map to two src physical registers!"); - VirtRegPairs.push_back(NewReg); - DstReg = NewReg; - } - - if (!VirtRegPairs.empty()) { - unsigned ToReg = VirtRegPairs.back(); - VirtRegPairs.pop_back(); - while (!VirtRegPairs.empty()) { - unsigned FromReg = VirtRegPairs.back(); - VirtRegPairs.pop_back(); - bool isNew = DstRegMap.insert(std::make_pair(FromReg, ToReg)).second; - if (!isNew) - assert(DstRegMap[FromReg] == ToReg && - "Can't map to two dst physical registers!"); - ToReg = FromReg; - } - } + ScanUses(DstReg, MBB, Processed); } Processed.insert(MI); + return; } /// isSafeToDelete - If the specified instruction does not produce any side @@ -836,7 +852,8 @@ bool TwoAddressInstructionPass:: TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + unsigned SrcIdx, unsigned DstIdx, unsigned Dist, + SmallPtrSet &Processed) { const TargetInstrDesc &TID = mi->getDesc(); unsigned regA = mi->getOperand(DstIdx).getReg(); unsigned regB = mi->getOperand(SrcIdx).getReg(); @@ -887,10 +904,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + if (TargetRegisterInfo::isVirtualRegister(regA)) + ScanUses(regA, &*mbbi, Processed); + if (TID.isConvertibleTo3Addr()) { // This instruction is potentially convertible to a true // three-address instruction. Check if it is profitable. - if (!regBKilled || isProfitableToConv3Addr(regA)) { + if (!regBKilled || isProfitableToConv3Addr(regA, regB)) { // Try to convert it. 
if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) { ++NumConvertedTo3Addr; @@ -951,7 +971,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator NewMI = NewMIs[1]; bool TransformSuccess = TryInstructionTransform(NewMI, mi, mbbi, - NewSrcIdx, NewDstIdx, Dist); + NewSrcIdx, NewDstIdx, Dist, Processed); if (TransformSuccess || NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. Keep the unfolded @@ -1100,7 +1120,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi->getOperand(DstIdx).getReg()) break; // Done with this instruction. - if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist)) + if (TryInstructionTransform(mi, nmi, mbbi, SrcIdx, DstIdx, Dist, + Processed)) break; // The tied operands have been eliminated. } diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 734b87e62f62..226b78f7bcbd 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -259,7 +259,9 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " << MF->getFunction()->getName() << '\n'); - + DEBUG(dump()); + SmallVector SuperDeads; + SmallVector SuperDefs; SmallVector SuperKills; for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); @@ -283,12 +285,13 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { if (MO.getSubReg()) { // A virtual register kill refers to the whole register, so we may // have to add operands for the super-register. - if (MO.isUse() && MO.isKill() && !MO.isUndef()) - SuperKills.push_back(PhysReg); - - // We don't have to deal with sub-register defs because - // LiveIntervalAnalysis already added the necessary - // operands. 
+ if (MO.isUse()) { + if (MO.isKill() && !MO.isUndef()) + SuperKills.push_back(PhysReg); + } else if (MO.isDead()) + SuperDeads.push_back(PhysReg); + else + SuperDefs.push_back(PhysReg); // PhysReg operands cannot have subregister indexes. PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg()); @@ -305,16 +308,28 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { while (!SuperKills.empty()) MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true); + while (!SuperDeads.empty()) + MI->addRegisterDead(SuperDeads.pop_back_val(), TRI, true); + + while (!SuperDefs.empty()) + MI->addRegisterDefined(SuperDefs.pop_back_val(), TRI); + DEBUG(dbgs() << "> " << *MI); // Finally, remove any identity copies. if (MI->isIdentityCopy()) { - DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); - if (Indexes) - Indexes->removeMachineInstrFromMaps(MI); - // It's safe to erase MI because MII has already been incremented. - MI->eraseFromParent(); + if (MI->getNumOperands() == 2) { + DEBUG(dbgs() << "Deleting identity copy.\n"); + RemoveMachineInstrFromMaps(MI); + if (Indexes) + Indexes->removeMachineInstrFromMaps(MI); + // It's safe to erase MI because MII has already been incremented. + MI->eraseFromParent(); + } else { + // Transform identity copy to a KILL to deal with subregisters. 
+ MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "Identity copy: " << *MI); + } } } } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index ec149dddc1d9..185065880581 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -32,7 +32,7 @@ STATISTIC(NumCommutes, "Number of instructions commuted"); STATISTIC(NumDRM , "Number of re-materializable defs elided"); STATISTIC(NumStores , "Number of stores added"); STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omited"); +STATISTIC(NumOmitted , "Number of reloads omitted"); STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); STATISTIC(NumCopified, "Number of available reloads turned into copies"); STATISTIC(NumReMats , "Number of re-materialization"); @@ -261,6 +261,10 @@ class AvailableSpills { /// now). void ModifyStackSlotOrReMat(int SlotOrReMat); + /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, + /// other stack slots sharing the same register are no longer valid. + void ClobberSharingStackSlots(int StackSlot); + /// AddAvailableRegsToLiveIn - Availability information is being kept coming /// into the specified MBB. Add available physical registers as potential /// live-in's. If they are reused in the MBB, they will be added to the @@ -665,7 +669,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, } } -/// ReMaterialize - Re-materialize definition for Reg targetting DestReg. +/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. 
/// static void ReMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MII, @@ -831,6 +835,26 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { PhysRegsAvailable.erase(I); } +void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { + std::map::iterator It = + SpillSlotsOrReMatsAvailable.find(StackSlot); + if (It == SpillSlotsOrReMatsAvailable.end()) return; + unsigned Reg = It->second >> 1; + + // Erase entries in PhysRegsAvailable for other stack slots. + std::multimap::iterator I = PhysRegsAvailable.lower_bound(Reg); + while (I != PhysRegsAvailable.end() && I->first == Reg) { + std::multimap::iterator NextI = llvm::next(I); + if (I->second != StackSlot) { + DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " + << PrintReg(Reg, TRI) << '\n'); + SpillSlotsOrReMatsAvailable.erase(I->second); + PhysRegsAvailable.erase(I); + } + I = NextI; + } +} + // ************************** // // Reuse Info Implementation // // ************************** // @@ -1791,8 +1815,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, else DEBUG(dbgs() << "Reusing SS#" << SSorRMId); DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " + << TRI->getName(InReg) << " for " << PrintReg(VirtReg) + <<" instead of reloading into physreg " << TRI->getName(Phys) << '\n'); // Reusing a physreg may resurrect it. 
But we expect ProcessUses to update @@ -1807,8 +1831,8 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, else DEBUG(dbgs() << "Reusing SS#" << SSorRMId); DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for vreg" - << VirtReg <<" by copying it into physreg " + << TRI->getName(InReg) << " for " << PrintReg(VirtReg) + <<" by copying it into physreg " << TRI->getName(Phys) << '\n'); // If the reloaded / remat value is available in another register, @@ -2025,7 +2049,8 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, TRI->regsOverlap(MOk.getReg(), PhysReg)) { CanReuse = false; DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg << ": " << MOk << '\n'); + << " for " << PrintReg(VirtReg) << ": " << MOk + << '\n'); break; } } @@ -2039,9 +2064,9 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, else DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for vreg" - << VirtReg <<" instead of reloading into physreg " - << TRI->getName(VRM->getPhys(VirtReg)) << '\n'); + << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) + << " instead of reloading into " + << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); MI.getOperand(i).setSubReg(0); @@ -2126,7 +2151,7 @@ void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, else DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for vreg" << VirtReg + << " for " << PrintReg(VirtReg) << " instead of reloading into same physreg.\n"); unsigned RReg = SubIdx ? 
TRI->getSubReg(PhysReg, SubIdx) : PhysReg; MI.getOperand(i).setReg(RReg); @@ -2315,7 +2340,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { unsigned VirtReg = FoldedVirts[FVI].first; VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded vreg: " << VirtReg << " MR: " << MR); + DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); int SS = VRM->getStackSlot(VirtReg); if (SS == VirtRegMap::NO_STACK_SLOT) @@ -2549,6 +2574,10 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, } } + // If StackSlot is available in a register that also holds other stack + // slots, clobber those stack slots now. + Spills.ClobberSharingStackSlots(StackSlot); + assert(PhysReg && "VR not assigned a physical register?"); MRI->setPhysRegUsed(PhysReg); unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; diff --git a/lib/CompilerDriver/CMakeLists.txt b/lib/CompilerDriver/CMakeLists.txt index 2248de01b954..a12b3378aaf2 100644 --- a/lib/CompilerDriver/CMakeLists.txt +++ b/lib/CompilerDriver/CMakeLists.txt @@ -1,10 +1,12 @@ set(LLVM_LINK_COMPONENTS support) -set(LLVM_REQUIRES_EH 1) -add_llvm_tool(llvmc +# We don't want this library to appear in `llvm-config --libs` output, +# so its name doesn't start with "LLVM". 
+ +add_llvm_library(CompilerDriver Action.cpp + BuiltinOptions.cpp CompilationGraph.cpp - llvmc.cpp - Plugin.cpp + Main.cpp Tool.cpp ) diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index b5632d2bc5c3..8bff2654d6c6 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -6,3 +6,4 @@ add_llvm_library(LLVMExecutionEngine add_subdirectory(Interpreter) add_subdirectory(JIT) add_subdirectory(MCJIT) +add_subdirectory(RuntimeDyld) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index f28697530b3d..2b1e8786c727 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -79,9 +79,10 @@ ExecutionEngine::~ExecutionEngine() { void ExecutionEngine::DeregisterAllTables() { if (ExceptionTableDeregister) { - for (std::vector::iterator it = AllExceptionTables.begin(), - ie = AllExceptionTables.end(); it != ie; ++it) - ExceptionTableDeregister(*it); + DenseMap::iterator it = AllExceptionTables.begin(); + DenseMap::iterator ite = AllExceptionTables.end(); + for (; it != ite; ++it) + ExceptionTableDeregister(it->second); AllExceptionTables.clear(); } } @@ -310,19 +311,19 @@ void ExecutionEngine::runStaticConstructorsDestructors(Module *module, // it. if (!GV || GV->isDeclaration() || GV->hasLocalLinkage()) return; - // Should be an array of '{ int, void ()* }' structs. The first value is + // Should be an array of '{ i32, void ()* }' structs. The first value is // the init priority, which we ignore. - ConstantArray *InitList = dyn_cast(GV->getInitializer()); - if (!InitList) return; + if (isa(GV->getInitializer())) + return; + ConstantArray *InitList = cast(GV->getInitializer()); for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { - ConstantStruct *CS = - dyn_cast(InitList->getOperand(i)); - if (!CS) continue; - if (CS->getNumOperands() != 2) return; // Not array of 2-element structs. 
+ if (isa(InitList->getOperand(i))) + continue; + ConstantStruct *CS = cast(InitList->getOperand(i)); Constant *FP = CS->getOperand(1); if (FP->isNullValue()) - break; // Found a null terminator, exit. + continue; // Found a sentinal value, ignore. // Strip off constant expression casts. if (ConstantExpr *CE = dyn_cast(FP)) @@ -838,7 +839,7 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, case Type::PointerTyID: // Ensure 64 bit target pointers are fully initialized on 32 bit hosts. if (StoreBytes != sizeof(PointerTy)) - memset(Ptr, 0, StoreBytes); + memset(&(Ptr->PointerVal), 0, StoreBytes); *((PointerTy*)Ptr) = Val.PointerVal; break; diff --git a/lib/ExecutionEngine/JIT/Intercept.cpp b/lib/ExecutionEngine/JIT/Intercept.cpp index 169e1bae547b..fa8bee460427 100644 --- a/lib/ExecutionEngine/JIT/Intercept.cpp +++ b/lib/ExecutionEngine/JIT/Intercept.cpp @@ -52,8 +52,8 @@ static void runAtExitHandlers() { #include #endif #include -/* stat functions are redirecting to __xstat with a version number. On x86-64 - * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' * available as an exported symbol, so we have to add it explicitly. */ namespace { @@ -119,18 +119,18 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, const char *NameStr = Name.c_str(); // If this is an asm specifier, skip the sentinal. if (NameStr[0] == 1) ++NameStr; - + // If it's an external function, look it up in the process image... void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); if (Ptr) return Ptr; - + // If it wasn't found and if it starts with an underscore ('_') character, // and has an asm specifier, try again without the underscore. 
if (Name[0] == 1 && NameStr[0] == '_') { Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); if (Ptr) return Ptr; } - + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These // are references to hidden visibility symbols that dlsym cannot resolve. // If we have one of these, strip off $LDBLStub and try again. @@ -147,7 +147,7 @@ void *JIT::getPointerToNamedFunction(const std::string &Name, } #endif } - + /// If a LazyFunctionCreator is installed, use it to get/create the function. if (LazyFunctionCreator) if (void *RP = LazyFunctionCreator(Name)) diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index cc76b138a8a6..d1f87acd61b0 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -35,7 +35,7 @@ using namespace llvm; -#ifdef __APPLE__ +#ifdef __APPLE__ // Apple gcc defaults to -fuse-cxa-atexit (i.e. calls __cxa_atexit instead // of atexit). It passes the address of linker generated symbol __dso_handle // to the function. @@ -75,7 +75,7 @@ extern "C" void LLVMLinkInJIT() { #endif #if HAVE_EHTABLE_SUPPORT - + // libgcc defines the __register_frame function to dynamically register new // dwarf frames for exception handling. This functionality is not portable // across compilers and is only provided by GCC. We use the __register_frame @@ -113,10 +113,10 @@ struct LibgccObject { void *unused1; void *unused2; void *unused3; - + /// frame - Pointer to the exception table. void *frame; - + /// encoding - The encoding of the object? union { struct { @@ -124,15 +124,15 @@ struct LibgccObject { unsigned long from_array : 1; unsigned long mixed_encoding : 1; unsigned long encoding : 8; - unsigned long count : 21; + unsigned long count : 21; } b; size_t i; } encoding; - + /// fde_end - libgcc defines this field only if some macro is defined. We /// include this field even if it may not there, to make libgcc happy. char *fde_end; - + /// next - At least we know it's a chained list! 
struct LibgccObject *next; }; @@ -153,7 +153,7 @@ struct LibgccObjectInfo { /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime. /// struct LibgccObject* unseenObjects; - + unsigned unused[2]; }; @@ -165,32 +165,32 @@ void DarwinRegisterFrame(void* FrameBegin) { LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); assert(LOI && "This should be preallocated by the runtime"); - + // Allocate a new LibgccObject to represent this frame. Deallocation of this // object may be impossible: since darwin code in libgcc was written after // the ability to dynamically register frames, things may crash if we // deallocate it. struct LibgccObject* ob = (struct LibgccObject*) malloc(sizeof(struct LibgccObject)); - + // Do like libgcc for the values of the field. ob->unused1 = (void *)-1; ob->unused2 = 0; ob->unused3 = 0; ob->frame = FrameBegin; - ob->encoding.i = 0; + ob->encoding.i = 0; ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit; - + // Put the info on both places, as libgcc uses the first or the second // field. Note that we rely on having two pointers here. If fde_end was a // char, things would get complicated. ob->fde_end = (char*)LOI->unseenObjects; ob->next = LOI->unseenObjects; - + // Update the key's unseenObjects list. LOI->unseenObjects = ob; - - // Finally update the "key". Apparently, libgcc requires it. + + // Finally update the "key". Apparently, libgcc requires it. 
_keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI); @@ -312,18 +312,18 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, if (TM.addPassesToEmitMachineCode(PM, *JCE, OptLevel)) { report_fatal_error("Target does not support machine code emission!"); } - + // Register routine for informing unwinding runtime about new EH frames #if HAVE_EHTABLE_SUPPORT #if USE_KEYMGR struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); - + // The key is created on demand, and libgcc creates it the first time an // exception occurs. Since we need the key to register frames, we create // it now. if (!LOI) - LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); + LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI); InstallExceptionTableRegister(DarwinRegisterFrame); // Not sure about how to deregister on Darwin. @@ -332,7 +332,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, InstallExceptionTableDeregister(__deregister_frame); #endif // __APPLE__ #endif // HAVE_EHTABLE_SUPPORT - + // Initialize passes. PM.doInitialization(); } @@ -365,11 +365,11 @@ void JIT::addModule(Module *M) { if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { report_fatal_error("Target does not support machine code emission!"); } - + // Initialize passes. PM.doInitialization(); } - + ExecutionEngine::addModule(M); } @@ -377,29 +377,29 @@ void JIT::addModule(Module *M) { /// since the PassManager it contains references a released Module. 
bool JIT::removeModule(Module *M) { bool result = ExecutionEngine::removeModule(M); - + MutexGuard locked(lock); - + if (jitstate->getModule() == M) { delete jitstate; jitstate = 0; } - + if (!jitstate && !Modules.empty()) { jitstate = new JITState(Modules[0]); FunctionPassManager &PM = jitstate->getPM(locked); PM.add(new TargetData(*TM.getTargetData())); - + // Turn the machine code intermediate representation into bytes in memory // that may be executed. if (TM.addPassesToEmitMachineCode(PM, *JCE, CodeGenOpt::Default)) { report_fatal_error("Target does not support machine code emission!"); } - + // Initialize passes. PM.doInitialization(); - } + } return result; } @@ -433,7 +433,7 @@ GenericValue JIT::runFunction(Function *F, // Call the function. GenericValue rv; - rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), (char **)GVTOP(ArgValues[1]), (const char **)GVTOP(ArgValues[2]))); return rv; @@ -446,7 +446,7 @@ GenericValue JIT::runFunction(Function *F, // Call the function. 
GenericValue rv; - rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), (char **)GVTOP(ArgValues[1]))); return rv; } @@ -480,7 +480,7 @@ GenericValue JIT::runFunction(Function *F, rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); else if (BitWidth <= 64) rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); - else + else llvm_unreachable("Integer types > 64 bits not supported"); return rv; } @@ -542,7 +542,7 @@ GenericValue JIT::runFunction(Function *F, case Type::PointerTyID: void *ArgPtr = GVTOP(AV); if (sizeof(void*) == 4) - C = ConstantInt::get(Type::getInt32Ty(F->getContext()), + C = ConstantInt::get(Type::getInt32Ty(F->getContext()), (int)(intptr_t)ArgPtr); else C = ConstantInt::get(Type::getInt64Ty(F->getContext()), @@ -649,7 +649,7 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { "Externally-defined function should not be in pending list."); jitTheFunction(PF, locked); - + // Now that the function has been jitted, ask the JITEmitter to rewrite // the stub with real address of the function. updateFunctionStub(PF); @@ -666,7 +666,7 @@ void JIT::jitTheFunction(Function *F, const MutexGuard &locked) { } /// getPointerToFunction - This method is used to get the address of the -/// specified function, compiling it if neccesary. +/// specified function, compiling it if necessary. 
/// void *JIT::getPointerToFunction(Function *F) { @@ -703,7 +703,7 @@ void *JIT::getPointerToFunction(Function *F) { void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { MutexGuard locked(lock); - + BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap(locked).find(BB); if (I == getBasicBlockAddressMap(locked).end()) { @@ -724,7 +724,7 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { // resolve basic block address MutexGuard locked(lock); - + BasicBlockAddressMapTy::iterator I = getBasicBlockAddressMap(locked).find(BB); if (I != getBasicBlockAddressMap(locked).end()) { diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index 1d1763edd4db..b576c168f272 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -42,7 +42,7 @@ class JITState { FunctionPassManager &getPM(const MutexGuard &L) { return PM; } - + Module *getModule() const { return M; } std::vector > &getPendingFunctions(const MutexGuard &L){ return PendingFunctions; @@ -86,7 +86,7 @@ class JIT : public ExecutionEngine { static void Register() { JITCtor = createJIT; } - + /// getJITInfo - Return the target JIT information structure. /// TargetJITInfo &getJITInfo() const { return TJI; } @@ -106,7 +106,7 @@ class JIT : public ExecutionEngine { } virtual void addModule(Module *M); - + /// removeModule - Remove a Module from the list of modules. Returns true if /// M is found. virtual bool removeModule(Module *M); @@ -146,7 +146,7 @@ class JIT : public ExecutionEngine { /// getPointerToBasicBlock - This returns the address of the specified basic /// block, assuming function is compiled. void *getPointerToBasicBlock(BasicBlock *BB); - + /// getOrEmitGlobalVariable - Return the address of the specified global /// variable, possibly emitting it to memory if needed. This is used by the /// Emitter. 
@@ -172,7 +172,7 @@ class JIT : public ExecutionEngine { void freeMachineCodeForFunction(Function *F); /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd - /// function was encountered. Add it to a pending list to be processed after + /// function was encountered. Add it to a pending list to be processed after /// the current function. /// void addPendingFunction(Function *F); diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp index 3b5acb7ecc48..e71c20b89fda 100644 --- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp +++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp @@ -27,7 +27,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" #include -#include namespace llvm { @@ -143,7 +142,7 @@ void JITDebugRegisterer::RegisterFunction(const Function *F, DebugInfo &I) { // Add a mapping from F to the entry and buffer, so we can delete this // info later. - FnMap[F] = std::make_pair(Buffer, JITCodeEntry); + FnMap[F] = std::make_pair(Buffer, JITCodeEntry); // Acquire the lock and do the registration. 
{ diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index f54cccadea65..ddb0d5478596 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -34,7 +34,7 @@ using namespace llvm; JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {} -unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, +unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, JITCodeEmitter& jce, unsigned char* StartFunction, unsigned char* EndFunction, @@ -47,10 +47,10 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, RI = TM.getRegisterInfo(); TFI = TM.getFrameLowering(); JCE = &jce; - + unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction, EndFunction); - + unsigned char* Result = 0; const std::vector Personalities = MMI->getPersonalities(); @@ -63,7 +63,7 @@ unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, } -void +void JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, const std::vector &Moves) const { unsigned PointerSize = TD->getPointerSize(); @@ -74,26 +74,26 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, for (unsigned i = 0, N = Moves.size(); i < N; ++i) { const MachineMove &Move = Moves[i]; MCSymbol *Label = Move.getLabel(); - + // Throw out move if the label is invalid. if (Label && (*JCE->getLabelLocations())[Label] == 0) continue; - + intptr_t LabelPtr = 0; if (Label) LabelPtr = JCE->getLabelAddress(Label); const MachineLocation &Dst = Move.getDestination(); const MachineLocation &Src = Move.getSource(); - + // Advance row if new location. if (BaseLabelPtr && Label && BaseLabel != Label) { JCE->emitByte(dwarf::DW_CFA_advance_loc4); JCE->emitInt32(LabelPtr - BaseLabelPtr); - - BaseLabel = Label; + + BaseLabel = Label; BaseLabelPtr = LabelPtr; } - + // If advancing cfa. 
if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { if (!Src.isReg()) { @@ -103,7 +103,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, JCE->emitByte(dwarf::DW_CFA_def_cfa); JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true)); } - + JCE->emitULEB128Bytes(-Src.getOffset()); } else { llvm_unreachable("Machine move not supported yet."); @@ -119,7 +119,7 @@ JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, } else { unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true); int Offset = Dst.getOffset() / stackGrowth; - + if (Offset < 0) { JCE->emitByte(dwarf::DW_CFA_offset_extended_sf); JCE->emitULEB128Bytes(Reg); @@ -382,7 +382,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, unsigned TypeOffset = sizeof(int8_t) + // Call site format // Call-site table length - MCAsmInfo::getULEB128Size(SizeSites) + + MCAsmInfo::getULEB128Size(SizeSites) + SizeSites + SizeActions + SizeTypes; // Begin the exception table. @@ -452,7 +452,7 @@ unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, // Emit the type ids. for (unsigned M = TypeInfos.size(); M; --M) { const GlobalVariable *GV = TypeInfos[M - 1]; - + if (GV) { if (TD->getPointerSize() == sizeof(int32_t)) JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV)); @@ -484,7 +484,7 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { unsigned PointerSize = TD->getPointerSize(); int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? PointerSize : -PointerSize; - + unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue(); // EH Common Frame header JCE->allocateSpace(4, 0); @@ -499,13 +499,13 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { if (Personality) { // Augmentation Size: 3 small ULEBs of one byte each, and the personality // function which size is PointerSize. 
- JCE->emitULEB128Bytes(3 + PointerSize); - + JCE->emitULEB128Bytes(3 + PointerSize); + // We set the encoding of the personality as direct encoding because we use // the function pointer. The encoding is not relative because the current // PC value may be bigger than the personality function pointer. if (PointerSize == 4) { - JCE->emitByte(dwarf::DW_EH_PE_sdata4); + JCE->emitByte(dwarf::DW_EH_PE_sdata4); JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality))); } else { JCE->emitByte(dwarf::DW_EH_PE_sdata8); @@ -540,11 +540,11 @@ JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { unsigned char* JITDwarfEmitter::EmitEHFrame(const Function* Personality, unsigned char* StartCommonPtr, - unsigned char* StartFunction, + unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* ExceptionTable) const { unsigned PointerSize = TD->getPointerSize(); - + // EH frame header. unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue(); JCE->allocateSpace(4, 0); @@ -558,7 +558,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, // specific data area in the exception table. if (Personality) { JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8); - + if (PointerSize == 4) { if (!MMI->getLandingPads().empty()) JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue()); @@ -573,7 +573,7 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, } else { JCE->emitULEB128Bytes(0); } - + // Indicate locations of function specific callee saved registers in // frame. 
EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves()); @@ -593,6 +593,6 @@ JITDwarfEmitter::EmitEHFrame(const Function* Personality, JCE->emitInt32(0); JCE->emitInt32(0); } - + return StartEHPtr; } diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h index 9495697a1aa4..e1d00454d8d2 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h @@ -35,33 +35,33 @@ class JITDwarfEmitter { MachineModuleInfo* MMI; JIT& Jit; bool stackGrowthDirection; - + unsigned char* EmitExceptionTable(MachineFunction* MF, - unsigned char* StartFunction, + unsigned char* StartFunction, unsigned char* EndFunction) const; - void EmitFrameMoves(intptr_t BaseLabelPtr, + void EmitFrameMoves(intptr_t BaseLabelPtr, const std::vector &Moves) const; - + unsigned char* EmitCommonEHFrame(const Function* Personality) const; - unsigned char* EmitEHFrame(const Function* Personality, + unsigned char* EmitEHFrame(const Function* Personality, unsigned char* StartBufferPtr, - unsigned char* StartFunction, + unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* ExceptionTable) const; - + public: - + JITDwarfEmitter(JIT& jit); - - unsigned char* EmitDwarfTable(MachineFunction& F, + + unsigned char* EmitDwarfTable(MachineFunction& F, JITCodeEmitter& JCE, unsigned char* StartFunction, unsigned char* EndFunction, unsigned char* &EHFramePtr); - - + + void setModuleInfo(MachineModuleInfo* Info) { MMI = Info; } diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 4cd8757ad0b8..d046b8aea641 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -123,17 +123,18 @@ namespace { return FunctionToLazyStubMap; } - GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& locked) { - assert(locked.holds(TheJIT->lock)); + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) { + 
assert(lck.holds(TheJIT->lock)); return GlobalToIndirectSymMap; } - pair LookupFunctionFromCallSite( + std::pair LookupFunctionFromCallSite( const MutexGuard &locked, void *CallSite) const { assert(locked.holds(TheJIT->lock)); - // The address given to us for the stub may not be exactly right, it might be - // a little bit after the stub. As such, use upper_bound to find it. + // The address given to us for the stub may not be exactly right, it + // might be a little bit after the stub. As such, use upper_bound to + // find it. CallSiteToFunctionMapTy::const_iterator I = CallSiteToFunctionMap.upper_bound(CallSite); assert(I != CallSiteToFunctionMap.begin() && @@ -645,7 +646,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. - pair I = + std::pair I = JR->state.LookupFunctionFromCallSite(locked, Stub); F = I.second; ActualPtr = I.first; @@ -659,7 +660,8 @@ void *JITResolver::JITCompilerFn(void *Stub) { // If lazy compilation is disabled, emit a useful error message and abort. 
if (!JR->TheJIT->isCompilingLazily()) { - report_fatal_error("LLVM JIT requested to do lazy compilation of function '" + report_fatal_error("LLVM JIT requested to do lazy compilation of" + " function '" + F->getName() + "' when lazy compiles are disabled!"); } @@ -745,7 +747,7 @@ void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) { void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) { if (DL.isUnknown()) return; if (!BeforePrintingInsn) return; - + const LLVMContext &Context = EmissionDetails.MF->getFunction()->getContext(); if (DL.getScope(Context) != 0 && PrevDL != DL) { @@ -781,7 +783,7 @@ void JITEmitter::startFunction(MachineFunction &F) { uintptr_t ActualSize = 0; // Set the memory writable, if it's not already MemMgr->setMemoryWritable(); - + if (SizeEstimate > 0) { // SizeEstimate will be non-zero on reallocation attempts. ActualSize = SizeEstimate; @@ -859,7 +861,8 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } else if (MR.isBasicBlock()) { ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock()); } else if (MR.isConstantPoolIndex()) { - ResultPtr = (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); + ResultPtr = + (void*)getConstantPoolEntryAddress(MR.getConstantPoolIndex()); } else { assert(MR.isJumpTableIndex()); ResultPtr=(void*)getJumpTableEntryAddress(MR.getJumpTableIndex()); @@ -985,7 +988,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { CurBufferPtr = SavedCurBufferPtr; if (JITExceptionHandling) { - TheJIT->RegisterTable(FrameRegister); + TheJIT->RegisterTable(F.getFunction(), FrameRegister); } if (JITEmitDebugInfo) { @@ -1033,8 +1036,9 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { EmittedFunctions.erase(Emitted); } - // TODO: Do we need to unregister exception handling information from libgcc - // here? 
+ if(JITExceptionHandling) { + TheJIT->DeregisterTable(F); + } if (JITEmitDebugInfo) { DR->UnregisterFunction(F); @@ -1129,7 +1133,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { const std::vector &JT = MJTI->getJumpTables(); if (JT.empty() || JumpTableBase == 0) return; - + switch (MJTI->getEntryKind()) { case MachineJumpTableInfo::EK_Inline: return; @@ -1138,11 +1142,11 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { // .word LBB123 assert(MJTI->getEntrySize(*TheJIT->getTargetData()) == sizeof(void*) && "Cross JIT'ing?"); - + // For each jump table, map each target in the jump table to the address of // an emitted MachineBasicBlock. intptr_t *SlotPtr = (intptr_t*)JumpTableBase; - + for (unsigned i = 0, e = JT.size(); i != e; ++i) { const std::vector &MBBs = JT[i].MBBs; // Store the address of the basic block for this jump table slot in the @@ -1152,7 +1156,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) { } break; } - + case MachineJumpTableInfo::EK_Custom32: case MachineJumpTableInfo::EK_GPRel32BlockAddress: case MachineJumpTableInfo::EK_LabelDifference32: { diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp index 670fa7da1fed..9a9ed6d33484 100644 --- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp @@ -108,8 +108,8 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (op_write_native_code(Agent, F.getName().data(), reinterpret_cast(FnStart), FnStart, FnSize) == -1) { - DEBUG(dbgs() << "Failed to tell OProfile about native function " - << F.getName() << " at [" + DEBUG(dbgs() << "Failed to tell OProfile about native function " + << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); return; } @@ -153,9 +153,9 @@ void OProfileJITEventListener::NotifyFunctionEmitted( if (op_write_debug_line_info(Agent, FnStart, LineInfo.size(), 
&*LineInfo.begin()) == -1) { - DEBUG(dbgs() + DEBUG(dbgs() << "Failed to tell OProfile about line numbers for native function " - << F.getName() << " at [" + << F.getName() << " at [" << FnStart << "-" << ((char*)FnStart + FnSize) << "]\n"); } } diff --git a/lib/ExecutionEngine/JIT/TargetSelect.cpp b/lib/ExecutionEngine/JIT/TargetSelect.cpp index 6b7173cece18..8d92ab01c3db 100644 --- a/lib/ExecutionEngine/JIT/TargetSelect.cpp +++ b/lib/ExecutionEngine/JIT/TargetSelect.cpp @@ -84,7 +84,7 @@ TargetMachine *JIT::selectTarget(Module *Mod, } // Allocate a target... - TargetMachine *Target = + TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr); assert(Target && "Could not allocate target machine!"); return Target; diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index f7ed176fef78..655307943150 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMCJIT MCJIT.cpp TargetSelect.cpp + Intercept.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/Intercept.cpp b/lib/ExecutionEngine/MCJIT/Intercept.cpp new file mode 100644 index 000000000000..e431c848d630 --- /dev/null +++ b/lib/ExecutionEngine/MCJIT/Intercept.cpp @@ -0,0 +1,161 @@ +//===-- Intercept.cpp - System function interception routines -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// If a function call occurs to an external function, the JIT is designed to use +// the dynamic loader interface to find a function to call. This is useful for +// calling system calls and library functions that are not available in LLVM. +// Some system calls, however, need to be handled specially. 
For this reason, +// we intercept some of them here and use our own stubs to handle them. +// +//===----------------------------------------------------------------------===// + +#include "MCJIT.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Config/config.h" +using namespace llvm; + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +//===----------------------------------------------------------------------===// + +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) +#if defined(HAVE_SYS_STAT_H) +#include +#endif +#include +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. 
+ */ +namespace { +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +} +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... + exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)()) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... + return 0; // Always successful +} + +static int jit_noop() { + return 0; +} + +//===----------------------------------------------------------------------===// +// +/// getPointerToNamedFunction - This method returns the address of the specified +/// function by using the dynamic loader interface. As such it is only useful +/// for resolving library symbols, not code generated symbols. +/// +void *MCJIT::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { + if (!isSymbolSearchingDisabled()) { + // Check to see if this is one of the functions we want to intercept. Note, + // we cast to intptr_t here to silence a -pedantic warning that complains + // about casting a function pointer to a normal pointer. 
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit; + if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + // If this is an asm specifier, skip the sentinel. + if (NameStr[0] == 1) ++NameStr; + + // If it's an external function, look it up in the process image... + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // and has an asm specifier, try again without the underscore. + if (Name[0] == 1 && NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These + // are references to hidden visibility symbols that dlsym cannot resolve. + // If we have one of these, strip off $LDBLStub and try again. +#if defined(__APPLE__) && defined(__ppc__) + if (Name.size() > 9 && Name[Name.size()-9] == '$' && + memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { + // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. + // This mirrors logic in libSystemStubs.a. + std::string Prefix = std::string(Name.begin(), Name.end()-9); + if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) + return Ptr; + if (void *Ptr = getPointerToNamedFunction(Prefix, false)) + return Ptr; + } +#endif + } + + /// If a LazyFunctionCreator is installed, use it to get/create the function. 
+ if (LazyFunctionCreator) + if (void *RP = LazyFunctionCreator(Name)) + return RP; + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return 0; +} diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index f1e9dab250bf..3d4ee369ead0 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -1,4 +1,4 @@ -//===-- JIT.cpp - MC-based Just-in-Time Compiler --------------------------===// +//===-- MCJIT.cpp - MC-based Just-in-Time Compiler ------------------------===// // // The LLVM Compiler Infrastructure // @@ -8,10 +8,17 @@ //===----------------------------------------------------------------------===// #include "MCJIT.h" +#include "MCJITMemoryManager.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/MCJIT.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Target/TargetData.h" using namespace llvm; @@ -51,20 +58,47 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // If the target supports JIT code generation, create the JIT. 
if (TargetJITInfo *TJ = TM->getJITInfo()) - return new MCJIT(M, *TM, *TJ, JMM, OptLevel, GVsWithCode); + return new MCJIT(M, TM, *TJ, new MCJITMemoryManager(JMM), OptLevel, + GVsWithCode); if (ErrorStr) *ErrorStr = "target does not support JIT code generation"; return 0; } -MCJIT::MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, +MCJIT::MCJIT(Module *m, TargetMachine *tm, TargetJITInfo &tji, + RTDyldMemoryManager *MM, CodeGenOpt::Level OptLevel, bool AllocateGVsWithCode) - : ExecutionEngine(M) { + : ExecutionEngine(m), TM(tm), MemMgr(MM), M(m), OS(Buffer), Dyld(MM) { + + PM.add(new TargetData(*TM->getTargetData())); + + // Turn the machine code intermediate representation into bytes in memory + // that may be executed. + if (TM->addPassesToEmitMC(PM, Ctx, OS, CodeGenOpt::Default, false)) { + report_fatal_error("Target does not support MC emission!"); + } + + // Initialize passes. + // FIXME: When we support multiple modules, we'll want to move the code + // gen and finalization out of the constructor here and do it more + // on-demand as part of getPointerToFunction(). + PM.run(*M); + // Flush the output buffer so the SmallVector gets its data. + OS.flush(); + + // Load the object into the dynamic linker. + // FIXME: It would be nice to avoid making yet another copy. + MemoryBuffer *MB = MemoryBuffer::getMemBufferCopy(StringRef(Buffer.data(), + Buffer.size())); + if (Dyld.loadObject(MB)) + report_fatal_error(Dyld.getErrorString()); + // Resolve any relocations. 
+ Dyld.resolveRelocations(); } MCJIT::~MCJIT() { + delete MemMgr; } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { @@ -73,8 +107,15 @@ void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { } void *MCJIT::getPointerToFunction(Function *F) { - report_fatal_error("not yet implemented"); - return 0; + if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { + bool AbortOnFailure = !F->hasExternalWeakLinkage(); + void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure); + addGlobalMapping(F, Addr); + return Addr; + } + + Twine Name = TM->getMCAsmInfo()->getGlobalPrefix() + F->getName(); + return (void*)Dyld.getSymbolAddress(Name.str()); } void *MCJIT::recompileAndRelinkFunction(Function *F) { @@ -87,6 +128,102 @@ void MCJIT::freeMachineCodeForFunction(Function *F) { GenericValue MCJIT::runFunction(Function *F, const std::vector &ArgValues) { - report_fatal_error("not yet implemented"); + assert(F && "Function *F was null at entry to run()"); + + void *FPtr = getPointerToFunction(F); + assert(FPtr && "Pointer to fn's code was null after getPointerToFunction"); + const FunctionType *FTy = F->getFunctionType(); + const Type *RetTy = FTy->getReturnType(); + + assert((FTy->getNumParams() == ArgValues.size() || + (FTy->isVarArg() && FTy->getNumParams() <= ArgValues.size())) && + "Wrong number of arguments passed into function!"); + assert(FTy->getNumParams() == ArgValues.size() && + "This doesn't support passing arguments through varargs (yet)!"); + + // Handle some common cases first. These cases correspond to common `main' + // prototypes. + if (RetTy->isIntegerTy(32) || RetTy->isVoidTy()) { + switch (ArgValues.size()) { + case 3: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy() && + FTy->getParamType(2)->isPointerTy()) { + int (*PF)(int, char **, const char **) = + (int(*)(int, char **, const char **))(intptr_t)FPtr; + + // Call the function. 
+ GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]), + (const char **)GVTOP(ArgValues[2]))); + return rv; + } + break; + case 2: + if (FTy->getParamType(0)->isIntegerTy(32) && + FTy->getParamType(1)->isPointerTy()) { + int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr; + + // Call the function. + GenericValue rv; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue(), + (char **)GVTOP(ArgValues[1]))); + return rv; + } + break; + case 1: + if (FTy->getNumParams() == 1 && + FTy->getParamType(0)->isIntegerTy(32)) { + GenericValue rv; + int (*PF)(int) = (int(*)(int))(intptr_t)FPtr; + rv.IntVal = APInt(32, PF(ArgValues[0].IntVal.getZExtValue())); + return rv; + } + break; + } + } + + // Handle cases where no arguments are passed first. + if (ArgValues.empty()) { + GenericValue rv; + switch (RetTy->getTypeID()) { + default: llvm_unreachable("Unknown return type for function call!"); + case Type::IntegerTyID: { + unsigned BitWidth = cast(RetTy)->getBitWidth(); + if (BitWidth == 1) + rv.IntVal = APInt(BitWidth, ((bool(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 8) + rv.IntVal = APInt(BitWidth, ((char(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 16) + rv.IntVal = APInt(BitWidth, ((short(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 32) + rv.IntVal = APInt(BitWidth, ((int(*)())(intptr_t)FPtr)()); + else if (BitWidth <= 64) + rv.IntVal = APInt(BitWidth, ((int64_t(*)())(intptr_t)FPtr)()); + else + llvm_unreachable("Integer types > 64 bits not supported"); + return rv; + } + case Type::VoidTyID: + rv.IntVal = APInt(32, ((int(*)())(intptr_t)FPtr)()); + return rv; + case Type::FloatTyID: + rv.FloatVal = ((float(*)())(intptr_t)FPtr)(); + return rv; + case Type::DoubleTyID: + rv.DoubleVal = ((double(*)())(intptr_t)FPtr)(); + return rv; + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + llvm_unreachable("long double not supported yet"); + return rv; + case 
Type::PointerTyID: + return PTOGV(((void*(*)())(intptr_t)FPtr)()); + } + } + + assert("Full-featured argument passing not supported yet!"); return GenericValue(); } diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index cd1f989b10c7..1b507663e4ae 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -10,14 +10,37 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H #define LLVM_LIB_EXECUTIONENGINE_MCJIT_H +#include "llvm/PassManager.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/raw_ostream.h" namespace llvm { +// FIXME: This makes all kinds of horrible assumptions for the time being, +// like only having one module, not needing to worry about multi-threading, +// blah blah. Purely in get-it-up-and-limping mode for now. + class MCJIT : public ExecutionEngine { - MCJIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, - JITMemoryManager *JMM, CodeGenOpt::Level OptLevel, + MCJIT(Module *M, TargetMachine *tm, TargetJITInfo &tji, + RTDyldMemoryManager *MemMgr, CodeGenOpt::Level OptLevel, bool AllocateGVsWithCode); + + TargetMachine *TM; + MCContext *Ctx; + RTDyldMemoryManager *MemMgr; + + // FIXME: These may need moved to a separate 'jitstate' member like the + // non-MC JIT does for multithreading and such. Just keep them here for now. + PassManager PM; + Module *M; + // FIXME: This really doesn't belong here. + SmallVector Buffer; // Working buffer into which we JIT. + raw_svector_ostream OS; + + RuntimeDyld Dyld; + public: ~MCJIT(); @@ -35,6 +58,16 @@ class MCJIT : public ExecutionEngine { virtual GenericValue runFunction(Function *F, const std::vector &ArgValues); + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the dlsym function call. As such it is only + /// useful for resolving library symbols, not code generated symbols. 
+ /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true); /// @} /// @name (Private) Registration Interfaces /// @{ diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h new file mode 100644 index 000000000000..e3c6fda63b48 --- /dev/null +++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h @@ -0,0 +1,59 @@ +//===-- MCJITMemoryManager.h - Definition for the Memory Manager ---C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H +#define LLVM_LIB_EXECUTIONENGINE_MCJITMEMORYMANAGER_H + +#include "llvm/Module.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" +#include + +namespace llvm { + +// The MCJIT memory manager is a layer between the standard JITMemoryManager +// and the RuntimeDyld interface that maps objects, by name, onto their +// matching LLVM IR counterparts in the module(s) being compiled. +class MCJITMemoryManager : public RTDyldMemoryManager { + JITMemoryManager *JMM; + + // FIXME: Multiple modules. + Module *M; +public: + MCJITMemoryManager(JITMemoryManager *jmm) : JMM(jmm) {} + + // Allocate ActualSize bytes, or more, for the named function. Return + // a pointer to the allocated memory and update Size to reflect how much + // memory was actually allocated. + uint8_t *startFunctionBody(const char *Name, uintptr_t &Size) { + // FIXME: This should really reference the MCAsmInfo to get the global + // prefix. 
+ if (Name[0] == '_') ++Name; + Function *F = M->getFunction(Name); + assert(F && "No matching function in JIT IR Module!"); + return JMM->startFunctionBody(F, Size); + } + + // Mark the end of the function, including how much of the allocated + // memory was actually used. + void endFunctionBody(const char *Name, uint8_t *FunctionStart, + uint8_t *FunctionEnd) { + // FIXME: This should really reference the MCAsmInfo to get the global + // prefix. + if (Name[0] == '_') ++Name; + Function *F = M->getFunction(Name); + assert(F && "No matching function in JIT IR Module!"); + JMM->endFunctionBody(F, FunctionStart, FunctionEnd); + } + +}; + +} // End llvm namespace + +#endif diff --git a/lib/ExecutionEngine/Makefile b/lib/ExecutionEngine/Makefile index 1858d776616c..9a649a52cf9e 100644 --- a/lib/ExecutionEngine/Makefile +++ b/lib/ExecutionEngine/Makefile @@ -8,6 +8,6 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. LIBRARYNAME = LLVMExecutionEngine -PARALLEL_DIRS = Interpreter JIT MCJIT +PARALLEL_DIRS = Interpreter JIT MCJIT RuntimeDyld include $(LEVEL)/Makefile.common diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt new file mode 100644 index 000000000000..9e53f8757ec0 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -0,0 +1,3 @@ +add_llvm_library(LLVMRuntimeDyld + RuntimeDyld.cpp + ) diff --git a/lib/ExecutionEngine/RuntimeDyld/Makefile b/lib/ExecutionEngine/RuntimeDyld/Makefile new file mode 100644 index 000000000000..5d6f26d950fe --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/Makefile @@ -0,0 +1,13 @@ +##===- lib/ExecutionEngine/MCJIT/Makefile ------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+LIBRARYNAME = LLVMRuntimeDyld
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
new file mode 100644
index 000000000000..065e5e3d8a33
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -0,0 +1,704 @@
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT ----*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace llvm::object;
+
+// Empty out-of-line virtual destructor as the key function.
+RTDyldMemoryManager::~RTDyldMemoryManager() {}
+
+namespace llvm {
+// Private implementation behind RuntimeDyld: loads a Mach-O object, copies
+// each function into memory obtained from the memory manager, and resolves
+// relocations by symbol name.
+class RuntimeDyldImpl {
+  unsigned CPUType;
+  unsigned CPUSubtype;
+
+  // The MemoryManager to load objects into.
+  RTDyldMemoryManager *MemMgr;
+
+  // FIXME: This all assumes we're dealing with external symbols for anything
+  //        explicitly referenced. I.e., we can index by name and things
+  //        will work out. In practice, this may not be the case, so we
+  //        should find a way to effectively generalize.
+
+  // For each function, we have a MemoryBlock of its instruction data.
+  StringMap<sys::MemoryBlock> Functions;
+
+  // Master symbol table. As modules are loaded and external symbols are
+  // resolved, their addresses are stored here.
+  StringMap<uint8_t*> SymbolTable;
+
+  // For each symbol, keep a list of relocations based on it. Anytime
+  // its address is reassigned (the JIT re-compiled the function, e.g.),
+  // the relocations get re-resolved.
+  struct RelocationEntry {
+    std::string Target;     // Object this relocation is contained in.
+    uint64_t    Offset;     // Offset into the object for the relocation.
+    uint32_t    Data;       // Second word of the raw macho relocation entry.
+    int64_t     Addend;     // Addend encoded in the instruction itself, if any.
+    bool        isResolved; // Has this relocation been resolved previously?
+
+    RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
+      : Target(t), Offset(offset), Data(data), Addend(addend),
+        isResolved(false) {}
+  };
+  typedef SmallVector<RelocationEntry, 4> RelocationList;
+  StringMap<RelocationList> Relocations;
+
+  // FIXME: Also keep a map of all the relocations contained in an object. Use
+  //        this to dynamically answer whether all of the relocations in it have
+  //        been resolved or not.
+
+  bool HasError;
+  std::string ErrorStr;
+
+  // Set the error state and record an error string.
+  bool Error(const Twine &Msg) {
+    ErrorStr = Msg.str();
+    HasError = true;
+    return true;
+  }
+
+  void extractFunction(StringRef Name, uint8_t *StartAddress,
+                       uint8_t *EndAddress);
+  bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+                         unsigned Type, unsigned Size);
+  bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+                               unsigned Type, unsigned Size);
+  bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+                            unsigned Type, unsigned Size);
+
+  bool loadSegment32(const MachOObject *Obj,
+                     const MachOObject::LoadCommandInfo *SegmentLCI,
+                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+  bool loadSegment64(const MachOObject *Obj,
+                     const MachOObject::LoadCommandInfo *SegmentLCI,
+                     const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+
+public:
+  RuntimeDyldImpl(RTDyldMemoryManager *mm)
+    : CPUType(0), CPUSubtype(0), MemMgr(mm), HasError(false) {}
+
+  bool loadObject(MemoryBuffer *InputBuffer);
+
+  void *getSymbolAddress(StringRef Name) {
+    // FIXME: Just look up as a function for now. Overly simple of course.
+    // Work in progress.
+    return SymbolTable.lookup(Name);
+  }
+
+  void resolveRelocations();
+
+  void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+
+  // Is the linker in an error state?
+  bool hasError() { return HasError; }
+
+  // Mark the error condition as handled and continue.
+  void clearError() { HasError = false; }
+
+  // Get the error message.
+  StringRef getErrorString() { return ErrorStr; }
+};
+
+// Copy the function bytes [StartAddress, EndAddress] (inclusive) into memory
+// from the memory manager and record that address as the symbol's address.
+void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
+                                      uint8_t *EndAddress) {
+  // Allocate memory for the function via the memory manager. It may hand
+  // back more than was asked for, so keep the allocation size separate from
+  // the payload size.
+  uintptr_t Size = EndAddress - StartAddress + 1;
+  uintptr_t AllocSize = Size;
+  uint8_t *Mem = MemMgr->startFunctionBody(Name.data(), AllocSize);
+  assert(AllocSize >= Size &&
+         "Memory manager failed to allocate enough memory!");
+  // Copy the function payload into the memory block.
+  memcpy(Mem, StartAddress, Size);
+  MemMgr->endFunctionBody(Name.data(), Mem, Mem + Size);
+  // Remember where we put it.
+  Functions[Name] = sys::MemoryBlock(Mem, Size);
+  // Default the assigned address for this symbol to wherever this
+  // allocated it.
+  SymbolTable[Name] = Mem;
+  DEBUG(dbgs() << " allocated to " << Mem << "\n");
+}
+
+// Apply a single relocation; returns true on error, false on success.
+bool RuntimeDyldImpl::
+resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+                  unsigned Type, unsigned Size) {
+  // This just dispatches to the proper target specific routine.
+  switch (CPUType) {
+  default: return Error("Unsupported CPU type!");
+  case mach::CTM_x86_64:
+    return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
+                                   isPCRel, Type, Size);
+  case mach::CTM_ARM:
+    return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
+                                isPCRel, Type, Size);
+  }
+  llvm_unreachable("");
+}
+
+bool RuntimeDyldImpl::
+resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
+                        bool isPCRel, unsigned Type,
+                        unsigned Size) {
+  // If the relocation is PC-relative, the value to be encoded is the
+  // pointer difference.
+  if (isPCRel)
+    // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
+    // address. Is that expected? Only for branches, perhaps?
+    Value -= Address + 4;
+
+  switch(Type) {
+  default:
+    llvm_unreachable("Invalid relocation type!");
+  case macho::RIT_X86_64_Unsigned:
+  case macho::RIT_X86_64_Branch: {
+    // Mask in the target value a byte at a time (we don't have an alignment
+    // guarantee for the target address, so this is safest).
+    uint8_t *p = (uint8_t*)Address;
+    for (unsigned i = 0; i < Size; ++i) {
+      *p++ = (uint8_t)Value;
+      Value >>= 8;
+    }
+    return false;
+  }
+  case macho::RIT_X86_64_Signed:
+  case macho::RIT_X86_64_GOTLoad:
+  case macho::RIT_X86_64_GOT:
+  case macho::RIT_X86_64_Subtractor:
+  case macho::RIT_X86_64_Signed1:
+  case macho::RIT_X86_64_Signed2:
+  case macho::RIT_X86_64_Signed4:
+  case macho::RIT_X86_64_TLV:
+    return Error("Relocation type not implemented yet!");
+  }
+  return false;
+}
+
+bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
+                                           bool isPCRel, unsigned Type,
+                                           unsigned Size) {
+  // If the relocation is PC-relative, the value to be encoded is the
+  // pointer difference.
+  if (isPCRel) {
+    Value -= Address;
+    // ARM PCRel relocations have an effective-PC offset of two instructions
+    // (four bytes in Thumb mode, 8 bytes in ARM mode).
+    // FIXME: For now, assume ARM mode.
+    Value -= 8;
+  }
+
+  switch(Type) {
+  default:
+    llvm_unreachable("Invalid relocation type!");
+  case macho::RIT_Vanilla: {
+    // NOTE(review): this unreachable makes the byte-wise store below dead
+    // code; presumably a placeholder until vanilla relocations are tested.
+    llvm_unreachable("Invalid relocation type!");
+    // Mask in the target value a byte at a time (we don't have an alignment
+    // guarantee for the target address, so this is safest).
+    uint8_t *p = (uint8_t*)Address;
+    for (unsigned i = 0; i < Size; ++i) {
+      *p++ = (uint8_t)Value;
+      Value >>= 8;
+    }
+    break;
+  }
+  case macho::RIT_ARM_Branch24Bit: {
+    // Mask the value into the target address. We know instructions are
+    // 32-bit aligned, so we can do it all at once.
+    uint32_t *p = (uint32_t*)Address;
+    // The low two bits of the value are not encoded.
+    Value >>= 2;
+    // Mask the value to 24 bits.
+    Value &= 0xffffff;
+    // FIXME: If the destination is a Thumb function (and the instruction
+    // is a non-predicated BL instruction), we need to change it to a BLX
+    // instruction instead.
+
+    // Insert the value into the instruction.
+    *p = (*p & ~0xffffff) | Value;
+    break;
+  }
+  case macho::RIT_ARM_ThumbBranch22Bit:
+  case macho::RIT_ARM_ThumbBranch32Bit:
+  case macho::RIT_ARM_Half:
+  case macho::RIT_ARM_HalfDifference:
+  case macho::RIT_Pair:
+  case macho::RIT_Difference:
+  case macho::RIT_ARM_LocalDifference:
+  case macho::RIT_ARM_PreboundLazyPointer:
+    return Error("Relocation type not implemented yet!");
+  }
+  return false;
+}
+
+// Extract the functions and relocation entries from each section of a 32-bit
+// segment. Returns true (with the error state set) on failure.
+bool RuntimeDyldImpl::
+loadSegment32(const MachOObject *Obj,
+              const MachOObject::LoadCommandInfo *SegmentLCI,
+              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+  InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
+  Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
+  if (!SegmentLC)
+    return Error("unable to load segment load command");
+
+  for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
+    InMemoryStruct<macho::Section> Sect;
+    Obj->ReadSection(*SegmentLCI, SectNum, Sect);
+    if (!Sect)
+      return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+    // FIXME: Improve check.
+    if (Sect->Flags != 0x80000400)
+      return Error("unsupported section type!");
+
+    // Address and names of symbols in the section.
+    typedef std::pair<uint64_t, StringRef> SymbolEntry;
+    SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the names, in this section or not. Used when we're
+    // dealing with relocation entries.
+    SmallVector<StringRef, 64> SymbolNames;
+    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+      InMemoryStruct<macho::SymbolTableEntry> STE;
+      Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
+      if (!STE)
+        return Error("unable to read symbol: '" + Twine(i) + "'");
+      if (STE->SectionIndex > SegmentLC->NumSections)
+        return Error("invalid section index for symbol: '" + Twine(i) + "'");
+      // Get the symbol name.
+      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+      SymbolNames.push_back(Name);
+
+      // Just skip symbols not defined in this section.
+      if ((unsigned)STE->SectionIndex - 1 != SectNum)
+        continue;
+
+      // FIXME: Check the symbol type and flags.
+      if (STE->Type != 0xF)  // external, defined in this section.
+        return Error("unexpected symbol type!");
+      // Flags == 0x8 marks a thumb function for ARM, which is fine as it
+      // doesn't require any special handling here.
+      if (STE->Flags != 0x0 && STE->Flags != 0x8)
+        return Error("unexpected symbol type!");
+
+      // Remember the symbol.
+      Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+            (Sect->Address + STE->Value) << "\n");
+    }
+    // Sort the symbols by address, just in case they didn't come in that way.
+    array_pod_sort(Symbols.begin(), Symbols.end());
+    // The extraction below needs at least one symbol to delimit a function.
+    if (Symbols.empty())
+      return Error("no symbols defined in section: '" + Twine(SectNum) + "'");
+
+    // Extract the function data.
+    uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
+                                           SegmentLC->FileSize).data();
+    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+      uint64_t StartOffset = Sect->Address + Symbols[i].first;
+      uint64_t EndOffset = Symbols[i + 1].first - 1;
+      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
+      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+    }
+    // The last symbol we do after since the end address is calculated
+    // differently because there is no next symbol to reference.
+    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+    uint64_t EndOffset = Sect->Size - 1;
+    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
+    extractFunction(Symbols[Symbols.size()-1].second,
+                    Base + StartOffset, Base + EndOffset);
+
+    // Now extract the relocation information for each function and process it.
+    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+      InMemoryStruct<macho::RelocationEntry> RE;
+      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+      if (!RE)
+        return Error("unable to read relocation entry: '" + Twine(j) + "'");
+      if (RE->Word0 & macho::RF_Scattered)
+        return Error("NOT YET IMPLEMENTED: scattered relocations.");
+      // Word0 of the relocation is the offset into the section where the
+      // relocation should be applied. We need to translate that into an
+      // offset into a function since that's our atom.
+      uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in section is almost always going
+      // to be very small (usually just one), so until we have use cases
+      // where that's not true, just use a trivial linear search.
+      unsigned SymbolNum;
+      unsigned NumSymbols = Symbols.size();
+      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+             "No symbol containing relocation!");
+      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+        if (Symbols[SymbolNum + 1].first > Offset)
+          break;
+      // Adjust the offset to be relative to the symbol.
+      Offset -= Symbols[SymbolNum].first;
+      // Get the name of the symbol containing the relocation.
+      StringRef TargetName = Symbols[SymbolNum].second;
+
+      bool isExtern = (RE->Word1 >> 27) & 1;
+      // Figure out the source symbol of the relocation. If isExtern is true,
+      // this relocation references the symbol table, otherwise it references
+      // a section in the same object, numbered from 1 through NumSections
+      // (SectionBases is [0, NumSections-1]).
+      // FIXME: Some targets (ARM) use internal relocations even for
+      // externally visible symbols, if the definition is in the same
+      // file as the reference. We need to convert those back to by-name
+      // references. We can resolve the address based on the section
+      // offset and see if we have a symbol at that address. If we do,
+      // use that; otherwise, puke.
+      if (!isExtern)
+        return Error("Internal relocations not supported.");
+      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+      if (SourceNum >= SymbolNames.size())
+        return Error("invalid symbol index in relocation: '" + Twine(j) + "'");
+      StringRef SourceName = SymbolNames[SourceNum];
+
+      // FIXME: Get the relocation addend from the target address.
+
+      // Now store the relocation information. Associate it with the source
+      // symbol.
+      Relocations[SourceName].push_back(RelocationEntry(TargetName,
+                                                        Offset,
+                                                        RE->Word1,
+                                                        0 /*Addend*/));
+      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "(Word1: "
+                   << format("0x%x", RE->Word1) << ")\n");
+    }
+  }
+  return false;
+}
+
+
+// Extract the functions and relocation entries from each section of a 64-bit
+// segment. Returns true (with the error state set) on failure.
+bool RuntimeDyldImpl::
+loadSegment64(const MachOObject *Obj,
+              const MachOObject::LoadCommandInfo *SegmentLCI,
+              const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+  InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
+  Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
+  if (!Segment64LC)
+    return Error("unable to load segment load command");
+
+  for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
+    InMemoryStruct<macho::Section64> Sect;
+    Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
+    if (!Sect)
+      return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+    // FIXME: Improve check.
+    if (Sect->Flags != 0x80000400)
+      return Error("unsupported section type!");
+
+    // Address and names of symbols in the section.
+    typedef std::pair<uint64_t, StringRef> SymbolEntry;
+    SmallVector<SymbolEntry, 64> Symbols;
+    // Index of all the names, in this section or not. Used when we're
+    // dealing with relocation entries.
+    SmallVector<StringRef, 64> SymbolNames;
+    for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+      InMemoryStruct<macho::Symbol64TableEntry> STE;
+      Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
+      if (!STE)
+        return Error("unable to read symbol: '" + Twine(i) + "'");
+      if (STE->SectionIndex > Segment64LC->NumSections)
+        return Error("invalid section index for symbol: '" + Twine(i) + "'");
+      // Get the symbol name.
+      StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+      SymbolNames.push_back(Name);
+
+      // Just skip symbols not defined in this section.
+      if ((unsigned)STE->SectionIndex - 1 != SectNum)
+        continue;
+
+      // FIXME: Check the symbol type and flags.
+      if (STE->Type != 0xF)  // external, defined in this section.
+        return Error("unexpected symbol type!");
+      if (STE->Flags != 0x0)
+        return Error("unexpected symbol type!");
+
+      // Remember the symbol.
+      Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+      DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+            (Sect->Address + STE->Value) << "\n");
+    }
+    // Sort the symbols by address, just in case they didn't come in that way.
+    array_pod_sort(Symbols.begin(), Symbols.end());
+    // The extraction below needs at least one symbol to delimit a function.
+    if (Symbols.empty())
+      return Error("no symbols defined in section: '" + Twine(SectNum) + "'");
+
+    // Extract the function data.
+    uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
+                                           Segment64LC->FileSize).data();
+    for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+      uint64_t StartOffset = Sect->Address + Symbols[i].first;
+      uint64_t EndOffset = Symbols[i + 1].first - 1;
+      DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+                   << " from [" << StartOffset << ", " << EndOffset << "]\n");
+      extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+    }
+    // The last symbol we do after since the end address is calculated
+    // differently because there is no next symbol to reference.
+    uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+    uint64_t EndOffset = Sect->Size - 1;
+    DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+                 << " from [" << StartOffset << ", " << EndOffset << "]\n");
+    extractFunction(Symbols[Symbols.size()-1].second,
+                    Base + StartOffset, Base + EndOffset);
+
+    // Now extract the relocation information for each function and process it.
+    for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+      InMemoryStruct<macho::RelocationEntry> RE;
+      Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+      if (!RE)
+        return Error("unable to read relocation entry: '" + Twine(j) + "'");
+      if (RE->Word0 & macho::RF_Scattered)
+        return Error("NOT YET IMPLEMENTED: scattered relocations.");
+      // Word0 of the relocation is the offset into the section where the
+      // relocation should be applied. We need to translate that into an
+      // offset into a function since that's our atom.
+      uint32_t Offset = RE->Word0;
+      // Look for the function containing the address. This is used for JIT
+      // code, so the number of functions in section is almost always going
+      // to be very small (usually just one), so until we have use cases
+      // where that's not true, just use a trivial linear search.
+      unsigned SymbolNum;
+      unsigned NumSymbols = Symbols.size();
+      assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+             "No symbol containing relocation!");
+      for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+        if (Symbols[SymbolNum + 1].first > Offset)
+          break;
+      // Adjust the offset to be relative to the symbol.
+      Offset -= Symbols[SymbolNum].first;
+      // Get the name of the symbol containing the relocation.
+      StringRef TargetName = Symbols[SymbolNum].second;
+
+      bool isExtern = (RE->Word1 >> 27) & 1;
+      // Figure out the source symbol of the relocation. If isExtern is true,
+      // this relocation references the symbol table, otherwise it references
+      // a section in the same object, numbered from 1 through NumSections
+      // (SectionBases is [0, NumSections-1]).
+      if (!isExtern)
+        return Error("Internal relocations not supported.");
+      uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+      if (SourceNum >= SymbolNames.size())
+        return Error("invalid symbol index in relocation: '" + Twine(j) + "'");
+      StringRef SourceName = SymbolNames[SourceNum];
+
+      // FIXME: Get the relocation addend from the target address.
+
+      // Now store the relocation information. Associate it with the source
+      // symbol.
+      Relocations[SourceName].push_back(RelocationEntry(TargetName,
+                                                        Offset,
+                                                        RE->Word1,
+                                                        0 /*Addend*/));
+      DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+                   << " from '" << SourceName << "(Word1: "
+                   << format("0x%x", RE->Word1) << ")\n");
+    }
+  }
+  return false;
+}
+
+// Load a Mach-O object from InputBuffer. Returns true on error.
+bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
+  // If the linker is in an error state, don't do anything.
+  if (hasError())
+    return true;
+  // Load the Mach-O wrapper object.
+  std::string LoadError;
+  OwningPtr<MachOObject> Obj(
+    MachOObject::LoadFromBuffer(InputBuffer, &LoadError));
+  if (!Obj)
+    return Error("unable to load object: '" + LoadError + "'");
+
+  // Get the CPU type information from the header.
+  const macho::Header &Header = Obj->getHeader();
+
+  // FIXME: Error checking that the loaded object is compatible with
+  //        the system we're running on.
+  CPUType = Header.CPUType;
+  CPUSubtype = Header.CPUSubtype;
+
+  // Validate that the load commands match what we expect.
+  const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
+    *DysymtabLCI = 0;
+  for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+    const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
+    switch (LCI.Command.Type) {
+    case macho::LCT_Segment:
+    case macho::LCT_Segment64:
+      if (SegmentLCI)
+        return Error("unexpected input object (multiple segments)");
+      SegmentLCI = &LCI;
+      break;
+    case macho::LCT_Symtab:
+      if (SymtabLCI)
+        return Error("unexpected input object (multiple symbol tables)");
+      SymtabLCI = &LCI;
+      break;
+    case macho::LCT_Dysymtab:
+      if (DysymtabLCI)
+        return Error("unexpected input object (multiple dynamic symbol tables)");
+      DysymtabLCI = &LCI;
+      break;
+    default:
+      return Error("unexpected input object (unexpected load command)");
+    }
+  }
+
+  if (!SymtabLCI)
+    return Error("no symbol table found in object");
+  if (!SegmentLCI)
+    return Error("no segment found in object");
+
+  // Read and register the symbol table data.
+  InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+  Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
+  if (!SymtabLC)
+    return Error("unable to load symbol table load command");
+  Obj->RegisterStringTable(*SymtabLC);
+
+  // Read the dynamic link-edit information, if present (not present in static
+  // objects).
+  if (DysymtabLCI) {
+    InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
+    Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
+    if (!DysymtabLC)
+      return Error("unable to load dynamic link-edit load command");
+
+    // FIXME: We don't support anything interesting yet.
+//    if (DysymtabLC->LocalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: local symbol entries");
+//    if (DysymtabLC->ExternalSymbolsIndex != 0)
+//      return Error("NOT YET IMPLEMENTED: non-external symbol entries");
+//    if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
+//      return Error("NOT YET IMPLEMENTED: undefined symbol entries");
+  }
+
+  // Load the segment load command.
+  if (SegmentLCI->Command.Type == macho::LCT_Segment) {
+    if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  } else {
+    if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
+      return true;
+  }
+
+  return false;
+}
+
+// Resolve the relocations for all symbols we currently know about.
+void RuntimeDyldImpl::resolveRelocations() {
+  // Just iterate over the symbols in our symbol table and assign their
+  // addresses.
+  StringMap<uint8_t*>::iterator i = SymbolTable.begin();
+  StringMap<uint8_t*>::iterator e = SymbolTable.end();
+  for (;i != e; ++i)
+    reassignSymbolAddress(i->getKey(), i->getValue());
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+  // Assign the address in our symbol table.
+  SymbolTable[Name] = Addr;
+
+  RelocationList &Relocs = Relocations[Name];
+  for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+    RelocationEntry &RE = Relocs[i];
+    // lookup() (unlike operator[]) never inserts; skip targets not loaded yet.
+    uint8_t *TargetBase = SymbolTable.lookup(RE.Target);
+    if (!TargetBase)
+      continue;
+    uint8_t *Target = TargetBase + RE.Offset;
+    bool isPCRel = (RE.Data >> 24) & 1;
+    unsigned Type = (RE.Data >> 28) & 0xf;
+    unsigned Size = 1 << ((RE.Data >> 25) & 3);
+
+    DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
+          << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
+          << " from '" << Name << " (" << format("%p", Addr) << ")"
+          << "(" << (isPCRel ? "pcrel" : "absolute")
+          << ", type: " << Type << ", Size: " << Size << ").\n");
+
+    // resolveRelocation() returns true on failure; only mark success.
+    RE.isResolved = !resolveRelocation(Target, Addr, isPCRel, Type, Size);
+  }
+}
+
+//===----------------------------------------------------------------------===//
+// RuntimeDyld class implementation
+RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) {
+  Dyld = new RuntimeDyldImpl(MM);
+}
+
+RuntimeDyld::~RuntimeDyld() {
+  delete Dyld;
+}
+
+bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
+  return Dyld->loadObject(InputBuffer);
+}
+
+void *RuntimeDyld::getSymbolAddress(StringRef Name) {
+  return Dyld->getSymbolAddress(Name);
+}
+
+void RuntimeDyld::resolveRelocations() {
+  Dyld->resolveRelocations();
+}
+
+void RuntimeDyld::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+  Dyld->reassignSymbolAddress(Name, Addr);
+}
+
+StringRef RuntimeDyld::getErrorString() {
+  return Dyld->getErrorString();
+}
+
+} // end namespace llvm
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 5aa06abdd989..f372db2403c9 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -505,6 +505,7 @@ static bool LinkGlobals(Module *Dest, const Module *Src,
                            SGV->getType()->getAddressSpace());
       // Propagate alignment, visibility and section info.
CopyGVAttributes(NewDGV, SGV); + NewDGV->setUnnamedAddr(SGV->hasUnnamedAddr()); // If the LLVM runtime renamed the global, but it is an externally visible // symbol, DGV must be an existing global with internal linkage. Rename diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index f1811a1716fb..6aed059f0f46 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -8,6 +8,7 @@ add_llvm_library(LLVMMC MCCodeEmitter.cpp MCContext.cpp MCDisassembler.cpp + MCELF.cpp MCELFObjectTargetWriter.cpp MCELFStreamer.cpp MCExpr.cpp diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index ea1629d30565..23c6d4c1e4c3 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -11,20 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "ELFObjectWriter.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCELFObjectWriter.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -38,39 +32,14 @@ #include using namespace llvm; -static unsigned GetType(const MCSymbolData &SD) { - uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift; - assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || - Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS); - return Type; +bool ELFObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { + const MCFixupKindInfo &FKI = + Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind); + + return FKI.Flags 
& MCFixupKindInfo::FKF_IsPCRel; } -static unsigned GetBinding(const MCSymbolData &SD) { - uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - return Binding; -} - -static void SetBinding(MCSymbolData &SD, unsigned Binding) { - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); - SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); -} - -static unsigned GetVisibility(MCSymbolData &SD) { - unsigned Visibility = - (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift; - assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || - Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); - return Visibility; -} - - -static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { +bool ELFObjectWriter::RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { switch (Variant) { default: return false; @@ -90,345 +59,6 @@ static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant) { } } -static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) { - const MCFixupKindInfo &FKI = - Asm.getBackend().getFixupKindInfo((MCFixupKind) Kind); - - return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel; -} - -namespace { - class ELFObjectWriter : public MCObjectWriter { - protected: - /*static bool isFixupKindX86RIPRel(unsigned Kind) { - return Kind == X86::reloc_riprel_4byte || - Kind == X86::reloc_riprel_4byte_movq_load; - }*/ - - - /// ELFSymbolData - Helper struct for containing some precomputed information - /// on symbols. - struct ELFSymbolData { - MCSymbolData *SymbolData; - uint64_t StringIndex; - uint32_t SectionIndex; - - // Support lexicographic sorting. 
- bool operator<(const ELFSymbolData &RHS) const { - if (GetType(*SymbolData) == ELF::STT_FILE) - return true; - if (GetType(*RHS.SymbolData) == ELF::STT_FILE) - return false; - return SymbolData->getSymbol().getName() < - RHS.SymbolData->getSymbol().getName(); - } - }; - - /// @name Relocation Data - /// @{ - - struct ELFRelocationEntry { - // Make these big enough for both 32-bit and 64-bit - uint64_t r_offset; - int Index; - unsigned Type; - const MCSymbol *Symbol; - uint64_t r_addend; - - ELFRelocationEntry() - : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {} - - ELFRelocationEntry(uint64_t RelocOffset, int Idx, - unsigned RelType, const MCSymbol *Sym, - uint64_t Addend) - : r_offset(RelocOffset), Index(Idx), Type(RelType), - Symbol(Sym), r_addend(Addend) {} - - // Support lexicographic sorting. - bool operator<(const ELFRelocationEntry &RE) const { - return RE.r_offset < r_offset; - } - }; - - /// The target specific ELF writer instance. - llvm::OwningPtr TargetObjectWriter; - - SmallPtrSet UsedInReloc; - SmallPtrSet WeakrefUsedInReloc; - DenseMap Renames; - - llvm::DenseMap > Relocations; - DenseMap SectionStringTableIndex; - - /// @} - /// @name Symbol Table Data - /// @{ - - SmallString<256> StringTable; - std::vector LocalSymbolData; - std::vector ExternalSymbolData; - std::vector UndefinedSymbolData; - - /// @} - - bool NeedsGOT; - - bool NeedsSymtabShndx; - - // This holds the symbol table index of the last local symbol. - unsigned LastLocalSymbolIndex; - // This holds the .strtab section index. - unsigned StringTableIndex; - // This holds the .symtab section index. 
- unsigned SymbolTableIndex; - - unsigned ShstrtabIndex; - - - const MCSymbol *SymbolToReloc(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F) const; - - // For arch-specific emission of explicit reloc symbol - virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - bool IsBSS) const { - return NULL; - } - - bool is64Bit() const { return TargetObjectWriter->is64Bit(); } - bool hasRelocationAddend() const { - return TargetObjectWriter->hasRelocationAddend(); - } - - public: - ELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, bool IsLittleEndian) - : MCObjectWriter(_OS, IsLittleEndian), - TargetObjectWriter(MOTW), - NeedsGOT(false), NeedsSymtabShndx(false){ - } - - virtual ~ELFObjectWriter(); - - void WriteWord(uint64_t W) { - if (is64Bit()) - Write64(W); - else - Write32(W); - } - - void StringLE16(char *buf, uint16_t Value) { - buf[0] = char(Value >> 0); - buf[1] = char(Value >> 8); - } - - void StringLE32(char *buf, uint32_t Value) { - StringLE16(buf, uint16_t(Value >> 0)); - StringLE16(buf + 2, uint16_t(Value >> 16)); - } - - void StringLE64(char *buf, uint64_t Value) { - StringLE32(buf, uint32_t(Value >> 0)); - StringLE32(buf + 4, uint32_t(Value >> 32)); - } - - void StringBE16(char *buf ,uint16_t Value) { - buf[0] = char(Value >> 8); - buf[1] = char(Value >> 0); - } - - void StringBE32(char *buf, uint32_t Value) { - StringBE16(buf, uint16_t(Value >> 16)); - StringBE16(buf + 2, uint16_t(Value >> 0)); - } - - void StringBE64(char *buf, uint64_t Value) { - StringBE32(buf, uint32_t(Value >> 32)); - StringBE32(buf + 4, uint32_t(Value >> 0)); - } - - void String8(MCDataFragment &F, uint8_t Value) { - char buf[1]; - buf[0] = Value; - F.getContents() += StringRef(buf, 1); - } - - void String16(MCDataFragment &F, uint16_t Value) { - char buf[2]; - if (isLittleEndian()) - StringLE16(buf, Value); - else - StringBE16(buf, Value); - F.getContents() += StringRef(buf, 2); - } - - 
void String32(MCDataFragment &F, uint32_t Value) { - char buf[4]; - if (isLittleEndian()) - StringLE32(buf, Value); - else - StringBE32(buf, Value); - F.getContents() += StringRef(buf, 4); - } - - void String64(MCDataFragment &F, uint64_t Value) { - char buf[8]; - if (isLittleEndian()) - StringLE64(buf, Value); - else - StringBE64(buf, Value); - F.getContents() += StringRef(buf, 8); - } - - virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections); - - /// Default e_flags = 0 - virtual void WriteEFlags() { Write32(0); } - - virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - uint64_t name, uint8_t info, - uint64_t value, uint64_t size, - uint8_t other, uint32_t shndx, - bool Reserved); - - virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - ELFSymbolData &MSD, - const MCAsmLayout &Layout); - - typedef DenseMap SectionIndexMapTy; - virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF, - const MCAssembler &Asm, - const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap); - - virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, - const MCFragment *Fragment, const MCFixup &Fixup, - MCValue Target, uint64_t &FixedValue); - - virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm, - const MCSymbol *S); - - // Map from a group section to the signature symbol - typedef DenseMap GroupMapTy; - // Map from a signature symbol to the group section - typedef DenseMap RevGroupMapTy; - - /// ComputeSymbolTable - Compute the symbol table data - /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. 
- virtual void ComputeSymbolTable(MCAssembler &Asm, - const SectionIndexMapTy &SectionIndexMap, - RevGroupMapTy RevGroupMap); - - virtual void ComputeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap); - - virtual void WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, - const MCSectionData &SD); - - virtual void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout) { - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - WriteRelocation(Asm, Layout, *it); - } - } - - virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap); - - // Create the sections that show up in the symbol table. Currently - // those are the .note.GNU-stack section and the group sections. - virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, - GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap); - - virtual void ExecutePostLayoutBinding(MCAssembler &Asm, - const MCAsmLayout &Layout); - - virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, - uint64_t Address, uint64_t Offset, - uint64_t Size, uint32_t Link, uint32_t Info, - uint64_t Alignment, uint64_t EntrySize); - - virtual void WriteRelocationsFragment(const MCAssembler &Asm, - MCDataFragment *F, - const MCSectionData *SD); - - virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); - virtual void WriteSection(MCAssembler &Asm, - const SectionIndexMapTy &SectionIndexMap, - uint32_t GroupSymbolIndex, - uint64_t Offset, uint64_t Size, uint64_t Alignment, - const MCSectionELF &Section); - - protected: - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend) = 0; - }; - - //===- X86ELFObjectWriter -------------------------------------------===// - - class X86ELFObjectWriter : public ELFObjectWriter { - public: - X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, - bool 
IsLittleEndian); - - virtual ~X86ELFObjectWriter(); - protected: - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend); - }; - - - //===- ARMELFObjectWriter -------------------------------------------===// - - class ARMELFObjectWriter : public ELFObjectWriter { - public: - // FIXME: MCAssembler can't yet return the Subtarget, - enum { DefaultEABIVersion = 0x05000000U }; - - ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, - bool IsLittleEndian); - - virtual ~ARMELFObjectWriter(); - - virtual void WriteEFlags(); - protected: - virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, - const MCValue &Target, - const MCFragment &F, - bool IsBSS) const; - - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend); - }; - - //===- MBlazeELFObjectWriter -------------------------------------------===// - - class MBlazeELFObjectWriter : public ELFObjectWriter { - public: - MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW, - raw_ostream &_OS, - bool IsLittleEndian); - - virtual ~MBlazeELFObjectWriter(); - protected: - virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, - bool IsPCRel, bool IsRelocWithSymbol, - int64_t Addend); - }; -} - ELFObjectWriter::~ELFObjectWriter() {} @@ -533,7 +163,8 @@ void ELFObjectWriter::WriteSymbolEntry(MCDataFragment *SymtabF, } } -static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout) { +uint64_t ELFObjectWriter::SymbolValue(MCSymbolData &Data, + const MCAsmLayout &Layout) { if (Data.isCommon() && Data.isExternal()) return Data.getCommonAlignment(); @@ -579,7 +210,7 @@ void ELFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, // Aliases defined with .symvar copy the binding from the symbol they alias. // This is the first place we are able to copy this information. 
it->setExternal(SD.isExternal()); - SetBinding(*it, GetBinding(SD)); + MCELF::SetBinding(*it, MCELF::GetBinding(SD)); StringRef Rest = AliasName.substr(Pos); if (!Symbol.isUndefined() && !Rest.startswith("@@@")) @@ -605,9 +236,9 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF, bool IsReserved = Data.isCommon() || Data.getSymbol().isAbsolute() || Data.getSymbol().isVariable(); - uint8_t Binding = GetBinding(OrigData); - uint8_t Visibility = GetVisibility(OrigData); - uint8_t Type = GetType(Data); + uint8_t Binding = MCELF::GetBinding(OrigData); + uint8_t Visibility = MCELF::GetVisibility(OrigData); + uint8_t Type = MCELF::GetType(Data); uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift); uint8_t Other = Visibility; @@ -673,7 +304,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, (Data.getFlags() & ELF_STB_Weak)) && "External symbol requires STB_GLOBAL or STB_WEAK flag"); WriteSymbol(SymtabF, ShndxF, MSD, Layout); - if (GetBinding(Data) == ELF::STB_LOCAL) + if (MCELF::GetBinding(Data) == ELF::STB_LOCAL) LastLocalSymbolIndex++; } @@ -681,7 +312,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, ELFSymbolData &MSD = UndefinedSymbolData[i]; MCSymbolData &Data = *MSD.SymbolData; WriteSymbol(SymtabF, ShndxF, MSD, Layout); - if (GetBinding(Data) == ELF::STB_LOCAL) + if (MCELF::GetBinding(Data) == ELF::STB_LOCAL) LastLocalSymbolIndex++; } } @@ -798,7 +429,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, FixedValue = Value; unsigned Type = GetRelocType(Target, Fixup, IsPCRel, (RelocSymbol != 0), Addend); - + uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); @@ -816,8 +447,9 @@ ELFObjectWriter::getSymbolIndexInSymbolTable(const MCAssembler &Asm, return SD.getIndex(); } -static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, - bool Used, bool Renamed) { +bool ELFObjectWriter::isInSymtab(const MCAssembler &Asm, + const MCSymbolData &Data, + bool 
Used, bool Renamed) { if (Data.getFlags() & ELF_Other_Weakref) return false; @@ -836,7 +468,7 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, if (Symbol.isVariable() && !A.isVariable() && A.isUndefined()) return false; - bool IsGlobal = GetBinding(Data) == ELF::STB_GLOBAL; + bool IsGlobal = MCELF::GetBinding(Data) == ELF::STB_GLOBAL; if (!Symbol.isVariable() && Symbol.isUndefined() && !IsGlobal) return false; @@ -849,8 +481,8 @@ static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, return true; } -static bool isLocal(const MCSymbolData &Data, bool isSignature, - bool isUsedInReloc) { +bool ELFObjectWriter::isLocal(const MCSymbolData &Data, bool isSignature, + bool isUsedInReloc) { if (Data.isExternal()) return false; @@ -868,7 +500,8 @@ static bool isLocal(const MCSymbolData &Data, bool isSignature, } void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm, - SectionIndexMapTy &SectionIndexMap) { + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap) { unsigned Index = 1; for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) { @@ -883,31 +516,37 @@ void ELFObjectWriter::ComputeIndexMap(MCAssembler &Asm, ie = Asm.end(); it != ie; ++it) { const MCSectionELF &Section = static_cast(it->getSection()); - if (Section.getType() == ELF::SHT_GROUP) + if (Section.getType() == ELF::SHT_GROUP || + Section.getType() == ELF::SHT_REL || + Section.getType() == ELF::SHT_RELA) continue; SectionIndexMap[&Section] = Index++; + const MCSectionELF *RelSection = RelMap.lookup(&Section); + if (RelSection) + SectionIndexMap[RelSection] = Index++; } } void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, const SectionIndexMapTy &SectionIndexMap, - RevGroupMapTy RevGroupMap) { + RevGroupMapTy RevGroupMap, + unsigned NumRegularSections) { // FIXME: Is this the correct place to do this? 
if (NeedsGOT) { llvm::StringRef Name = "_GLOBAL_OFFSET_TABLE_"; MCSymbol *Sym = Asm.getContext().GetOrCreateSymbol(Name); MCSymbolData &Data = Asm.getOrCreateSymbolData(*Sym); Data.setExternal(true); - SetBinding(Data, ELF::STB_GLOBAL); + MCELF::SetBinding(Data, ELF::STB_GLOBAL); } - // Build section lookup table. - int NumRegularSections = Asm.size(); - // Index 0 is always the empty string. StringMap StringIndexMap; StringTable += '\x00'; + // FIXME: We could optimize suffixes in strtab in the same way we + // optimize them in shstrtab. + // Add the data for the symbols. for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), ie = Asm.symbol_end(); it != ie; ++it) { @@ -929,14 +568,14 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, // Undefined symbols are global, but this is the first place we // are able to set it. bool Local = isLocal(*it, isSignature, Used); - if (!Local && GetBinding(*it) == ELF::STB_LOCAL) { + if (!Local && MCELF::GetBinding(*it) == ELF::STB_LOCAL) { MCSymbolData &SD = Asm.getSymbolData(RefSymbol); - SetBinding(*it, ELF::STB_GLOBAL); - SetBinding(SD, ELF::STB_GLOBAL); + MCELF::SetBinding(*it, ELF::STB_GLOBAL); + MCELF::SetBinding(SD, ELF::STB_GLOBAL); } if (RefSymbol.isUndefined() && !Used && WeakrefUsed) - SetBinding(*it, ELF::STB_WEAK); + MCELF::SetBinding(*it, ELF::STB_WEAK); if (it->isCommon()) { assert(!Local); @@ -1004,11 +643,16 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm, UndefinedSymbolData[i].SymbolData->setIndex(Index++); } -void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, - const MCSectionData &SD) { - if (!Relocations[&SD].empty()) { +void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm, + MCAsmLayout &Layout, + RelMapTy &RelMap) { + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + if (Relocations[&SD].empty()) + continue; + MCContext &Ctx = Asm.getContext(); - const MCSectionELF 
*RelaSection; const MCSectionELF &Section = static_cast(SD.getSection()); @@ -1022,17 +666,32 @@ void ELFObjectWriter::WriteRelocation(MCAssembler &Asm, MCAsmLayout &Layout, else EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel); - RelaSection = Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ? - ELF::SHT_RELA : ELF::SHT_REL, 0, - SectionKind::getReadOnly(), - EntrySize, ""); + const MCSectionELF *RelaSection = + Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ? + ELF::SHT_RELA : ELF::SHT_REL, 0, + SectionKind::getReadOnly(), + EntrySize, ""); + RelMap[&Section] = RelaSection; + Asm.getOrCreateSectionData(*RelaSection); + } +} +void ELFObjectWriter::WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, + const RelMapTy &RelMap) { + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + const MCSectionELF &Section = + static_cast(SD.getSection()); + + const MCSectionELF *RelaSection = RelMap.lookup(&Section); + if (!RelaSection) + continue; MCSectionData &RelaSD = Asm.getOrCreateSectionData(*RelaSection); RelaSD.setAlignment(is64Bit() ? 
8 : 4); MCDataFragment *F = new MCDataFragment(&RelaSD); - - WriteRelocationsFragment(Asm, F, &SD); + WriteRelocationsFragment(Asm, F, &*it); } } @@ -1092,9 +751,28 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm, } } +static int compareBySuffix(const void *a, const void *b) { + const MCSectionELF *secA = *static_cast(a); + const MCSectionELF *secB = *static_cast(b); + const StringRef &NameA = secA->getSectionName(); + const StringRef &NameB = secB->getSectionName(); + const unsigned sizeA = NameA.size(); + const unsigned sizeB = NameB.size(); + const unsigned len = std::min(sizeA, sizeB); + for (unsigned int i = 0; i < len; ++i) { + char ca = NameA[sizeA - i - 1]; + char cb = NameB[sizeB - i - 1]; + if (ca != cb) + return cb - ca; + } + + return sizeB - sizeA; +} + void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap) { + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap) { MCContext &Ctx = Asm.getContext(); MCDataFragment *F; @@ -1106,7 +784,6 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, SectionKind::getReadOnly()); MCSectionData &ShstrtabSD = Asm.getOrCreateSectionData(*ShstrtabSection); ShstrtabSD.setAlignment(1); - ShstrtabIndex = Asm.size(); const MCSectionELF *SymtabSection = Ctx.getELFSection(".symtab", ELF::SHT_SYMTAB, 0, @@ -1114,7 +791,6 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, EntrySize, ""); MCSectionData &SymtabSD = Asm.getOrCreateSectionData(*SymtabSection); SymtabSD.setAlignment(is64Bit() ? 
8 : 4); - SymbolTableIndex = Asm.size(); MCSectionData *SymtabShndxSD = NULL; @@ -1126,14 +802,17 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, SymtabShndxSD->setAlignment(4); } - const MCSection *StrtabSection; + const MCSectionELF *StrtabSection; StrtabSection = Ctx.getELFSection(".strtab", ELF::SHT_STRTAB, 0, SectionKind::getReadOnly()); MCSectionData &StrtabSD = Asm.getOrCreateSectionData(*StrtabSection); StrtabSD.setAlignment(1); - StringTableIndex = Asm.size(); - WriteRelocations(Asm, Layout); + ComputeIndexMap(Asm, SectionIndexMap, RelMap); + + ShstrtabIndex = SectionIndexMap.lookup(ShstrtabSection); + SymbolTableIndex = SectionIndexMap.lookup(SymtabSection); + StringTableIndex = SectionIndexMap.lookup(StrtabSection); // Symbol table F = new MCDataFragment(&SymtabSD); @@ -1148,6 +827,15 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, F = new MCDataFragment(&ShstrtabSD); + std::vector Sections; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + Sections.push_back(&Section); + } + array_pod_sort(Sections.begin(), Sections.end(), compareBySuffix); + // Section header string table. // // The first entry of a string table holds a null character so skip @@ -1155,22 +843,20 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, uint64_t Index = 1; F->getContents() += '\x00'; - StringMap SecStringMap; - for (MCAssembler::const_iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - const MCSectionELF &Section = - static_cast(it->getSection()); - // FIXME: We could merge suffixes like in .text and .rela.text. 
+ for (unsigned int I = 0, E = Sections.size(); I != E; ++I) { + const MCSectionELF &Section = *Sections[I]; StringRef Name = Section.getSectionName(); - if (SecStringMap.count(Name)) { - SectionStringTableIndex[&Section] = SecStringMap[Name]; - continue; + if (I != 0) { + StringRef PreviousName = Sections[I - 1]->getSectionName(); + if (PreviousName.endswith(Name)) { + SectionStringTableIndex[&Section] = Index - Name.size() - 1; + continue; + } } // Remember the index into the string table so we can write it // into the sh_name field of the section header table. SectionStringTableIndex[&Section] = Index; - SecStringMap[Name] = Index; Index += Name.size() + 1; F->getContents() += Name; @@ -1181,7 +867,9 @@ void ELFObjectWriter::CreateMetadataSections(MCAssembler &Asm, void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, GroupMapTy &GroupMap, - RevGroupMapTy &RevGroupMap) { + RevGroupMapTy &RevGroupMap, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap) { // Create the .note.GNU-stack section if needed. 
MCContext &Ctx = Asm.getContext(); if (Asm.getNoExecStack()) { @@ -1212,11 +900,11 @@ void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, GroupMap[Group] = SignatureSymbol; } + ComputeIndexMap(Asm, SectionIndexMap, RelMap); + // Add sections to the groups - unsigned Index = 1; - unsigned NumGroups = RevGroupMap.size(); for (MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); - it != ie; ++it, ++Index) { + it != ie; ++it) { const MCSectionELF &Section = static_cast(it->getSection()); if (!(Section.getFlags() & ELF::SHF_GROUP)) @@ -1225,7 +913,8 @@ void ELFObjectWriter::CreateIndexedSections(MCAssembler &Asm, MCSectionData &Data = Asm.getOrCreateSectionData(*Group); // FIXME: we could use the previous fragment MCDataFragment *F = new MCDataFragment(&Data); - String32(*F, NumGroups + Index); + unsigned Index = SectionIndexMap.lookup(&Section); + String32(*F, Index); } } @@ -1304,12 +993,12 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, Alignment, Section.getEntrySize()); } -static bool IsELFMetaDataSection(const MCSectionData &SD) { +bool ELFObjectWriter::IsELFMetaDataSection(const MCSectionData &SD) { return SD.getOrdinal() == ~UINT32_C(0) && !SD.getSection().isVirtualSection(); } -static uint64_t DataSectionSize(const MCSectionData &SD) { +uint64_t ELFObjectWriter::DataSectionSize(const MCSectionData &SD) { uint64_t Ret = 0; for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; ++i) { @@ -1320,112 +1009,60 @@ static uint64_t DataSectionSize(const MCSectionData &SD) { return Ret; } -static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, - const MCSectionData &SD) { +uint64_t ELFObjectWriter::GetSectionFileSize(const MCAsmLayout &Layout, + const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) return DataSectionSize(SD); return Layout.getSectionFileSize(&SD); } -static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout, - const MCSectionData &SD) { +uint64_t ELFObjectWriter::GetSectionAddressSize(const 
MCAsmLayout &Layout, + const MCSectionData &SD) { if (IsELFMetaDataSection(SD)) return DataSectionSize(SD); return Layout.getSectionAddressSize(&SD); } -static void WriteDataSectionData(ELFObjectWriter *W, const MCSectionData &SD) { - for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; - ++i) { - const MCFragment &F = *i; - assert(F.getKind() == MCFragment::FT_Data); - W->WriteBytes(cast(F).getContents().str()); +void ELFObjectWriter::WriteDataSectionData(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCSectionELF &Section) { + uint64_t FileOff = OS.tell(); + const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + + uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment()); + WriteZeros(Padding); + FileOff += Padding; + + FileOff += GetSectionFileSize(Layout, SD); + + if (IsELFMetaDataSection(SD)) { + for (MCSectionData::const_iterator i = SD.begin(), e = SD.end(); i != e; + ++i) { + const MCFragment &F = *i; + assert(F.getKind() == MCFragment::FT_Data); + WriteBytes(cast(F).getContents().str()); + } + } else { + Asm.WriteSectionData(&SD, Layout); } } -void ELFObjectWriter::WriteObject(MCAssembler &Asm, - const MCAsmLayout &Layout) { - GroupMapTy GroupMap; - RevGroupMapTy RevGroupMap; - CreateIndexedSections(Asm, const_cast(Layout), GroupMap, - RevGroupMap); - - SectionIndexMapTy SectionIndexMap; - - ComputeIndexMap(Asm, SectionIndexMap); - - // Compute symbol table information. - ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap); - - CreateMetadataSections(const_cast(Asm), - const_cast(Layout), - SectionIndexMap); - - // Update to include the metadata sections. - ComputeIndexMap(Asm, SectionIndexMap); - - // Add 1 for the null section. - unsigned NumSections = Asm.size() + 1; - uint64_t NaturalAlignment = is64Bit() ? 8 : 4; - uint64_t HeaderSize = is64Bit() ? 
sizeof(ELF::Elf64_Ehdr) : - sizeof(ELF::Elf32_Ehdr); - uint64_t FileOff = HeaderSize; +void ELFObjectWriter::WriteSectionHeader(MCAssembler &Asm, + const GroupMapTy &GroupMap, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, + const SectionOffsetMapTy &SectionOffsetMap) { + const unsigned NumSections = Asm.size() + 1; std::vector Sections; - Sections.resize(NumSections); + Sections.resize(NumSections - 1); for (SectionIndexMapTy::const_iterator i= SectionIndexMap.begin(), e = SectionIndexMap.end(); i != e; ++i) { const std::pair &p = *i; - Sections[p.second] = p.first; + Sections[p.second - 1] = p.first; } - for (unsigned i = 1; i < NumSections; ++i) { - const MCSectionELF &Section = *Sections[i]; - const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - - FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); - - // Get the size of the section in the output file (including padding). - FileOff += GetSectionFileSize(Layout, SD); - } - - FileOff = RoundUpToAlignment(FileOff, NaturalAlignment); - - // Write out the ELF header ... - WriteHeader(FileOff - HeaderSize, NumSections); - - FileOff = HeaderSize; - - // ... then all of the sections ... - DenseMap SectionOffsetMap; - - for (unsigned i = 1; i < NumSections; ++i) { - const MCSectionELF &Section = *Sections[i]; - const MCSectionData &SD = Asm.getOrCreateSectionData(Section); - - uint64_t Padding = OffsetToAlignment(FileOff, SD.getAlignment()); - WriteZeros(Padding); - FileOff += Padding; - - // Remember the offset into the file for this section. - SectionOffsetMap[&Section] = FileOff; - - FileOff += GetSectionFileSize(Layout, SD); - - if (IsELFMetaDataSection(SD)) - WriteDataSectionData(this, SD); - else - Asm.WriteSectionData(&SD, Layout); - } - - uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment); - WriteZeros(Padding); - FileOff += Padding; - - // ... and then the section header table. - // Should we align the section header table? - // // Null section first. 
uint64_t FirstSectionSize = NumSections >= ELF::SHN_LORESERVE ? NumSections : 0; @@ -1433,23 +1070,162 @@ void ELFObjectWriter::WriteObject(MCAssembler &Asm, ShstrtabIndex >= ELF::SHN_LORESERVE ? ShstrtabIndex : 0; WriteSecHdrEntry(0, 0, 0, 0, 0, FirstSectionSize, FirstSectionLink, 0, 0, 0); - for (unsigned i = 1; i < NumSections; ++i) { + for (unsigned i = 0; i < NumSections - 1; ++i) { const MCSectionELF &Section = *Sections[i]; const MCSectionData &SD = Asm.getOrCreateSectionData(Section); uint32_t GroupSymbolIndex; if (Section.getType() != ELF::SHT_GROUP) GroupSymbolIndex = 0; else - GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, GroupMap[&Section]); + GroupSymbolIndex = getSymbolIndexInSymbolTable(Asm, + GroupMap.lookup(&Section)); uint64_t Size = GetSectionAddressSize(Layout, SD); WriteSection(Asm, SectionIndexMap, GroupSymbolIndex, - SectionOffsetMap[&Section], Size, + SectionOffsetMap.lookup(&Section), Size, SD.getAlignment(), Section); } } +void ELFObjectWriter::ComputeSectionOrder(MCAssembler &Asm, + std::vector &Sections) { + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + if (Section.getType() == ELF::SHT_GROUP) + Sections.push_back(&Section); + } + + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + if (Section.getType() != ELF::SHT_GROUP && + Section.getType() != ELF::SHT_REL && + Section.getType() != ELF::SHT_RELA) + Sections.push_back(&Section); + } + + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionELF &Section = + static_cast(it->getSection()); + if (Section.getType() == ELF::SHT_REL || + Section.getType() == ELF::SHT_RELA) + Sections.push_back(&Section); + } +} + +void ELFObjectWriter::WriteObject(MCAssembler &Asm, + const MCAsmLayout &Layout) { + GroupMapTy GroupMap; + RevGroupMapTy RevGroupMap; 
+ SectionIndexMapTy SectionIndexMap; + + unsigned NumUserSections = Asm.size(); + + DenseMap RelMap; + CreateRelocationSections(Asm, const_cast(Layout), RelMap); + + const unsigned NumUserAndRelocSections = Asm.size(); + CreateIndexedSections(Asm, const_cast(Layout), GroupMap, + RevGroupMap, SectionIndexMap, RelMap); + const unsigned AllSections = Asm.size(); + const unsigned NumIndexedSections = AllSections - NumUserAndRelocSections; + + unsigned NumRegularSections = NumUserSections + NumIndexedSections; + + // Compute symbol table information. + ComputeSymbolTable(Asm, SectionIndexMap, RevGroupMap, NumRegularSections); + + + WriteRelocations(Asm, const_cast(Layout), RelMap); + + CreateMetadataSections(const_cast(Asm), + const_cast(Layout), + SectionIndexMap, + RelMap); + + uint64_t NaturalAlignment = is64Bit() ? 8 : 4; + uint64_t HeaderSize = is64Bit() ? sizeof(ELF::Elf64_Ehdr) : + sizeof(ELF::Elf32_Ehdr); + uint64_t FileOff = HeaderSize; + + std::vector Sections; + ComputeSectionOrder(Asm, Sections); + unsigned NumSections = Sections.size(); + SectionOffsetMapTy SectionOffsetMap; + for (unsigned i = 0; i < NumRegularSections + 1; ++i) { + const MCSectionELF &Section = *Sections[i]; + const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + + FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); + + // Remember the offset into the file for this section. + SectionOffsetMap[&Section] = FileOff; + + // Get the size of the section in the output file (including padding). + FileOff += GetSectionFileSize(Layout, SD); + } + + FileOff = RoundUpToAlignment(FileOff, NaturalAlignment); + + const unsigned SectionHeaderOffset = FileOff - HeaderSize; + + uint64_t SectionHeaderEntrySize = is64Bit() ? 
+ sizeof(ELF::Elf64_Shdr) : sizeof(ELF::Elf32_Shdr); + FileOff += (NumSections + 1) * SectionHeaderEntrySize; + + for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) { + const MCSectionELF &Section = *Sections[i]; + const MCSectionData &SD = Asm.getOrCreateSectionData(Section); + + FileOff = RoundUpToAlignment(FileOff, SD.getAlignment()); + + // Remember the offset into the file for this section. + SectionOffsetMap[&Section] = FileOff; + + // Get the size of the section in the output file (including padding). + FileOff += GetSectionFileSize(Layout, SD); + } + + // Write out the ELF header ... + WriteHeader(SectionHeaderOffset, NumSections + 1); + + // ... then the regular sections ... + // + because of .shstrtab + for (unsigned i = 0; i < NumRegularSections + 1; ++i) + WriteDataSectionData(Asm, Layout, *Sections[i]); + + FileOff = OS.tell(); + uint64_t Padding = OffsetToAlignment(FileOff, NaturalAlignment); + WriteZeros(Padding); + + // ... then the section header table ... + WriteSectionHeader(Asm, GroupMap, Layout, SectionIndexMap, + SectionOffsetMap); + + FileOff = OS.tell(); + + // ... and then the remaining sections ... 
+ for (unsigned i = NumRegularSections + 1; i < NumSections; ++i) + WriteDataSectionData(Asm, Layout, *Sections[i]); +} + +bool +ELFObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const { + if (DataA.getFlags() & ELF_STB_Weak) + return false; + return MCObjectWriter::IsSymbolRefDifferenceFullyResolvedImpl( + Asm, DataA, FB,InSet, IsPCRel); +} + MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &OS, bool IsLittleEndian) { @@ -1700,13 +1476,17 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, if (IsPCRel) { switch ((unsigned)Fixup.getKind()) { default: llvm_unreachable("invalid fixup kind!"); + + case FK_Data_8: Type = ELF::R_X86_64_PC64; break; + case FK_Data_4: Type = ELF::R_X86_64_PC32; break; + case FK_Data_2: Type = ELF::R_X86_64_PC16; break; + case FK_PCRel_8: assert(Modifier == MCSymbolRefExpr::VK_None); Type = ELF::R_X86_64_PC64; break; case X86::reloc_signed_4byte: case X86::reloc_riprel_4byte_movq_load: - case FK_Data_4: // FIXME? case X86::reloc_riprel_4byte: case FK_PCRel_4: switch (Modifier) { diff --git a/lib/MC/ELFObjectWriter.h b/lib/MC/ELFObjectWriter.h new file mode 100644 index 000000000000..f1d514a89988 --- /dev/null +++ b/lib/MC/ELFObjectWriter.h @@ -0,0 +1,406 @@ +//===- lib/MC/ELFObjectWriter.h - ELF File Writer -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_ELFOBJECTWRITER_H +#define LLVM_MC_ELFOBJECTWRITER_H + +#include "MCELF.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFObjectWriter.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCSymbol.h" + +#include + +namespace llvm { + +class MCSection; +class MCDataFragment; +class MCSectionELF; + +class ELFObjectWriter : public MCObjectWriter { + protected: + + static bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind); + static bool RelocNeedsGOT(MCSymbolRefExpr::VariantKind Variant); + static uint64_t SymbolValue(MCSymbolData &Data, const MCAsmLayout &Layout); + static bool isInSymtab(const MCAssembler &Asm, const MCSymbolData &Data, + bool Used, bool Renamed); + static bool isLocal(const MCSymbolData &Data, bool isSignature, + bool isUsedInReloc); + static bool IsELFMetaDataSection(const MCSectionData &SD); + static uint64_t DataSectionSize(const MCSectionData &SD); + static uint64_t GetSectionFileSize(const MCAsmLayout &Layout, + const MCSectionData &SD); + static uint64_t GetSectionAddressSize(const MCAsmLayout &Layout, + const MCSectionData &SD); + + void WriteDataSectionData(MCAssembler &Asm, + const MCAsmLayout &Layout, + const MCSectionELF &Section); + + /*static bool isFixupKindX86RIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; + }*/ + + /// ELFSymbolData - Helper struct for containing some precomputed + /// information on symbols. + struct ELFSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint32_t SectionIndex; + + // Support lexicographic sorting. 
+ bool operator<(const ELFSymbolData &RHS) const { + if (MCELF::GetType(*SymbolData) == ELF::STT_FILE) + return true; + if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE) + return false; + return SymbolData->getSymbol().getName() < + RHS.SymbolData->getSymbol().getName(); + } + }; + + /// @name Relocation Data + /// @{ + + struct ELFRelocationEntry { + // Make these big enough for both 32-bit and 64-bit + uint64_t r_offset; + int Index; + unsigned Type; + const MCSymbol *Symbol; + uint64_t r_addend; + + ELFRelocationEntry() + : r_offset(0), Index(0), Type(0), Symbol(0), r_addend(0) {} + + ELFRelocationEntry(uint64_t RelocOffset, int Idx, + unsigned RelType, const MCSymbol *Sym, + uint64_t Addend) + : r_offset(RelocOffset), Index(Idx), Type(RelType), + Symbol(Sym), r_addend(Addend) {} + + // Support lexicographic sorting. + bool operator<(const ELFRelocationEntry &RE) const { + return RE.r_offset < r_offset; + } + }; + + /// The target specific ELF writer instance. + llvm::OwningPtr TargetObjectWriter; + + SmallPtrSet UsedInReloc; + SmallPtrSet WeakrefUsedInReloc; + DenseMap Renames; + + llvm::DenseMap > Relocations; + DenseMap SectionStringTableIndex; + + /// @} + /// @name Symbol Table Data + /// @{ + + SmallString<256> StringTable; + std::vector LocalSymbolData; + std::vector ExternalSymbolData; + std::vector UndefinedSymbolData; + + /// @} + + bool NeedsGOT; + + bool NeedsSymtabShndx; + + // This holds the symbol table index of the last local symbol. + unsigned LastLocalSymbolIndex; + // This holds the .strtab section index. + unsigned StringTableIndex; + // This holds the .symtab section index. 
+ unsigned SymbolTableIndex; + + unsigned ShstrtabIndex; + + + const MCSymbol *SymbolToReloc(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F) const; + + // For arch-specific emission of explicit reloc symbol + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + bool IsBSS) const { + return NULL; + } + + bool is64Bit() const { return TargetObjectWriter->is64Bit(); } + bool hasRelocationAddend() const { + return TargetObjectWriter->hasRelocationAddend(); + } + + public: + ELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, bool IsLittleEndian) + : MCObjectWriter(_OS, IsLittleEndian), + TargetObjectWriter(MOTW), + NeedsGOT(false), NeedsSymtabShndx(false){ + } + + virtual ~ELFObjectWriter(); + + void WriteWord(uint64_t W) { + if (is64Bit()) + Write64(W); + else + Write32(W); + } + + void StringLE16(char *buf, uint16_t Value) { + buf[0] = char(Value >> 0); + buf[1] = char(Value >> 8); + } + + void StringLE32(char *buf, uint32_t Value) { + StringLE16(buf, uint16_t(Value >> 0)); + StringLE16(buf + 2, uint16_t(Value >> 16)); + } + + void StringLE64(char *buf, uint64_t Value) { + StringLE32(buf, uint32_t(Value >> 0)); + StringLE32(buf + 4, uint32_t(Value >> 32)); + } + + void StringBE16(char *buf ,uint16_t Value) { + buf[0] = char(Value >> 8); + buf[1] = char(Value >> 0); + } + + void StringBE32(char *buf, uint32_t Value) { + StringBE16(buf, uint16_t(Value >> 16)); + StringBE16(buf + 2, uint16_t(Value >> 0)); + } + + void StringBE64(char *buf, uint64_t Value) { + StringBE32(buf, uint32_t(Value >> 32)); + StringBE32(buf + 4, uint32_t(Value >> 0)); + } + + void String8(MCDataFragment &F, uint8_t Value) { + char buf[1]; + buf[0] = Value; + F.getContents() += StringRef(buf, 1); + } + + void String16(MCDataFragment &F, uint16_t Value) { + char buf[2]; + if (isLittleEndian()) + StringLE16(buf, Value); + else + StringBE16(buf, Value); + F.getContents() += StringRef(buf, 2); + } + + 
void String32(MCDataFragment &F, uint32_t Value) { + char buf[4]; + if (isLittleEndian()) + StringLE32(buf, Value); + else + StringBE32(buf, Value); + F.getContents() += StringRef(buf, 4); + } + + void String64(MCDataFragment &F, uint64_t Value) { + char buf[8]; + if (isLittleEndian()) + StringLE64(buf, Value); + else + StringBE64(buf, Value); + F.getContents() += StringRef(buf, 8); + } + + virtual void WriteHeader(uint64_t SectionDataSize, unsigned NumberOfSections); + + /// Default e_flags = 0 + virtual void WriteEFlags() { Write32(0); } + + virtual void WriteSymbolEntry(MCDataFragment *SymtabF, MCDataFragment *ShndxF, + uint64_t name, uint8_t info, + uint64_t value, uint64_t size, + uint8_t other, uint32_t shndx, + bool Reserved); + + virtual void WriteSymbol(MCDataFragment *SymtabF, MCDataFragment *ShndxF, + ELFSymbolData &MSD, + const MCAsmLayout &Layout); + + typedef DenseMap SectionIndexMapTy; + virtual void WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF, + const MCAssembler &Asm, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap); + + virtual void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout, + const MCFragment *Fragment, const MCFixup &Fixup, + MCValue Target, uint64_t &FixedValue); + + virtual uint64_t getSymbolIndexInSymbolTable(const MCAssembler &Asm, + const MCSymbol *S); + + // Map from a group section to the signature symbol + typedef DenseMap GroupMapTy; + // Map from a signature symbol to the group section + typedef DenseMap RevGroupMapTy; + // Map from a section to the section with the relocations + typedef DenseMap RelMapTy; + // Map from a section to its offset + typedef DenseMap SectionOffsetMapTy; + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. 
+ virtual void ComputeSymbolTable(MCAssembler &Asm, + const SectionIndexMapTy &SectionIndexMap, + RevGroupMapTy RevGroupMap, + unsigned NumRegularSections); + + virtual void ComputeIndexMap(MCAssembler &Asm, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap); + + void CreateRelocationSections(MCAssembler &Asm, MCAsmLayout &Layout, + RelMapTy &RelMap); + + void WriteRelocations(MCAssembler &Asm, MCAsmLayout &Layout, + const RelMapTy &RelMap); + + virtual void CreateMetadataSections(MCAssembler &Asm, MCAsmLayout &Layout, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap); + + // Create the sections that show up in the symbol table. Currently + // those are the .note.GNU-stack section and the group sections. + virtual void CreateIndexedSections(MCAssembler &Asm, MCAsmLayout &Layout, + GroupMapTy &GroupMap, + RevGroupMapTy &RevGroupMap, + SectionIndexMapTy &SectionIndexMap, + const RelMapTy &RelMap); + + virtual void ExecutePostLayoutBinding(MCAssembler &Asm, + const MCAsmLayout &Layout); + + void WriteSectionHeader(MCAssembler &Asm, const GroupMapTy &GroupMap, + const MCAsmLayout &Layout, + const SectionIndexMapTy &SectionIndexMap, + const SectionOffsetMapTy &SectionOffsetMap); + + void ComputeSectionOrder(MCAssembler &Asm, + std::vector &Sections); + + virtual void WriteSecHdrEntry(uint32_t Name, uint32_t Type, uint64_t Flags, + uint64_t Address, uint64_t Offset, + uint64_t Size, uint32_t Link, uint32_t Info, + uint64_t Alignment, uint64_t EntrySize); + + virtual void WriteRelocationsFragment(const MCAssembler &Asm, + MCDataFragment *F, + const MCSectionData *SD); + + virtual bool + IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm, + const MCSymbolData &DataA, + const MCFragment &FB, + bool InSet, + bool IsPCRel) const; + + virtual void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout); + virtual void WriteSection(MCAssembler &Asm, + const SectionIndexMapTy &SectionIndexMap, + uint32_t GroupSymbolIndex, + uint64_t Offset, 
uint64_t Size, uint64_t Alignment, + const MCSectionELF &Section); + + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend) = 0; + }; + + //===- X86ELFObjectWriter -------------------------------------------===// + + class X86ELFObjectWriter : public ELFObjectWriter { + public: + X86ELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~X86ELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; + + + //===- ARMELFObjectWriter -------------------------------------------===// + + class ARMELFObjectWriter : public ELFObjectWriter { + public: + // FIXME: MCAssembler can't yet return the Subtarget, + enum { DefaultEABIVersion = 0x05000000U }; + + ARMELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~ARMELFObjectWriter(); + + virtual void WriteEFlags(); + protected: + virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm, + const MCValue &Target, + const MCFragment &F, + bool IsBSS) const; + + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; + + //===- MBlazeELFObjectWriter -------------------------------------------===// + + class MBlazeELFObjectWriter : public ELFObjectWriter { + public: + MBlazeELFObjectWriter(MCELFObjectTargetWriter *MOTW, + raw_ostream &_OS, + bool IsLittleEndian); + + virtual ~MBlazeELFObjectWriter(); + protected: + virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup, + bool IsPCRel, bool IsRelocWithSymbol, + int64_t Addend); + }; +} + +#endif diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 8199fb2e158a..541dd080accf 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -13,7 +13,11 @@ 
//===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/Dwarf.h" #include #include using namespace llvm; @@ -26,7 +30,7 @@ MCAsmInfo::MCAsmInfo() { LinkerRequiresNonEmptyDwarfLines = false; MaxInstLength = 4; PCSymbol = "$"; - SeparatorChar = ';'; + SeparatorString = ";"; CommentColumn = 40; CommentString = "#"; LabelSuffix = ":"; @@ -106,3 +110,25 @@ unsigned MCAsmInfo::getSLEB128Size(int Value) { } while (IsMore); return Size; } + +const MCExpr * +MCAsmInfo::getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + return getExprForFDESymbol(Sym, Encoding, Streamer); +} + +const MCExpr * +MCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + if (!(Encoding & dwarf::DW_EH_PE_pcrel)) + return MCSymbolRefExpr::Create(Sym, Streamer.getContext()); + + MCContext &Context = Streamer.getContext(); + const MCExpr *Res = MCSymbolRefExpr::Create(Sym, Context); + MCSymbol *PCSym = Context.CreateTempSymbol(); + Streamer.EmitLabel(PCSym); + const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, Context); + return MCBinaryExpr::CreateSub(Res, PC, Context); +} diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index 526ad0da42aa..4dd1d44af5d2 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -13,6 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCAsmInfoDarwin.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" using namespace llvm; MCAsmInfoDarwin::MCAsmInfoDarwin() { @@ -56,4 +59,3 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { DwarfUsesAbsoluteLabelForStmtList = false; DwarfUsesLabelOffsetForRanges = false; } - diff --git 
a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 8d0698216f60..9717c016a92b 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -45,20 +45,25 @@ class MCAsmStreamer : public MCStreamer { unsigned IsVerboseAsm : 1; unsigned ShowInst : 1; unsigned UseLoc : 1; + unsigned UseCFI : 1; + + enum EHSymbolFlags { EHGlobal = 1, + EHWeakDefinition = 1 << 1, + EHPrivateExtern = 1 << 2 }; + DenseMap FlagMap; bool needsSet(const MCExpr *Value); public: MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os, - bool isVerboseAsm, - bool useLoc, + bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *printer, MCCodeEmitter *emitter, TargetAsmBackend *asmbackend, bool showInst) : MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()), InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend), CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm), - ShowInst(showInst), UseLoc(useLoc) { + ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI) { if (InstPrinter && IsVerboseAsm) InstPrinter->setCommentStream(CommentStream); } @@ -118,7 +123,8 @@ class MCAsmStreamer : public MCStreamer { } virtual void EmitLabel(MCSymbol *Symbol); - + virtual void EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitThumbFunc(MCSymbol *Func); @@ -127,6 +133,8 @@ class MCAsmStreamer : public MCStreamer { virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, const MCSymbol *Label); + virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, + const MCSymbol *Label); virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); @@ -154,13 +162,13 @@ class MCAsmStreamer : public MCStreamer { virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace); + unsigned AddrSpace); virtual void EmitIntValue(uint64_t Value, unsigned Size, 
unsigned AddrSpace = 0); - virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + virtual void EmitULEB128Value(const MCExpr *Value); - virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + virtual void EmitSLEB128Value(const MCExpr *Value); virtual void EmitGPRel32Value(const MCExpr *Value); @@ -182,15 +190,32 @@ class MCAsmStreamer : public MCStreamer { virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Filename); virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator); + unsigned Isa, unsigned Discriminator, + StringRef FileName); + + virtual void EmitCFIStartProc(); + virtual void EmitCFIEndProc(); + virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset); + virtual void EmitCFIDefCfaOffset(int64_t Offset); + virtual void EmitCFIDefCfaRegister(int64_t Register); + virtual void EmitCFIOffset(int64_t Register, int64_t Offset); + virtual void EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding); + virtual void EmitCFILsda(const MCSymbol *Sym, unsigned Encoding); + virtual void EmitCFIRememberState(); + virtual void EmitCFIRestoreState(); + virtual void EmitCFISameValue(int64_t Register); + virtual void EmitCFIRelOffset(int64_t Register, int64_t Offset); + virtual void EmitCFIAdjustCfaOffset(int64_t Adjustment); + + virtual void EmitFnStart(); + virtual void EmitFnEnd(); + virtual void EmitCantUnwind(); + virtual void EmitPersonality(const MCSymbol *Personality); + virtual void EmitHandlerData(); + virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0); + virtual void EmitPad(int64_t Offset); + virtual void EmitRegSave(const SmallVectorImpl &RegList, bool); - virtual bool EmitCFIStartProc(); - virtual bool EmitCFIEndProc(); - virtual bool EmitCFIDefCfaOffset(int64_t Offset); - virtual bool EmitCFIDefCfaRegister(int64_t Register); - virtual bool EmitCFIOffset(int64_t Register, int64_t Offset); - 
virtual bool EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding); - virtual bool EmitCFILsda(const MCSymbol *Sym, unsigned Encoding); virtual void EmitInstruction(const MCInst &Inst); @@ -259,14 +284,27 @@ void MCAsmStreamer::ChangeSection(const MCSection *Section) { Section->PrintSwitchToSection(MAI, OS); } +void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol) { + if (UseCFI) + return; + + unsigned Flags = FlagMap.lookup(Symbol); + + if (Flags & EHGlobal) + EmitSymbolAttribute(EHSymbol, MCSA_Global); + if (Flags & EHWeakDefinition) + EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition); + if (Flags & EHPrivateExtern) + EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern); +} + void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); + MCStreamer::EmitLabel(Symbol); OS << *Symbol << MAI.getLabelSuffix(); EmitEOL(); - Symbol->setSection(*getCurrentSection()); } void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -309,6 +347,15 @@ void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, getContext().getTargetAsmInfo().getPointerSize()); } +void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, + const MCSymbol *Label) { + EmitIntValue(dwarf::DW_CFA_advance_loc4, 1); + const MCExpr *AddrDelta = BuildSymbolDiff(getContext(), Label, LastLabel); + AddrDelta = ForceExpAbs(this, getContext(), AddrDelta); + EmitValue(AddrDelta, 4); +} + + void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { switch (Attribute) { @@ -337,6 +384,7 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, return; case MCSA_Global: // .globl/.global OS << MAI.getGlobalDirective(); + FlagMap[Symbol] |= EHGlobal; break; case MCSA_Hidden: OS << "\t.hidden\t"; break; case MCSA_IndirectSymbol: OS << 
"\t.indirect_symbol\t"; break; @@ -345,11 +393,17 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Local: OS << "\t.local\t"; break; case MCSA_NoDeadStrip: OS << "\t.no_dead_strip\t"; break; case MCSA_SymbolResolver: OS << "\t.symbol_resolver\t"; break; - case MCSA_PrivateExtern: OS << "\t.private_extern\t"; break; + case MCSA_PrivateExtern: + OS << "\t.private_extern\t"; + FlagMap[Symbol] |= EHPrivateExtern; + break; case MCSA_Protected: OS << "\t.protected\t"; break; case MCSA_Reference: OS << "\t.reference\t"; break; case MCSA_Weak: OS << "\t.weak\t"; break; - case MCSA_WeakDefinition: OS << "\t.weak_definition\t"; break; + case MCSA_WeakDefinition: + OS << "\t.weak_definition\t"; + FlagMap[Symbol] |= EHWeakDefinition; + break; // .weak_reference case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break; case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break; @@ -512,9 +566,8 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size, } void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) { + unsigned AddrSpace) { assert(getCurrentSection() && "Cannot emit contents before setting section!"); - assert(!isPCRel && "Cannot emit pc relative relocations!"); const char *Directive = 0; switch (Size) { default: break; @@ -543,10 +596,10 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, EmitEOL(); } -void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) { +void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue)) { - EmitULEB128IntValue(IntValue, AddrSpace); + EmitULEB128IntValue(IntValue); return; } assert(MAI.hasLEB128() && "Cannot print a .uleb"); @@ -554,10 +607,10 @@ void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace) { EmitEOL(); } -void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace) { +void 
MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue)) { - EmitSLEB128IntValue(IntValue, AddrSpace); + EmitSLEB128IntValue(IntValue); return; } assert(MAI.hasLEB128() && "Cannot print a .sleb"); @@ -673,9 +726,10 @@ bool MCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo, StringRef Filename){ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, - unsigned Discriminator) { + unsigned Discriminator, + StringRef FileName) { this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags, - Isa, Discriminator); + Isa, Discriminator, FileName); if (!UseLoc) return; @@ -701,78 +755,144 @@ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, OS << "isa " << Isa; if (Discriminator) OS << "discriminator " << Discriminator; + + if (IsVerboseAsm) { + OS.PadToColumn(MAI.getCommentColumn()); + OS << MAI.getCommentString() << ' ' << FileName << ':' + << Line << ':' << Column; + } EmitEOL(); } -bool MCAsmStreamer::EmitCFIStartProc() { - if (this->MCStreamer::EmitCFIStartProc()) - return true; +void MCAsmStreamer::EmitCFIStartProc() { + MCStreamer::EmitCFIStartProc(); + + if (!UseCFI) + return; OS << "\t.cfi_startproc"; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIEndProc() { - if (this->MCStreamer::EmitCFIEndProc()) - return true; +void MCAsmStreamer::EmitCFIEndProc() { + MCStreamer::EmitCFIEndProc(); + + if (!UseCFI) + return; OS << "\t.cfi_endproc"; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { - if (this->MCStreamer::EmitCFIDefCfaOffset(Offset)) - return true; +void MCAsmStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { + MCStreamer::EmitCFIDefCfa(Register, Offset); + + if (!UseCFI) + return; + // Tab-indent the directive like every other .cfi_* line emitted here. + OS << "\t.cfi_def_cfa " << Register << ", " << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIDefCfaOffset(int64_t Offset) { +
MCStreamer::EmitCFIDefCfaOffset(Offset); + + if (!UseCFI) + return; OS << "\t.cfi_def_cfa_offset " << Offset; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { - if (this->MCStreamer::EmitCFIDefCfaRegister(Register)) - return true; +void MCAsmStreamer::EmitCFIDefCfaRegister(int64_t Register) { + MCStreamer::EmitCFIDefCfaRegister(Register); + + if (!UseCFI) + return; OS << "\t.cfi_def_cfa_register " << Register; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { - if (this->MCStreamer::EmitCFIOffset(Register, Offset)) - return true; +void MCAsmStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { + this->MCStreamer::EmitCFIOffset(Register, Offset); + + if (!UseCFI) + return; OS << "\t.cfi_offset " << Register << ", " << Offset; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym, +void MCAsmStreamer::EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) { - if (this->MCStreamer::EmitCFIPersonality(Sym, Encoding)) - return true; + MCStreamer::EmitCFIPersonality(Sym, Encoding); + + if (!UseCFI) + return; OS << "\t.cfi_personality " << Encoding << ", " << *Sym; EmitEOL(); - - return false; } -bool MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { - if (this->MCStreamer::EmitCFILsda(Sym, Encoding)) - return true; +void MCAsmStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { + MCStreamer::EmitCFILsda(Sym, Encoding); + + if (!UseCFI) + return; OS << "\t.cfi_lsda " << Encoding << ", " << *Sym; EmitEOL(); +} - return false; +void MCAsmStreamer::EmitCFIRememberState() { + MCStreamer::EmitCFIRememberState(); + + if (!UseCFI) + return; + + OS << "\t.cfi_remember_state"; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIRestoreState() { + MCStreamer::EmitCFIRestoreState(); + + if (!UseCFI) + return; + + OS << "\t.cfi_restore_state"; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFISameValue(int64_t Register) { 
+ MCStreamer::EmitCFISameValue(Register); + + if (!UseCFI) + return; + + OS << "\t.cfi_same_value " << Register; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) { + MCStreamer::EmitCFIRelOffset(Register, Offset); + + if (!UseCFI) + return; + + OS << "\t.cfi_rel_offset " << Register << ", " << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) { + MCStreamer::EmitCFIAdjustCfaOffset(Adjustment); + + if (!UseCFI) + return; + + OS << "\t.cfi_adjust_cfa_offset " << Adjustment; + EmitEOL(); } void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { @@ -834,13 +954,13 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { OS << "0b"; for (unsigned j = 8; j--;) { unsigned Bit = (Code[i] >> j) & 1; - + unsigned FixupBit; if (getContext().getTargetAsmInfo().isLittleEndian()) FixupBit = i * 8 + j; else FixupBit = i * 8 + (7-j); - + if (uint8_t MapEntry = FixupMap[FixupBit]) { assert(Bit == 0 && "Encoder wrote into fixed up bit!"); OS << char('A' + MapEntry - 1); @@ -859,12 +979,64 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) { } } +void MCAsmStreamer::EmitFnStart() { + OS << "\t.fnstart"; + EmitEOL(); +} + +void MCAsmStreamer::EmitFnEnd() { + OS << "\t.fnend"; + EmitEOL(); +} + +void MCAsmStreamer::EmitCantUnwind() { + OS << "\t.cantunwind"; + EmitEOL(); +} + +void MCAsmStreamer::EmitHandlerData() { + OS << "\t.handlerdata"; + EmitEOL(); +} + +void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) { + OS << "\t.personality " << Personality->getName(); + EmitEOL(); +} + +void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { + OS << "\t.setfp\t" << InstPrinter->getRegName(FpReg) + << ", " << InstPrinter->getRegName(SpReg); + if (Offset) + OS << ", #" << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitPad(int64_t Offset) { + OS << "\t.pad\t#" << Offset; + EmitEOL(); +} + +void MCAsmStreamer::EmitRegSave(const SmallVectorImpl 
&RegList, + bool isVector) { + assert(RegList.size() && "RegList should not be empty"); + if (isVector) + OS << "\t.vsave\t{"; + else + OS << "\t.save\t{"; + + OS << InstPrinter->getRegName(RegList[0]); + + for (unsigned i = 1, e = RegList.size(); i != e; ++i) + OS << ", " << InstPrinter->getRegName(RegList[i]); + + OS << "}"; + EmitEOL(); +} + void MCAsmStreamer::EmitInstruction(const MCInst &Inst) { assert(getCurrentSection() && "Cannot emit contents before setting section!"); - if (!UseLoc) - MCLineEntry::Make(this, getCurrentSection()); - // Show the encoding in a comment if we have a code emitter. if (Emitter) AddEncodingComment(Inst); @@ -897,13 +1069,17 @@ void MCAsmStreamer::Finish() { // Dump out the dwarf file & directory tables and line tables. if (getContext().hasDwarfFiles() && !UseLoc) MCDwarfFileTable::Emit(this); + + if (getNumFrameInfos() && !UseCFI) + MCDwarfFrameEmitter::Emit(*this, false); } MCStreamer *llvm::createAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, + bool useCFI, MCInstPrinter *IP, MCCodeEmitter *CE, TargetAsmBackend *TAB, bool ShowInst) { - return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, + return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI, IP, CE, TAB, ShowInst); } diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 999264604224..8360fc9f414e 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -28,7 +28,6 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" -#include using namespace llvm; namespace { @@ -103,6 +102,33 @@ uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const { } uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const { + const MCSymbol &S = SD->getSymbol(); + + // If this is a variable, then recursively evaluate now. 
+ if (S.isVariable()) { + MCValue Target; + if (!S.getVariableValue()->EvaluateAsRelocatable(Target, *this)) + report_fatal_error("unable to evaluate offset for variable '" + + S.getName() + "'"); + + // Verify that any used symbols are defined. + if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymA()->getSymbol().getName() + "'"); + if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymB()->getSymbol().getName() + "'"); + + uint64_t Offset = Target.getConstant(); + if (Target.getSymA()) + Offset += getSymbolOffset(&Assembler.getSymbolData( + Target.getSymA()->getSymbol())); + if (Target.getSymB()) + Offset -= getSymbolOffset(&Assembler.getSymbolData( + Target.getSymB()->getSymbol())); + return Offset; + } + assert(SD->getFragment() && "Invalid getOffset() on undefined symbol!"); return getFragmentOffset(SD->getFragment()) + SD->getOffset(); } @@ -692,7 +718,9 @@ bool MCAssembler::RelaxInstruction(MCAsmLayout &Layout, bool MCAssembler::RelaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) { int64_t Value = 0; uint64_t OldSize = LF.getContents().size(); - LF.getValue().EvaluateAsAbsolute(Value, Layout); + bool IsAbs = LF.getValue().EvaluateAsAbsolute(Value, Layout); + (void)IsAbs; + assert(IsAbs); SmallString<8> &Data = LF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); @@ -731,7 +759,8 @@ bool MCAssembler::RelaxDwarfCallFrameFragment(MCAsmLayout &Layout, SmallString<8> &Data = DF.getContents(); Data.clear(); raw_svector_ostream OSE(Data); - MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE); + const TargetAsmInfo &AsmInfo = getContext().getTargetAsmInfo(); + MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE, AsmInfo); OSE.flush(); return OldSize != Data.size(); } diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 
018f00c08f6f..8faa72ecb4e8 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -27,8 +27,11 @@ typedef StringMap COFFUniqueMapTy; MCContext::MCContext(const MCAsmInfo &mai, const TargetAsmInfo *tai) : - MAI(mai), TAI(tai), NextUniqueID(0), - CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0) { + MAI(mai), TAI(tai), + Allocator(), Symbols(Allocator), UsedNames(Allocator), + NextUniqueID(0), + CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0), + AllowTemporaryLabels(true) { MachOUniquingMap = 0; ELFUniquingMap = 0; COFFUniquingMap = 0; @@ -76,18 +79,19 @@ MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) { } MCSymbol *MCContext::CreateSymbol(StringRef Name) { - // Determine whether this is an assembler temporary or normal label. - bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); + // Determine whether this is an assembler temporary or normal label, if used. + bool isTemporary = false; + if (AllowTemporaryLabels) + isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix()); StringMapEntry *NameEntry = &UsedNames.GetOrCreateValue(Name); if (NameEntry->getValue()) { assert(isTemporary && "Cannot rename non temporary symbols"); - SmallString<128> NewName; + SmallString<128> NewName = Name; do { - Twine T = Name + Twine(NextUniqueID++); - T.toVector(NewName); - StringRef foo = NewName; - NameEntry = &UsedNames.GetOrCreateValue(foo); + NewName.resize(Name.size()); + raw_svector_ostream(NewName) << NextUniqueID++; + NameEntry = &UsedNames.GetOrCreateValue(NewName); } while (NameEntry->getValue()); } NameEntry->setValue(true); @@ -107,9 +111,8 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) { MCSymbol *MCContext::CreateTempSymbol() { SmallString<128> NameSV; - Twine Name = Twine(MAI.getPrivateGlobalPrefix()) + "tmp" + - Twine(NextUniqueID++); - Name.toVector(NameSV); + raw_svector_ostream(NameSV) + << MAI.getPrivateGlobalPrefix() << "tmp" << NextUniqueID++; return CreateSymbol(NameSV); } diff --git 
a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt index 5fa7b70194b2..0ce359d4b533 100644 --- a/lib/MC/MCDisassembler/CMakeLists.txt +++ b/lib/MC/MCDisassembler/CMakeLists.txt @@ -1,7 +1,8 @@ add_llvm_library(LLVMMCDisassembler + Disassembler.cpp EDDisassembler.cpp - EDOperand.cpp EDInst.cpp + EDOperand.cpp EDToken.cpp ) diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp new file mode 100644 index 000000000000..ced57e8ca2de --- /dev/null +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -0,0 +1,171 @@ +//===-- lib/MC/Disassembler.cpp - Disassembler Public C Interface -*- C -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#include "Disassembler.h" +#include +#include "llvm-c/Disassembler.h" + +#include +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCContext.h" +#include "llvm/Target/TargetRegistry.h" +#include "llvm/Target/TargetAsmInfo.h" // FIXME. +#include "llvm/Target/TargetMachine.h" // FIXME. +#include "llvm/Target/TargetSelect.h" +#include "llvm/Support/MemoryObject.h" + +namespace llvm { +class Target; +} // namespace llvm +using namespace llvm; + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +// +// LLVMCreateDisasm() creates a disassembler for the TripleName. Symbolic +// disassembly is supported by passing a block of information in the DisInfo +// parameter and specifing the TagType and call back functions as described in +// the header llvm-c/Disassembler.h . The pointer to the block and the +// functions can all be passed as NULL. If successful this returns a +// disassembler context if not it returns NULL. 
+// +LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, + int TagType, LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp) { + // Initialize targets and assembly printers/parsers. + llvm::InitializeAllTargetInfos(); + // FIXME: We shouldn't need to initialize the Target(Machine)s. + llvm::InitializeAllTargets(); + llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllAsmParsers(); + llvm::InitializeAllDisassemblers(); + + // Get the target. + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + assert(TheTarget && "Unable to create target!"); + + // Get the assembler info needed to setup the MCContext. + const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName); + assert(MAI && "Unable to create target asm info!"); + + // Package up features to be passed to target/subtarget + std::string FeaturesStr; + + // FIXME: We shouldn't need to do this (and link in codegen). + // When we split this out, we should do it in a way that makes + // it straightforward to switch subtargets on the fly. + TargetMachine *TM = TheTarget->createTargetMachine(TripleName, FeaturesStr); + assert(TM && "Unable to create target machine!"); + + // Get the target assembler info needed to setup the context. + const TargetAsmInfo *tai = new TargetAsmInfo(*TM); + assert(tai && "Unable to create target assembler!"); + + // Set up the MCContext for creating symbols and MCExpr's. + MCContext *Ctx = new MCContext(*MAI, tai); + assert(Ctx && "Unable to create MCContext!"); + + // Set up disassembler. + MCDisassembler *DisAsm = TheTarget->createMCDisassembler(); + assert(DisAsm && "Unable to create disassembler!"); + DisAsm->setupForSymbolicDisassembly(GetOpInfo, DisInfo, Ctx); + + // Set up the instruction printer. 
+ int AsmPrinterVariant = MAI->getAssemblerDialect(); + MCInstPrinter *IP = TheTarget->createMCInstPrinter(*TM, AsmPrinterVariant, + *MAI); + assert(IP && "Unable to create instruction printer!"); + + LLVMDisasmContext *DC = new LLVMDisasmContext(TripleName, DisInfo, TagType, + GetOpInfo, SymbolLookUp, + TheTarget, MAI, TM, tai, Ctx, + DisAsm, IP); + assert(DC && "Allocation failure!"); + return DC; +} + +// +// LLVMDisasmDispose() disposes of the disassembler specified by the context. +// +void LLVMDisasmDispose(LLVMDisasmContextRef DCR){ + LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; + delete DC; +} + +namespace { +// +// The memory object created by LLVMDisasmInstruction(). +// +class DisasmMemoryObject : public MemoryObject { +private: + uint8_t *Bytes; + uint64_t Size; + uint64_t BasePC; +public: + DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) : + Bytes(bytes), Size(size), BasePC(basePC) {} + + uint64_t getBase() const { return BasePC; } + uint64_t getExtent() const { return Size; } + + int readByte(uint64_t Addr, uint8_t *Byte) const { + if (Addr - BasePC >= Size) + return -1; + *Byte = Bytes[Addr - BasePC]; + return 0; + } +}; +} // namespace + +// +// LLVMDisasmInstruction() disassembles a single instruction using the +// disassembler context specified in the parameter DC. The bytes of the +// instruction are specified in the parameter Bytes, and contains at least +// BytesSize number of bytes. The instruction is at the address specified by +// the PC parameter. If a valid instruction can be disassembled its string is +// returned indirectly in OutString which whos size is specified in the +// parameter OutStringSize. This function returns the number of bytes in the +// instruction or zero if there was no valid instruction. If this function +// returns zero the caller will have to pick how many bytes they want to step +// over by printing a .byte, .long etc. to continue. 
+// +size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, + uint64_t BytesSize, uint64_t PC, char *OutString, + size_t OutStringSize){ + LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; + // Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject. + DisasmMemoryObject MemoryObject(Bytes, BytesSize, PC); + + uint64_t Size; + MCInst Inst; + const MCDisassembler *DisAsm = DC->getDisAsm(); + MCInstPrinter *IP = DC->getIP(); + if (!DisAsm->getInstruction(Inst, Size, MemoryObject, PC, /*REMOVE*/ nulls())) + return 0; + + std::string InsnStr; + raw_string_ostream OS(InsnStr); + IP->printInst(&Inst, OS); + OS.flush(); + + size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); + std::memcpy(OutString, InsnStr.data(), OutputSize); + OutString[OutputSize] = '\0'; // Terminate string. + + return Size; +} + +#ifdef __cplusplus +} +#endif // __cplusplus diff --git a/lib/MC/MCDisassembler/Disassembler.h b/lib/MC/MCDisassembler/Disassembler.h new file mode 100644 index 000000000000..f0ec42a017a4 --- /dev/null +++ b/lib/MC/MCDisassembler/Disassembler.h @@ -0,0 +1,96 @@ +//===------------- Disassembler.h - LLVM Disassembler -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface for the Disassembly library's disassembler +// context. The disassembler is responsible for producing strings for +// individual instructions according to a given architecture and disassembly +// syntax. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_DISASSEMBLER_H +#define LLVM_MC_DISASSEMBLER_H + +#include "llvm-c/Disassembler.h" +#include +#include "llvm/ADT/OwningPtr.h" + +namespace llvm { +class TargetAsmInfo; +class MCContext; +class MCAsmInfo; +class MCDisassembler; +class MCInstPrinter; +class Target; +class TargetMachine; + +// +// This is the disassembler context returned by LLVMCreateDisasm(). +// +class LLVMDisasmContext { +private: + // + // The passed parameters when the disassembler context is created. + // + // The TripleName for this disassembler. + std::string TripleName; + // The pointer to the caller's block of symbolic information. + void *DisInfo; + // The Triple specific symbolic information type returned by GetOpInfo. + int TagType; + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The function to look up a symbol name. + LLVMSymbolLookupCallback SymbolLookUp; + // + // The objects created and saved by LLVMCreateDisasm() then used by + // LLVMDisasmInstruction(). + // + // The LLVM target corresponding to the disassembler. + // FIXME: using llvm::OwningPtr causes a malloc error + // when this LLVMDisasmContext is deleted. + const Target *TheTarget; + // The assembly information for the target architecture. + llvm::OwningPtr MAI; + // The target machine instance. + llvm::OwningPtr TM; + // The disassembler for the target architecture. + // FIXME: using llvm::OwningPtr causes a malloc + // error when this LLVMDisasmContext is deleted. + const TargetAsmInfo *Tai; + // The assembly context for creating symbols and MCExprs. + llvm::OwningPtr Ctx; + // The disassembler for the target architecture. + llvm::OwningPtr DisAsm; + // The instruction printer for the target architecture. 
+ llvm::OwningPtr IP; + +public: + LLVMDisasmContext(std::string tripleName, void *disInfo, int tagType, + LLVMOpInfoCallback getOpInfo, + LLVMSymbolLookupCallback symbolLookUp, + const Target *theTarget, const MCAsmInfo *mAI, + llvm::TargetMachine *tM, const TargetAsmInfo *tai, + llvm::MCContext *ctx, const MCDisassembler *disAsm, + MCInstPrinter *iP) : TripleName(tripleName), + DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo), + SymbolLookUp(symbolLookUp), TheTarget(theTarget), Tai(tai) { + TM.reset(tM); + MAI.reset(mAI); + Ctx.reset(ctx); + DisAsm.reset(disAsm); + IP.reset(iP); + } + const MCDisassembler *getDisAsm() const { return DisAsm.get(); } + MCInstPrinter *getIP() { return IP.get(); } +}; + +} // namespace llvm + +#endif diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 2fd14db2a45d..91c5284892a5 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -193,7 +193,8 @@ EDDisassembler::EDDisassembler(CPUKey &key) : InstString.reset(new std::string); InstStream.reset(new raw_string_ostream(*InstString)); - InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo)); + InstPrinter.reset(Tgt->createMCInstPrinter(*TargetMachine, LLVMSyntaxVariant, + *AsmInfo)); if (!InstPrinter) return; @@ -253,9 +254,11 @@ EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader, delete inst; return NULL; } else { - const llvm::EDInstInfo *thisInstInfo; + const llvm::EDInstInfo *thisInstInfo = NULL; - thisInstInfo = &InstInfos[inst->getOpcode()]; + if (InstInfos) { + thisInstInfo = &InstInfos[inst->getOpcode()]; + } EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo); return sdInst; @@ -331,6 +334,15 @@ int EDDisassembler::printInst(std::string &str, MCInst &inst) { return 0; } +static void diag_handler(const SMDiagnostic &diag, + void *context) +{ + if (context) { + EDDisassembler *disassembler = static_cast(context); + 
diag.Print("", disassembler->ErrorStream); + } +} + int EDDisassembler::parseInst(SmallVectorImpl &operands, SmallVectorImpl &tokens, const std::string &str) { @@ -353,6 +365,7 @@ int EDDisassembler::parseInst(SmallVectorImpl &operands, SMLoc instLoc; SourceMgr sourceMgr; + sourceMgr.setDiagHandler(diag_handler, static_cast(this)); sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over MCContext context(*AsmInfo, NULL); OwningPtr streamer(createNullStreamer(context)); diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h index 71e45f0b042f..2fcc09d4bef0 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.h +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -87,13 +87,8 @@ struct EDDisassembler { /// operator< - Less-than operator bool operator<(const CPUKey &key) const { - if(Arch > key.Arch) - return false; - else if (Arch == key.Arch) { - if(Syntax > key.Syntax) - return false; - } - return true; + return ((Arch < key.Arch) || + ((Arch == key.Arch) && Syntax < (key.Syntax))); } }; diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h index 627c06641dbc..ad5728263133 100644 --- a/lib/MC/MCDisassembler/EDInfo.h +++ b/lib/MC/MCDisassembler/EDInfo.h @@ -35,6 +35,7 @@ enum OperandTypes { kOperandTypeARMAddrMode5, kOperandTypeARMAddrMode6, kOperandTypeARMAddrMode6Offset, + kOperandTypeARMAddrMode7, kOperandTypeARMAddrModePC, kOperandTypeARMRegisterList, kOperandTypeARMTBAddrMode, @@ -51,7 +52,8 @@ enum OperandTypes { kOperandTypeThumb2AddrModeImm12, kOperandTypeThumb2AddrModeSoReg, kOperandTypeThumb2AddrModeImm8s4, - kOperandTypeThumb2AddrModeImm8s4Offset + kOperandTypeThumb2AddrModeImm8s4Offset, + kOperandTypeThumb2AddrModeReg }; enum OperandFlags { diff --git a/lib/MC/MCDisassembler/EDInst.cpp b/lib/MC/MCDisassembler/EDInst.cpp index 63b049fe40fd..6057e169e347 100644 --- a/lib/MC/MCDisassembler/EDInst.cpp +++ b/lib/MC/MCDisassembler/EDInst.cpp @@ -165,6 +165,9 @@ int 
EDInst::getOperand(EDOperand *&operand, unsigned int index) { int EDInst::tokenize() { if (TokenizeResult.valid()) return TokenizeResult.result(); + + if (ThisInstInfo == NULL) + return TokenizeResult.setResult(-1); if (stringify()) return TokenizeResult.setResult(-1); diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp index 2b0c73e80593..492bb08f336a 100644 --- a/lib/MC/MCDisassembler/EDOperand.cpp +++ b/lib/MC/MCDisassembler/EDOperand.cpp @@ -73,6 +73,8 @@ EDOperand::EDOperand(const EDDisassembler &disassembler, case kOperandTypeThumb2AddrModeImm8Offset: case kOperandTypeARMTBAddrMode: case kOperandTypeThumb2AddrModeImm8s4Offset: + case kOperandTypeARMAddrMode7: + case kOperandTypeThumb2AddrModeReg: numMCOperands = 1; break; case kOperandTypeThumb2SoReg: @@ -196,15 +198,24 @@ int EDOperand::evaluate(uint64_t &result, default: return -1; case kOperandTypeImmediate: + if (!Inst.Inst->getOperand(MCOpIndex).isImm()) + return -1; + result = Inst.Inst->getOperand(MCOpIndex).getImm(); return 0; case kOperandTypeRegister: { + if (!Inst.Inst->getOperand(MCOpIndex).isReg()) + return -1; + unsigned reg = Inst.Inst->getOperand(MCOpIndex).getReg(); return callback(&result, reg, arg); } case kOperandTypeARMBranchTarget: { + if (!Inst.Inst->getOperand(MCOpIndex).isImm()) + return -1; + int64_t displacement = Inst.Inst->getOperand(MCOpIndex).getImm(); uint64_t pcVal; @@ -256,6 +267,7 @@ int EDOperand::isMemory() { case kOperandTypeARMAddrMode4: case kOperandTypeARMAddrMode5: case kOperandTypeARMAddrMode6: + case kOperandTypeARMAddrMode7: case kOperandTypeARMAddrModePC: case kOperandTypeARMBranchTarget: case kOperandTypeThumbAddrModeS1: @@ -269,6 +281,7 @@ int EDOperand::isMemory() { case kOperandTypeThumb2AddrModeImm12: case kOperandTypeThumb2AddrModeSoReg: case kOperandTypeThumb2AddrModeImm8s4: + case kOperandTypeThumb2AddrModeReg: return 1; } } diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 112d7d887a2d..f61f0c24cf6c 100644 --- 
a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -439,14 +440,100 @@ static int getDataAlignmentFactor(MCStreamer &streamer) { return -size; } -static void EmitCFIInstruction(MCStreamer &Streamer, - const MCCFIInstruction &Instr) { +static unsigned getSizeForEncoding(MCStreamer &streamer, + unsigned symbolEncoding) { + MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + unsigned format = symbolEncoding & 0x0f; + switch (format) { + default: + assert(0 && "Unknown Encoding"); + case dwarf::DW_EH_PE_absptr: + case dwarf::DW_EH_PE_signed: + return asmInfo.getPointerSize(); + case dwarf::DW_EH_PE_udata2: + case dwarf::DW_EH_PE_sdata2: + return 2; + case dwarf::DW_EH_PE_udata4: + case dwarf::DW_EH_PE_sdata4: + return 4; + case dwarf::DW_EH_PE_udata8: + case dwarf::DW_EH_PE_sdata8: + return 8; + } +} + +static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, + unsigned symbolEncoding) { + MCContext &context = streamer.getContext(); + const MCAsmInfo &asmInfo = context.getAsmInfo(); + const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol, + symbolEncoding, + streamer); + unsigned size = getSizeForEncoding(streamer, symbolEncoding); + streamer.EmitAbsValue(v, size); +} + +static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, + unsigned symbolEncoding) { + MCContext &context = streamer.getContext(); + const MCAsmInfo &asmInfo = context.getAsmInfo(); + const MCExpr *v = asmInfo.getExprForPersonalitySymbol(&symbol, + symbolEncoding, + streamer); + unsigned size = getSizeForEncoding(streamer, symbolEncoding); + streamer.EmitValue(v, size); +} + +static const MachineLocation TranslateMachineLocation( + const TargetAsmInfo &AsmInfo, + 
const MachineLocation &Loc) { + unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? + MachineLocation::VirtualFP : + unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true)); + const MachineLocation &NewLoc = Loc.isReg() ? + MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); + return NewLoc; +} + +namespace { + class FrameEmitterImpl { + int CFAOffset; + int CIENum; + bool UsingCFI; + + public: + FrameEmitterImpl(bool usingCFI) : CFAOffset(0), CIENum(0), + UsingCFI(usingCFI) { + } + + const MCSymbol &EmitCIE(MCStreamer &streamer, + const MCSymbol *personality, + unsigned personalityEncoding, + const MCSymbol *lsda, + unsigned lsdaEncoding); + MCSymbol *EmitFDE(MCStreamer &streamer, + const MCSymbol &cieStart, + const MCDwarfFrameInfo &frame, + bool forceLsda); + void EmitCFIInstructions(MCStreamer &streamer, + const std::vector &Instrs, + MCSymbol *BaseLabel); + void EmitCFIInstruction(MCStreamer &Streamer, + const MCCFIInstruction &Instr); + }; +} + +void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, + const MCCFIInstruction &Instr) { int dataAlignmentFactor = getDataAlignmentFactor(Streamer); switch (Instr.getOperation()) { - case MCCFIInstruction::Move: { + case MCCFIInstruction::Move: + case MCCFIInstruction::RelMove: { const MachineLocation &Dst = Instr.getDestination(); const MachineLocation &Src = Instr.getSource(); + const bool IsRelative = Instr.getOperation() == MCCFIInstruction::RelMove; // If advancing cfa. 
if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { @@ -459,7 +546,12 @@ static void EmitCFIInstruction(MCStreamer &Streamer, Streamer.EmitULEB128IntValue(Src.getReg()); } - Streamer.EmitULEB128IntValue(-Src.getOffset(), 1); + if (IsRelative) + CFAOffset += Src.getOffset(); + else + CFAOffset = -Src.getOffset(); + + Streamer.EmitULEB128IntValue(CFAOffset); return; } @@ -471,7 +563,11 @@ static void EmitCFIInstruction(MCStreamer &Streamer, } unsigned Reg = Src.getReg(); - int Offset = Dst.getOffset() / dataAlignmentFactor; + + int Offset = Dst.getOffset(); + if (IsRelative) + Offset -= CFAOffset; + Offset = Offset / dataAlignmentFactor; if (Offset < 0) { Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1); @@ -479,11 +575,11 @@ static void EmitCFIInstruction(MCStreamer &Streamer, Streamer.EmitSLEB128IntValue(Offset); } else if (Reg < 64) { Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1); - Streamer.EmitULEB128IntValue(Offset, 1); + Streamer.EmitULEB128IntValue(Offset); } else { Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1); - Streamer.EmitULEB128IntValue(Reg, 1); - Streamer.EmitULEB128IntValue(Offset, 1); + Streamer.EmitULEB128IntValue(Reg); + Streamer.EmitULEB128IntValue(Offset); } return; } @@ -493,15 +589,21 @@ static void EmitCFIInstruction(MCStreamer &Streamer, case MCCFIInstruction::Restore: Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1); return; + case MCCFIInstruction::SameValue: { + unsigned Reg = Instr.getDestination().getReg(); + Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1); + Streamer.EmitULEB128IntValue(Reg); + return; + } } llvm_unreachable("Unhandled case in switch"); } /// EmitFrameMoves - Emit frame instructions to describe the layout of the /// frame. 
-static void EmitCFIInstructions(MCStreamer &streamer, - const std::vector &Instrs, - MCSymbol *BaseLabel) { +void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, + const std::vector &Instrs, + MCSymbol *BaseLabel) { for (unsigned i = 0, N = Instrs.size(); i < N; ++i) { const MCCFIInstruction &Instr = Instrs[i]; MCSymbol *Label = Instr.getLabel(); @@ -521,74 +623,31 @@ static void EmitCFIInstructions(MCStreamer &streamer, } } -static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, - unsigned symbolEncoding) { - MCContext &context = streamer.getContext(); - const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); - unsigned format = symbolEncoding & 0x0f; - unsigned application = symbolEncoding & 0x70; - unsigned size; - switch (format) { - default: - assert(0 && "Unknown Encoding"); - case dwarf::DW_EH_PE_absptr: - case dwarf::DW_EH_PE_signed: - size = asmInfo.getPointerSize(); - break; - case dwarf::DW_EH_PE_udata2: - case dwarf::DW_EH_PE_sdata2: - size = 2; - break; - case dwarf::DW_EH_PE_udata4: - case dwarf::DW_EH_PE_sdata4: - size = 4; - break; - case dwarf::DW_EH_PE_udata8: - case dwarf::DW_EH_PE_sdata8: - size = 8; - break; - } - switch (application) { - default: - assert(0 && "Unknown Encoding"); - break; - case 0: - streamer.EmitSymbolValue(&symbol, size); - break; - case dwarf::DW_EH_PE_pcrel: - streamer.EmitPCRelSymbolValue(&symbol, size); - break; - } -} - -static const MachineLocation TranslateMachineLocation( - const TargetAsmInfo &AsmInfo, - const MachineLocation &Loc) { - unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? - MachineLocation::VirtualFP : - unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true)); - const MachineLocation &NewLoc = Loc.isReg() ? 
- MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); - return NewLoc; -} - -static const MCSymbol &EmitCIE(MCStreamer &streamer, - const MCSymbol *personality, - unsigned personalityEncoding, - const MCSymbol *lsda, - unsigned lsdaEncoding) { +const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, + const MCSymbol *personality, + unsigned personalityEncoding, + const MCSymbol *lsda, + unsigned lsdaEncoding) { MCContext &context = streamer.getContext(); const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); const MCSection §ion = *asmInfo.getEHFrameSection(); streamer.SwitchSection(§ion); - MCSymbol *sectionStart = streamer.getContext().CreateTempSymbol(); + + MCSymbol *sectionStart; + if (asmInfo.isFunctionEHFrameSymbolPrivate()) + sectionStart = context.CreateTempSymbol(); + else + sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum)); + + CIENum++; + MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol(); // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart, *sectionEnd, 4); streamer.EmitLabel(sectionStart); - streamer.EmitValue(Length, 4); + streamer.EmitAbsValue(Length, 4); // CIE ID streamer.EmitIntValue(0, 4); @@ -617,28 +676,35 @@ static const MCSymbol &EmitCIE(MCStreamer &streamer, streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true)); // Augmentation Data Length (optional) - MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol(); - MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol(); - const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer, - *augmentationStart, - *augmentationEnd, 0); - streamer.EmitULEB128Value(augmentationLength); + + unsigned augmentationLength = 0; + if (personality) { + // Personality Encoding + augmentationLength += 1; + // Personality + augmentationLength += getSizeForEncoding(streamer, personalityEncoding); + } + if (lsda) { + augmentationLength += 1; + } + // Encoding of the FDE pointers + 
augmentationLength += 1; + + streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data (optional) - streamer.EmitLabel(augmentationStart); if (personality) { // Personality Encoding streamer.EmitIntValue(personalityEncoding, 1); // Personality - EmitSymbol(streamer, *personality, personalityEncoding); + EmitPersonality(streamer, *personality, personalityEncoding); } if (lsda) { // LSDA Encoding streamer.EmitIntValue(lsdaEncoding, 1); } // Encoding of the FDE pointers - streamer.EmitIntValue(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4, 1); - streamer.EmitLabel(augmentationEnd); + streamer.EmitIntValue(asmInfo.getFDEEncoding(UsingCFI), 1); // Initial Instructions @@ -664,50 +730,66 @@ static const MCSymbol &EmitCIE(MCStreamer &streamer, return *sectionStart; } -static MCSymbol *EmitFDE(MCStreamer &streamer, - const MCSymbol &cieStart, - const MCDwarfFrameInfo &frame) { +MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, + const MCSymbol &cieStart, + const MCDwarfFrameInfo &frame, + bool forceLsda) { MCContext &context = streamer.getContext(); MCSymbol *fdeStart = context.CreateTempSymbol(); MCSymbol *fdeEnd = context.CreateTempSymbol(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + + if (!asmInfo.isFunctionEHFrameSymbolPrivate()) { + Twine EHName = frame.Function->getName() + Twine(".eh"); + MCSymbol *EHSym = context.GetOrCreateSymbol(EHName); + streamer.EmitEHSymAttributes(frame.Function, EHSym); + streamer.EmitLabel(EHSym); + } // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0); - streamer.EmitValue(Length, 4); + streamer.EmitAbsValue(Length, 4); streamer.EmitLabel(fdeStart); // CIE Pointer const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart, 0); - streamer.EmitValue(offset, 4); + streamer.EmitAbsValue(offset, 4); + unsigned fdeEncoding = asmInfo.getFDEEncoding(UsingCFI); + unsigned size = getSizeForEncoding(streamer, fdeEncoding); // PC Begin - 
streamer.EmitPCRelSymbolValue(frame.Begin, 4); + EmitSymbol(streamer, *frame.Begin, fdeEncoding); // PC Range const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin, *frame.End, 0); - streamer.EmitValue(Range, 4); + streamer.EmitAbsValue(Range, size); // Augmentation Data Length - MCSymbol *augmentationStart = streamer.getContext().CreateTempSymbol(); - MCSymbol *augmentationEnd = streamer.getContext().CreateTempSymbol(); - const MCExpr *augmentationLength = MakeStartMinusEndExpr(streamer, - *augmentationStart, - *augmentationEnd, 0); - streamer.EmitULEB128Value(augmentationLength); + unsigned augmentationLength = 0; + + if (frame.Lsda || forceLsda) + augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding); + + streamer.EmitULEB128IntValue(augmentationLength); // Augmentation Data - streamer.EmitLabel(augmentationStart); + + // When running in "CodeGen compatibility mode" a FDE with no LSDA can be + // assigned to a CIE that requires one. In that case we output a 0 (as does + // CodeGen). if (frame.Lsda) EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding); - streamer.EmitLabel(augmentationEnd); + else if (forceLsda) + streamer.EmitIntValue(0, getSizeForEncoding(streamer, frame.LsdaEncoding)); + // Call Frame Instructions EmitCFIInstructions(streamer, frame.Instructions, frame.Begin); // Padding - streamer.EmitValueToAlignment(4); + streamer.EmitValueToAlignment(size); return fdeEnd; } @@ -753,11 +835,78 @@ namespace llvm { }; } -void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) { +// This is an implementation of CIE and FDE emission that is bug by bug +// compatible with the one in CodeGen. It is useful during the transition +// to make it easy to compare the outputs, but should probably be removed +// afterwards. 
+void MCDwarfFrameEmitter::EmitDarwin(MCStreamer &streamer, + bool usingCFI) { + FrameEmitterImpl Emitter(usingCFI); + DenseMap Personalities; + const MCSymbol *aCIE = NULL; + const MCDwarfFrameInfo *aFrame = NULL; + + for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) { + const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); + if (!frame.Personality) + continue; + if (Personalities.count(frame.Personality)) + continue; + + const MCSymbol *cieStart = &Emitter.EmitCIE(streamer, frame.Personality, + frame.PersonalityEncoding, + frame.Lsda, + frame.LsdaEncoding); + aCIE = cieStart; + aFrame = &frame; + Personalities[frame.Personality] = cieStart; + } + + if (Personalities.empty()) { + const MCDwarfFrameInfo &frame = streamer.getFrameInfo(0); + aCIE = &Emitter.EmitCIE(streamer, frame.Personality, + frame.PersonalityEncoding, frame.Lsda, + frame.LsdaEncoding); + aFrame = &frame; + } + + MCSymbol *fdeEnd = NULL; + for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) { + const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); + const MCSymbol *cieStart = Personalities[frame.Personality]; + bool hasLSDA; + if (!cieStart) { + cieStart = aCIE; + hasLSDA = aFrame->Lsda; + } else { + hasLSDA = true; + } + + fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame, + hasLSDA); + if (i != n - 1) + streamer.EmitLabel(fdeEnd); + } + const MCContext &context = streamer.getContext(); const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + streamer.EmitValueToAlignment(asmInfo.getPointerSize()); + if (fdeEnd) + streamer.EmitLabel(fdeEnd); +} + +void MCDwarfFrameEmitter::Emit(MCStreamer &streamer, + bool usingCFI) { + const MCContext &context = streamer.getContext(); + const TargetAsmInfo &asmInfo = context.getTargetAsmInfo(); + if (!asmInfo.isFunctionEHFrameSymbolPrivate()) { + EmitDarwin(streamer, usingCFI); + return; + } + MCSymbol *fdeEnd = NULL; DenseMap CIEStarts; + FrameEmitterImpl Emitter(usingCFI); for (unsigned i = 0, n = 
streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i); @@ -765,10 +914,10 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &streamer) { frame.LsdaEncoding); const MCSymbol *&cieStart = CIEStarts[key]; if (!cieStart) - cieStart = &EmitCIE(streamer, frame.Personality, - frame.PersonalityEncoding, frame.Lsda, - frame.LsdaEncoding); - fdeEnd = EmitFDE(streamer, *cieStart, frame); + cieStart = &Emitter.EmitCIE(streamer, frame.Personality, + frame.PersonalityEncoding, frame.Lsda, + frame.LsdaEncoding); + fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame, false); if (i != n - 1) streamer.EmitLabel(fdeEnd); } @@ -782,21 +931,28 @@ void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta) { SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS); + const TargetAsmInfo &AsmInfo = Streamer.getContext().getTargetAsmInfo(); + MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS, AsmInfo); Streamer.EmitBytes(OS.str(), /*AddrSpace=*/0); } void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta, - raw_ostream &OS) { + raw_ostream &OS, + const TargetAsmInfo &AsmInfo) { + // This is a small hack to facilitate the transition to CFI on OS X. It + // relaxes all address advances which lets us produces identical output + // to the one produce by CodeGen. + const bool Relax = !AsmInfo.isFunctionEHFrameSymbolPrivate(); + // FIXME: Assumes the code alignment factor is 1. if (AddrDelta == 0) { - } else if (isUIntN(6, AddrDelta)) { + } else if (isUIntN(6, AddrDelta) && !Relax) { uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta; OS << Opcode; - } else if (isUInt<8>(AddrDelta)) { + } else if (isUInt<8>(AddrDelta) && !Relax) { OS << uint8_t(dwarf::DW_CFA_advance_loc1); OS << uint8_t(AddrDelta); - } else if (isUInt<16>(AddrDelta)) { + } else if (isUInt<16>(AddrDelta) && !Relax) { // FIXME: check what is the correct behavior on a big endian machine. 
OS << uint8_t(dwarf::DW_CFA_advance_loc2); OS << uint8_t( AddrDelta & 0xff); diff --git a/lib/MC/MCELF.cpp b/lib/MC/MCELF.cpp new file mode 100644 index 000000000000..ce7783e2862b --- /dev/null +++ b/lib/MC/MCELF.cpp @@ -0,0 +1,72 @@ +//===- lib/MC/MCELF.cpp - MC ELF ------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements ELF object file writer information. +// +//===----------------------------------------------------------------------===// + +#include "MCELF.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCELFSymbolFlags.h" +#include "llvm/MC/MCFixupKindInfo.h" +#include "llvm/Support/ELF.h" +#include "llvm/Target/TargetAsmBackend.h" + +namespace llvm { + +void MCELF::SetBinding(MCSymbolData &SD, unsigned Binding) { + assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || + Binding == ELF::STB_WEAK); + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); + SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); +} + +unsigned MCELF::GetBinding(const MCSymbolData &SD) { + uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; + assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || + Binding == ELF::STB_WEAK); + return Binding; +} + +void MCELF::SetType(MCSymbolData &SD, unsigned Type) { + assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || + Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || + Type == ELF::STT_FILE || Type == ELF::STT_COMMON || + Type == ELF::STT_TLS); + + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); + SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); +} + +unsigned MCELF::GetType(const MCSymbolData &SD) { + uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift; + 
assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || + Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || + Type == ELF::STT_FILE || Type == ELF::STT_COMMON || + Type == ELF::STT_TLS); + return Type; +} + +void MCELF::SetVisibility(MCSymbolData &SD, unsigned Visibility) { + assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || + Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); + + uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift); + SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift)); +} + +unsigned MCELF::GetVisibility(MCSymbolData &SD) { + unsigned Visibility = + (SD.getFlags() & (0xf << ELF_STV_Shift)) >> ELF_STV_Shift; + assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || + Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); + return Visibility; +} + +} diff --git a/lib/MC/MCELF.h b/lib/MC/MCELF.h new file mode 100644 index 000000000000..e08f1e65429a --- /dev/null +++ b/lib/MC/MCELF.h @@ -0,0 +1,35 @@ +//===- lib/MC/MCELF.h - ELF MC --------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains some support functions used by the ELF Streamer and +// ObjectWriter. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELF_H +#define LLVM_MC_MCELF_H + +#include "llvm/MC/MCExpr.h" + +namespace llvm { +class MCSymbolData; + +class MCELF { + public: + static void SetBinding(MCSymbolData &SD, unsigned Binding); + static unsigned GetBinding(const MCSymbolData &SD); + static void SetType(MCSymbolData &SD, unsigned Type); + static unsigned GetType(const MCSymbolData &SD); + static void SetVisibility(MCSymbolData &SD, unsigned Visibility); + static unsigned GetVisibility(MCSymbolData &SD); +}; + +} + +#endif diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index e49074da3994..be8e2e3891fe 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -11,18 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "MCELFStreamer.h" +#include "MCELF.h" #include "llvm/MC/MCStreamer.h" - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/MC/MCAssembler.h" -#include "llvm/MC/MCContext.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCELFSymbolFlags.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" -#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSection.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" #include "llvm/Support/Debug.h" @@ -34,148 +30,6 @@ using namespace llvm; -namespace { - -static void SetBinding(MCSymbolData &SD, unsigned Binding) { - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STB_Shift); - SD.setFlags(OtherFlags | (Binding << ELF_STB_Shift)); -} - -static unsigned GetBinding(const MCSymbolData &SD) { - uint32_t Binding = (SD.getFlags() & (0xf << ELF_STB_Shift)) >> ELF_STB_Shift; - assert(Binding == ELF::STB_LOCAL || Binding == ELF::STB_GLOBAL || - Binding == ELF::STB_WEAK); - return Binding; -} - -static void 
SetType(MCSymbolData &SD, unsigned Type) { - assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || - Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS); - - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); - SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); -} - -static void SetVisibility(MCSymbolData &SD, unsigned Visibility) { - assert(Visibility == ELF::STV_DEFAULT || Visibility == ELF::STV_INTERNAL || - Visibility == ELF::STV_HIDDEN || Visibility == ELF::STV_PROTECTED); - - uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STV_Shift); - SD.setFlags(OtherFlags | (Visibility << ELF_STV_Shift)); -} - -class MCELFStreamer : public MCObjectStreamer { -public: - MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(Context, TAB, OS, Emitter) {} - - ~MCELFStreamer() {} - - /// @name MCStreamer Interface - /// @{ - - virtual void InitSections(); - virtual void ChangeSection(const MCSection *Section); - virtual void EmitLabel(MCSymbol *Symbol); - virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); - virtual void EmitThumbFunc(MCSymbol *Func); - virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); - virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); - virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment); - virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitCOFFSymbolStorageClass(int StorageClass) { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitCOFFSymbolType(int Type) { - assert(0 && "ELF doesn't support this 
directive"); - } - - virtual void EndCOFFSymbolDef() { - assert(0 && "ELF doesn't support this directive"); - } - - virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SD.setSize(Value); - } - - virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); - - virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, - unsigned Size = 0, unsigned ByteAlignment = 0) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment = 0) { - assert(0 && "ELF doesn't support this directive"); - } - virtual void EmitBytes(StringRef Data, unsigned AddrSpace); - virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, - unsigned ValueSize = 1, - unsigned MaxBytesToEmit = 0); - virtual void EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit = 0); - - virtual void EmitFileDirective(StringRef Filename); - - virtual void Finish(); - -private: - virtual void EmitInstToFragment(const MCInst &Inst); - virtual void EmitInstToData(const MCInst &Inst); - - void fixSymbolsInTLSFixups(const MCExpr *expr); - - struct LocalCommon { - MCSymbolData *SD; - uint64_t Size; - unsigned ByteAlignment; - }; - std::vector LocalCommons; - - SmallPtrSet BindingExplicitlySet; - /// @} - void SetSection(StringRef Section, unsigned Type, unsigned Flags, - SectionKind Kind) { - SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); - } - - void SetSectionData() { - SetSection(".data", ELF::SHT_PROGBITS, - ELF::SHF_WRITE |ELF::SHF_ALLOC, - SectionKind::getDataRel()); - EmitCodeAlignment(4, 0); - } - void SetSectionText() { - SetSection(".text", ELF::SHT_PROGBITS, - ELF::SHF_EXECINSTR | - ELF::SHF_ALLOC, SectionKind::getText()); - EmitCodeAlignment(4, 0); - } - void SetSectionBss() { - SetSection(".bss", ELF::SHT_NOBITS, - ELF::SHF_WRITE | - 
ELF::SHF_ALLOC, SectionKind::getBSS()); - EmitCodeAlignment(4, 0); - } -}; - -} // end anonymous namespace. - void MCELFStreamer::InitSections() { // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. @@ -194,7 +48,7 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) { static_cast(Symbol->getSection()); MCSymbolData &SD = getAssembler().getSymbolData(*Symbol); if (Section.getFlags() & ELF::SHF_TLS) - SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, ELF::STT_TLS); } void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { @@ -281,54 +135,54 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, break; case MCSA_Global: - SetBinding(SD, ELF::STB_GLOBAL); + MCELF::SetBinding(SD, ELF::STB_GLOBAL); SD.setExternal(true); BindingExplicitlySet.insert(Symbol); break; case MCSA_WeakReference: case MCSA_Weak: - SetBinding(SD, ELF::STB_WEAK); + MCELF::SetBinding(SD, ELF::STB_WEAK); SD.setExternal(true); BindingExplicitlySet.insert(Symbol); break; case MCSA_Local: - SetBinding(SD, ELF::STB_LOCAL); + MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); BindingExplicitlySet.insert(Symbol); break; case MCSA_ELF_TypeFunction: - SetType(SD, ELF::STT_FUNC); + MCELF::SetType(SD, ELF::STT_FUNC); break; case MCSA_ELF_TypeObject: - SetType(SD, ELF::STT_OBJECT); + MCELF::SetType(SD, ELF::STT_OBJECT); break; case MCSA_ELF_TypeTLS: - SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, ELF::STT_TLS); break; case MCSA_ELF_TypeCommon: - SetType(SD, ELF::STT_COMMON); + MCELF::SetType(SD, ELF::STT_COMMON); break; case MCSA_ELF_TypeNoType: - SetType(SD, ELF::STT_NOTYPE); + MCELF::SetType(SD, ELF::STT_NOTYPE); break; case MCSA_Protected: - SetVisibility(SD, ELF::STV_PROTECTED); + MCELF::SetVisibility(SD, ELF::STV_PROTECTED); break; case MCSA_Hidden: - SetVisibility(SD, ELF::STV_HIDDEN); + MCELF::SetVisibility(SD, ELF::STV_HIDDEN); break; case MCSA_Internal: - SetVisibility(SD, ELF::STV_INTERNAL); + 
MCELF::SetVisibility(SD, ELF::STV_INTERNAL); break; } } @@ -338,13 +192,13 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); if (!BindingExplicitlySet.count(Symbol)) { - SetBinding(SD, ELF::STB_GLOBAL); + MCELF::SetBinding(SD, ELF::STB_GLOBAL); SD.setExternal(true); } - SetType(SD, ELF::STT_OBJECT); + MCELF::SetType(SD, ELF::STT_OBJECT); - if (GetBinding(SD) == ELF_STB_Local) { + if (MCELF::GetBinding(SD) == ELF_STB_Local) { const MCSection *Section = getAssembler().getContext().getELFSection(".bss", ELF::SHT_NOBITS, ELF::SHF_WRITE | @@ -364,7 +218,7 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size) { // FIXME: Should this be caught and done earlier? MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - SetBinding(SD, ELF::STB_LOCAL); + MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); BindingExplicitlySet.insert(Symbol); // FIXME: ByteAlignment is not needed here, but is required. 
@@ -437,19 +291,22 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { switch (symRef.getKind()) { default: return; + case MCSymbolRefExpr::VK_GOTTPOFF: + case MCSymbolRefExpr::VK_INDNTPOFF: case MCSymbolRefExpr::VK_NTPOFF: case MCSymbolRefExpr::VK_GOTNTPOFF: case MCSymbolRefExpr::VK_TLSGD: + case MCSymbolRefExpr::VK_TLSLD: case MCSymbolRefExpr::VK_TLSLDM: case MCSymbolRefExpr::VK_TPOFF: case MCSymbolRefExpr::VK_DTPOFF: - case MCSymbolRefExpr::VK_GOTTPOFF: - case MCSymbolRefExpr::VK_TLSLD: case MCSymbolRefExpr::VK_ARM_TLSGD: + case MCSymbolRefExpr::VK_ARM_TPOFF: + case MCSymbolRefExpr::VK_ARM_GOTTPOFF: break; } MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol()); - SetType(SD, ELF::STT_TLS); + MCELF::SetType(SD, ELF::STT_TLS); break; } @@ -489,7 +346,7 @@ void MCELFStreamer::EmitInstToData(const MCInst &Inst) { void MCELFStreamer::Finish() { if (getNumFrameInfos()) - MCDwarfFrameEmitter::Emit(*this); + MCDwarfFrameEmitter::Emit(*this, true); for (std::vector::const_iterator i = LocalCommons.begin(), e = LocalCommons.end(); diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h new file mode 100644 index 000000000000..db34d58ec600 --- /dev/null +++ b/lib/MC/MCELFStreamer.h @@ -0,0 +1,274 @@ +//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFSTREAMER_H +#define LLVM_MC_MCELFSTREAMER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + +class MCELFStreamer : public MCObjectStreamer { +public: + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter, + MCAssembler *Assembler) + : MCObjectStreamer(Context, TAB, OS, Emitter, Assembler) {} + + + ~MCELFStreamer() {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void ChangeSection(const MCSection *Section); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolType(int Type) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EndCOFFSymbolDef() { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void 
EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setSize(Value); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitFileDirective(StringRef Filename); + + virtual void Finish(); + +private: + virtual void EmitInstToFragment(const MCInst &Inst); + virtual void EmitInstToData(const MCInst &Inst); + + void fixSymbolsInTLSFixups(const MCExpr *expr); + + struct LocalCommon { + MCSymbolData *SD; + uint64_t Size; + unsigned ByteAlignment; + }; + std::vector LocalCommons; + + SmallPtrSet BindingExplicitlySet; + /// @} + void SetSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind) { + SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); + } + + void SetSectionData() { + SetSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getDataRel()); + EmitCodeAlignment(4, 0); + } + void SetSectionText() { + SetSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, SectionKind::getText()); + EmitCodeAlignment(4, 0); + } + void SetSectionBss() { + SetSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, SectionKind::getBSS()); + EmitCodeAlignment(4, 0); + } +}; + +} // end llvm namespace + +#endif +//===- 
lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file assembles .s files and emits ELF .o object files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_MC_MCELFSTREAMER_H +#define LLVM_MC_MCELFSTREAMER_H + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" +#include "llvm/MC/MCSectionELF.h" + +namespace llvm { + +class MCELFStreamer : public MCObjectStreamer { +public: + MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter) + : MCObjectStreamer(Context, TAB, OS, Emitter) {} + + ~MCELFStreamer() {} + + /// @name MCStreamer Interface + /// @{ + + virtual void InitSections(); + virtual void ChangeSection(const MCSection *Section); + virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); + virtual void EmitThumbFunc(MCSymbol *Func); + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol); + virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment); + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitCOFFSymbolStorageClass(int StorageClass) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void 
EmitCOFFSymbolType(int Type) { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EndCOFFSymbolDef() { + assert(0 && "ELF doesn't support this directive"); + } + + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) { + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); + SD.setSize(Value); + } + + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size); + + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, + unsigned Size = 0, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment = 0) { + assert(0 && "ELF doesn't support this directive"); + } + virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, + unsigned ValueSize = 1, + unsigned MaxBytesToEmit = 0); + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit = 0); + + virtual void EmitFileDirective(StringRef Filename); + + virtual void Finish(); + +private: + virtual void EmitInstToFragment(const MCInst &Inst); + virtual void EmitInstToData(const MCInst &Inst); + + void fixSymbolsInTLSFixups(const MCExpr *expr); + + struct LocalCommon { + MCSymbolData *SD; + uint64_t Size; + unsigned ByteAlignment; + }; + std::vector LocalCommons; + + SmallPtrSet BindingExplicitlySet; + /// @} + void SetSection(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind) { + SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind)); + } + + void SetSectionData() { + SetSection(".data", ELF::SHT_PROGBITS, + ELF::SHF_WRITE |ELF::SHF_ALLOC, + SectionKind::getDataRel()); + EmitCodeAlignment(4, 0); + } + void SetSectionText() { + SetSection(".text", ELF::SHT_PROGBITS, + ELF::SHF_EXECINSTR | + ELF::SHF_ALLOC, SectionKind::getText()); + EmitCodeAlignment(4, 0); + } + void 
SetSectionBss() { + SetSection(".bss", ELF::SHT_NOBITS, + ELF::SHF_WRITE | + ELF::SHF_ALLOC, SectionKind::getBSS()); + EmitCodeAlignment(4, 0); + } +}; + +} // end llvm namespace + +#endif diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index 54d3743e68e4..3a674d75ed71 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -310,6 +310,11 @@ static void AttemptToFoldSymbolOffsetDifference(const MCAssembler *Asm, if (AD.getFragment() == BD.getFragment()) { Addend += (AD.getOffset() - BD.getOffset()); + // Pointers to Thumb symbols need to have their low-bit set to allow + // for interworking. + if (Asm->isThumbFunc(&SA)) + Addend |= 1; + // Clear the symbol expr pointers to indicate we have folded these // operands. A = B = 0; @@ -384,7 +389,7 @@ static bool EvaluateSymbolicAdd(const MCAssembler *Asm, // (LHS_A - RHS_B), // (RHS_A - LHS_B), // (RHS_A - RHS_B). - // Since we are attempting to be as aggresive as possible about folding, we + // Since we are attempting to be as aggressive as possible about folding, we // attempt to evaluate each possible alternative. AttemptToFoldSymbolOffsetDifference(Asm, Layout, Addrs, InSet, LHS_A, LHS_B, Result_Cst); @@ -554,3 +559,45 @@ bool MCExpr::EvaluateAsRelocatableImpl(MCValue &Res, assert(0 && "Invalid assembly expression kind!"); return false; } + +const MCSection *MCExpr::FindAssociatedSection() const { + switch (getKind()) { + case Target: + // We never look through target specific expressions. 
+ return cast(this)->FindAssociatedSection(); + + case Constant: + return MCSymbol::AbsolutePseudoSection; + + case SymbolRef: { + const MCSymbolRefExpr *SRE = cast(this); + const MCSymbol &Sym = SRE->getSymbol(); + + if (Sym.isDefined()) + return &Sym.getSection(); + + return 0; + } + + case Unary: + return cast(this)->getSubExpr()->FindAssociatedSection(); + + case Binary: { + const MCBinaryExpr *BE = cast(this); + const MCSection *LHS_S = BE->getLHS()->FindAssociatedSection(); + const MCSection *RHS_S = BE->getRHS()->FindAssociatedSection(); + + // If either section is absolute, return the other. + if (LHS_S == MCSymbol::AbsolutePseudoSection) + return RHS_S; + if (RHS_S == MCSymbol::AbsolutePseudoSection) + return LHS_S; + + // Otherwise, return the first non-null section. + return LHS_S ? LHS_S : RHS_S; + } + } + + assert(0 && "Invalid assembly expression kind!"); + return 0; +} diff --git a/lib/MC/MCInstPrinter.cpp b/lib/MC/MCInstPrinter.cpp index 92a71541f5ad..212b85eb1fe0 100644 --- a/lib/MC/MCInstPrinter.cpp +++ b/lib/MC/MCInstPrinter.cpp @@ -19,3 +19,8 @@ MCInstPrinter::~MCInstPrinter() { StringRef MCInstPrinter::getOpcodeName(unsigned Opcode) const { return ""; } + +StringRef MCInstPrinter::getRegName(unsigned RegNo) const { + assert(0 && "Target should implement this"); + return ""; +} diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp index 012c7f62f8af..46ea9b844a6a 100644 --- a/lib/MC/MCLoggingStreamer.cpp +++ b/lib/MC/MCLoggingStreamer.cpp @@ -154,21 +154,19 @@ class MCLoggingStreamer : public MCStreamer { } virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace){ + unsigned AddrSpace){ LogCall("EmitValue"); - return Child->EmitValueImpl(Value, Size, isPCRel, AddrSpace); + return Child->EmitValueImpl(Value, Size, AddrSpace); } - virtual void EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) { + virtual void EmitULEB128Value(const MCExpr *Value) { 
LogCall("EmitULEB128Value"); - return Child->EmitULEB128Value(Value, AddrSpace); + return Child->EmitULEB128Value(Value); } - virtual void EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) { + virtual void EmitSLEB128Value(const MCExpr *Value) { LogCall("EmitSLEB128Value"); - return Child->EmitSLEB128Value(Value, AddrSpace); + return Child->EmitSLEB128Value(Value); } virtual void EmitGPRel32Value(const MCExpr *Value) { @@ -215,13 +213,14 @@ class MCLoggingStreamer : public MCStreamer { virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator) { + unsigned Isa, unsigned Discriminator, + StringRef FileName) { LogCall("EmitDwarfLocDirective", "FileNo:" + Twine(FileNo) + " Line:" + Twine(Line) + " Column:" + Twine(Column) + " Flags:" + Twine(Flags) + " Isa:" + Twine(Isa) + " Discriminator:" + Twine(Discriminator)); return Child->EmitDwarfLocDirective(FileNo, Line, Column, Flags, - Isa, Discriminator); + Isa, Discriminator, FileName); } virtual void EmitInstruction(const MCInst &Inst) { diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index d1f9f5cd568e..3da5b49f5405 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -44,6 +44,8 @@ class MCMachOStreamer : public MCObjectStreamer { virtual void InitSections(); virtual void EmitLabel(MCSymbol *Symbol); + virtual void EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitThumbFunc(MCSymbol *Func); virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value); @@ -101,6 +103,18 @@ void MCMachOStreamer::InitSections() { } +void MCMachOStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, + MCSymbol *EHSymbol) { + MCSymbolData &SD = + getAssembler().getOrCreateSymbolData(*Symbol); + if (SD.isExternal()) + EmitSymbolAttribute(EHSymbol, MCSA_Global); + if (SD.getFlags() & SF_WeakDefinition) + 
EmitSymbolAttribute(EHSymbol, MCSA_WeakDefinition); + if (SD.isPrivateExtern()) + EmitSymbolAttribute(EHSymbol, MCSA_PrivateExtern); +} + void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); @@ -363,6 +377,9 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { } void MCMachOStreamer::Finish() { + if (getNumFrameInfos()) + MCDwarfFrameEmitter::Emit(*this, true); + // We have to set the fragment atom associations so we can relax properly for // Mach-O. diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 08ddf01d1a36..f38b82231207 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -67,11 +67,9 @@ namespace { virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) {} - virtual void EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) {} - virtual void EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace = 0) {} + unsigned AddrSpace) {} + virtual void EmitULEB128Value(const MCExpr *Value) {} + virtual void EmitSLEB128Value(const MCExpr *Value) {} virtual void EmitGPRel32Value(const MCExpr *Value) {} virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, @@ -89,7 +87,8 @@ namespace { } virtual void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator) {} + unsigned Isa, unsigned Discriminator, + StringRef FileName) {} virtual void EmitInstruction(const MCInst &Inst) {} virtual void Finish() {} diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index e67d9b03a95a..0f349d0d0b36 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -31,6 +31,13 @@ MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB, { } +MCObjectStreamer::MCObjectStreamer(MCContext 
&Context, TargetAsmBackend &TAB, + raw_ostream &OS, MCCodeEmitter *Emitter_, + MCAssembler *_Assembler) + : MCStreamer(Context), Assembler(_Assembler), CurSectionData(0) +{ +} + MCObjectStreamer::~MCObjectStreamer() { delete &Assembler->getBackend(); delete &Assembler->getEmitter(); @@ -83,7 +90,7 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) { } void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) { + unsigned AddrSpace) { assert(AddrSpace == 0 && "Address space must be 0!"); MCDataFragment *DF = getOrCreateDataFragment(); @@ -95,15 +102,12 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, } DF->addFixup(MCFixup::Create(DF->getContents().size(), Value, - MCFixup::getKindForSize(Size, isPCRel))); + MCFixup::getKindForSize(Size, false))); DF->getContents().resize(DF->getContents().size() + Size, 0); } void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { - assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); - - Symbol->setSection(*getCurrentSection()); + MCStreamer::EmitLabel(Symbol); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); @@ -117,23 +121,23 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { SD.setOffset(F->getContents().size()); } -void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) { - EmitULEB128IntValue(IntValue, AddrSpace); + EmitULEB128IntValue(IntValue); return; } + Value = ForceExpAbs(this, getContext(), Value); new MCLEBFragment(*Value, false, getCurrentSectionData()); } -void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void MCObjectStreamer::EmitSLEB128Value(const MCExpr *Value) { int64_t IntValue; if 
(Value->EvaluateAsAbsolute(IntValue, getAssembler())) { - EmitSLEB128IntValue(IntValue, AddrSpace); + EmitSLEB128IntValue(IntValue); return; } + Value = ForceExpAbs(this, getContext(), Value); new MCLEBFragment(*Value, true, getCurrentSectionData()); } @@ -184,30 +188,11 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst) { void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { MCInstFragment *IF = new MCInstFragment(Inst, getCurrentSectionData()); - raw_svector_ostream VecOS(IF->getCode()); + SmallString<128> Code; + raw_svector_ostream VecOS(Code); getAssembler().getEmitter().EncodeInstruction(Inst, VecOS, IF->getFixups()); -} - -static const MCExpr *BuildSymbolDiff(MCContext &Context, - const MCSymbol *A, const MCSymbol *B) { - MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; - const MCExpr *ARef = - MCSymbolRefExpr::Create(A, Variant, Context); - const MCExpr *BRef = - MCSymbolRefExpr::Create(B, Variant, Context); - const MCExpr *AddrDelta = - MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context); - return AddrDelta; -} - -static const MCExpr *ForceExpAbs(MCObjectStreamer *Streamer, - MCContext &Context, const MCExpr* Expr) { - if (Context.getAsmInfo().hasAggressiveSymbolFolding()) - return Expr; - - MCSymbol *ABS = Context.CreateTempSymbol(); - Streamer->EmitAssignment(ABS, Expr); - return MCSymbolRefExpr::Create(ABS, Context); + VecOS.flush(); + IF->getCode().append(Code.begin(), Code.end()); } void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index 89374d0c3fb9..a3d3a492ec8a 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -213,13 +213,13 @@ AsmToken AsmLexer::LexDigit() { // Requires at least one binary digit. 
if (CurPtr == NumStart) - return ReturnError(TokStart, "Invalid binary number"); + return ReturnError(TokStart, "invalid binary number"); StringRef Result(TokStart, CurPtr - TokStart); long long Value; if (Result.substr(2).getAsInteger(2, Value)) - return ReturnError(TokStart, "Invalid binary number"); + return ReturnError(TokStart, "invalid binary number"); // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. @@ -236,11 +236,11 @@ AsmToken AsmLexer::LexDigit() { // Requires at least one hex digit. if (CurPtr == NumStart) - return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + return ReturnError(CurPtr-2, "invalid hexadecimal number"); unsigned long long Result; if (StringRef(TokStart, CurPtr - TokStart).getAsInteger(0, Result)) - return ReturnError(TokStart, "Invalid hexadecimal number"); + return ReturnError(TokStart, "invalid hexadecimal number"); // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. @@ -251,13 +251,13 @@ AsmToken AsmLexer::LexDigit() { } // Must be an octal number, it starts with 0. - while (*CurPtr >= '0' && *CurPtr <= '7') + while (*CurPtr >= '0' && *CurPtr <= '9') ++CurPtr; StringRef Result(TokStart, CurPtr - TokStart); long long Value; if (Result.getAsInteger(8, Value)) - return ReturnError(TokStart, "Invalid octal number"); + return ReturnError(TokStart, "invalid octal number"); // The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL // suffixes on integer literals. @@ -324,8 +324,8 @@ AsmToken AsmLexer::LexQuote() { StringRef AsmLexer::LexUntilEndOfStatement() { TokStart = CurPtr; - while (!isAtStartOfComment(*CurPtr) && // Start of line comment. - *CurPtr != ';' && // End of statement marker. + while (!isAtStartOfComment(*CurPtr) && // Start of line comment. + !isAtStatementSeparator(CurPtr) && // End of statement marker. 
*CurPtr != '\n' && *CurPtr != '\r' && (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { @@ -339,6 +339,11 @@ bool AsmLexer::isAtStartOfComment(char Char) { return Char == *MAI.getCommentString(); } +bool AsmLexer::isAtStatementSeparator(const char *Ptr) { + return strncmp(Ptr, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0; +} + AsmToken AsmLexer::LexToken() { TokStart = CurPtr; // This always consumes at least one character. @@ -346,6 +351,11 @@ AsmToken AsmLexer::LexToken() { if (isAtStartOfComment(CurChar)) return LexLineComment(); + if (isAtStatementSeparator(TokStart)) { + CurPtr += strlen(MAI.getSeparatorString()) - 1; + return AsmToken(AsmToken::EndOfStatement, + StringRef(TokStart, strlen(MAI.getSeparatorString()))); + } switch (CurChar) { default: @@ -362,8 +372,8 @@ AsmToken AsmLexer::LexToken() { // Ignore whitespace. return LexToken(); case '\n': // FALL THROUGH. - case '\r': // FALL THROUGH. - case ';': return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); + case '\r': + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 1)); case ':': return AsmToken(AsmToken::Colon, StringRef(TokStart, 1)); case '+': return AsmToken(AsmToken::Plus, StringRef(TokStart, 1)); case '-': return AsmToken(AsmToken::Minus, StringRef(TokStart, 1)); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index a84917ffb86a..d8fd27d873f6 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -251,10 +251,14 @@ class GenericAsmParser : public MCAsmParserExtension { ".cfi_def_cfa"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaOffset>( ".cfi_def_cfa_offset"); + AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset>( + ".cfi_adjust_cfa_offset"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIDefCfaRegister>( ".cfi_def_cfa_register"); AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIOffset>( ".cfi_offset"); + 
AddDirectiveHandler<&GenericAsmParser::ParseDirectiveCFIRelOffset>( + ".cfi_rel_offset"); AddDirectiveHandler< &GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda>(".cfi_personality"); AddDirectiveHandler< @@ -263,6 +267,8 @@ class GenericAsmParser : public MCAsmParserExtension { &GenericAsmParser::ParseDirectiveCFIRememberState>(".cfi_remember_state"); AddDirectiveHandler< &GenericAsmParser::ParseDirectiveCFIRestoreState>(".cfi_restore_state"); + AddDirectiveHandler< + &GenericAsmParser::ParseDirectiveCFISameValue>(".cfi_same_value"); // Macro directives. AddDirectiveHandler<&GenericAsmParser::ParseDirectiveMacrosOnOff>( @@ -287,11 +293,14 @@ class GenericAsmParser : public MCAsmParserExtension { bool ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIDefCfa(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIDefCfaOffset(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveCFIAdjustCfaOffset(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIDefCfaRegister(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveCFIRelOffset(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIPersonalityOrLsda(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIRememberState(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveCFIRestoreState(StringRef, SMLoc DirectiveLoc); + bool ParseDirectiveCFISameValue(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveMacrosOnOff(StringRef, SMLoc DirectiveLoc); bool ParseDirectiveMacro(StringRef, SMLoc DirectiveLoc); @@ -517,6 +526,9 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { switch (Lexer.getKind()) { default: return TokError("unknown token in expression"); + // If we have an error assume that we've already handled it. + case AsmToken::Error: + return true; case AsmToken::Exclaim: Lex(); // Eat the operator. 
if (ParsePrimaryExpr(Res, EndLoc)) @@ -880,6 +892,7 @@ bool AsmParser::ParseStatement() { EatToEndOfStatement(); return false; } + // Allow an integer followed by a ':' as a directional local label. if (Lexer.is(AsmToken::Integer)) { LocalLabelVal = getTok().getIntVal(); @@ -896,13 +909,19 @@ bool AsmParser::ParseStatement() { return TokError("unexpected token at start of statement"); } } - } - else if (ParseIdentifier(IDVal)) { + + } else if (Lexer.is(AsmToken::Dot)) { + // Treat '.' as a valid identifier in this context. + Lex(); + IDVal = "."; + + } else if (ParseIdentifier(IDVal)) { if (!TheCondState.Ignore) return TokError("unexpected token at start of statement"); IDVal = ""; } + // Handle conditional assembly here before checking for skipping. We // have to do this so that .endif isn't skipped in a ".if 0" block for // example. @@ -935,6 +954,10 @@ bool AsmParser::ParseStatement() { // identifier ':' -> Label. Lex(); + // Diagnose attempt to use '.' as a label. + if (IDVal == ".") + return Error(IDLoc, "invalid use of pseudo-symbol '.' as a label"); + // Diagnose attempt to use a variable as a label. // // FIXME: Diagnostics. Note the location of the definition as a label. @@ -978,7 +1001,7 @@ bool AsmParser::ParseStatement() { return HandleMacroEntry(IDVal, IDLoc, M); // Otherwise, we have a normal instruction or directive. - if (IDVal[0] == '.') { + if (IDVal[0] == '.' && IDVal != ".") { // Assembler features if (IDVal == ".set" || IDVal == ".equ") return ParseDirectiveSet(IDVal, true); @@ -1041,7 +1064,7 @@ bool AsmParser::ParseStatement() { if (IDVal == ".fill") return ParseDirectiveFill(); - if (IDVal == ".space") + if (IDVal == ".space" || IDVal == ".skip") return ParseDirectiveSpace(); if (IDVal == ".zero") return ParseDirectiveZero(); @@ -1306,6 +1329,12 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) { if (Lexer.isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in assignment"); + // Error on assignment to '.'. 
+ if (Name == ".") { + return Error(EqualLoc, ("assignment to pseudo-symbol '.' is unsupported " + "(use '.space' or '.org').)")); + } + // Eat the end of statement marker. Lex(); @@ -1319,7 +1348,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef) { // FIXME: Diagnose assignment to protected identifier (e.g., register name). if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable()) ; // Allow redefinitions of undefined symbols only used in directives. - else if (!Sym->isUndefined() && (!Sym->isAbsolute() || !allow_redef)) + else if (!Sym->isUndefined() && (!Sym->isVariable() || !allow_redef)) return Error(EqualLoc, "redefinition of '" + Name + "'"); else if (!Sym->isVariable()) return Error(EqualLoc, "invalid assignment to '" + Name + "'"); @@ -1535,13 +1564,21 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { Lex(); if (getLexer().isNot(AsmToken::Integer) && - getLexer().isNot(AsmToken::Real)) + getLexer().isNot(AsmToken::Real) && + getLexer().isNot(AsmToken::Identifier)) return TokError("unexpected token in directive"); // Convert to an APFloat. 
APFloat Value(Semantics); - if (Value.convertFromString(getTok().getString(), - APFloat::rmNearestTiesToEven) == + StringRef IDVal = getTok().getString(); + if (getLexer().is(AsmToken::Identifier)) { + if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf")) + Value = APFloat::getInf(Semantics); + else if (!IDVal.compare_lower("nan")) + Value = APFloat::getNaN(Semantics, false, ~0); + else + return TokError("invalid floating point literal"); + } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) == APFloat::opInvalidOp) return TokError("invalid floating point literal"); if (IsNeg) @@ -2216,7 +2253,7 @@ bool GenericAsmParser::ParseDirectiveLoc(StringRef, SMLoc DirectiveLoc) { } getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags, - Isa, Discriminator); + Isa, Discriminator, StringRef()); return false; } @@ -2232,13 +2269,15 @@ bool GenericAsmParser::ParseDirectiveStabs(StringRef Directive, /// ::= .cfi_startproc bool GenericAsmParser::ParseDirectiveCFIStartProc(StringRef, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIStartProc(); + getStreamer().EmitCFIStartProc(); + return false; } /// ParseDirectiveCFIEndProc /// ::= .cfi_endproc bool GenericAsmParser::ParseDirectiveCFIEndProc(StringRef, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIEndProc(); + getStreamer().EmitCFIEndProc(); + return false; } /// ParseRegisterOrRegisterNumber - parse register name or number. 
@@ -2273,7 +2312,8 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfa(StringRef, if (getParser().ParseAbsoluteExpression(Offset)) return true; - return getStreamer().EmitCFIDefCfa(Register, Offset); + getStreamer().EmitCFIDefCfa(Register, Offset); + return false; } /// ParseDirectiveCFIDefCfaOffset @@ -2284,7 +2324,20 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfaOffset(StringRef, if (getParser().ParseAbsoluteExpression(Offset)) return true; - return getStreamer().EmitCFIDefCfaOffset(Offset); + getStreamer().EmitCFIDefCfaOffset(Offset); + return false; +} + +/// ParseDirectiveCFIAdjustCfaOffset +/// ::= .cfi_adjust_cfa_offset adjustment +bool GenericAsmParser::ParseDirectiveCFIAdjustCfaOffset(StringRef, + SMLoc DirectiveLoc) { + int64_t Adjustment = 0; + if (getParser().ParseAbsoluteExpression(Adjustment)) + return true; + + getStreamer().EmitCFIAdjustCfaOffset(Adjustment); + return false; } /// ParseDirectiveCFIDefCfaRegister @@ -2295,11 +2348,12 @@ bool GenericAsmParser::ParseDirectiveCFIDefCfaRegister(StringRef, if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; - return getStreamer().EmitCFIDefCfaRegister(Register); + getStreamer().EmitCFIDefCfaRegister(Register); + return false; } /// ParseDirectiveCFIOffset -/// ::= .cfi_off register, offset +/// ::= .cfi_offset register, offset bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) { int64_t Register = 0; int64_t Offset = 0; @@ -2314,7 +2368,29 @@ bool GenericAsmParser::ParseDirectiveCFIOffset(StringRef, SMLoc DirectiveLoc) { if (getParser().ParseAbsoluteExpression(Offset)) return true; - return getStreamer().EmitCFIOffset(Register, Offset); + getStreamer().EmitCFIOffset(Register, Offset); + return false; +} + +/// ParseDirectiveCFIRelOffset +/// ::= .cfi_rel_offset register, offset +bool GenericAsmParser::ParseDirectiveCFIRelOffset(StringRef, + SMLoc DirectiveLoc) { + int64_t Register = 0; + + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return 
true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in directive"); + Lex(); + + int64_t Offset = 0; + if (getParser().ParseAbsoluteExpression(Offset)) + return true; + + getStreamer().EmitCFIRelOffset(Register, Offset); + return false; } static bool isValidEncoding(int64_t Encoding) { @@ -2364,25 +2440,42 @@ bool GenericAsmParser::ParseDirectiveCFIPersonalityOrLsda(StringRef IDVal, MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); if (IDVal == ".cfi_personality") - return getStreamer().EmitCFIPersonality(Sym, Encoding); + getStreamer().EmitCFIPersonality(Sym, Encoding); else { assert(IDVal == ".cfi_lsda"); - return getStreamer().EmitCFILsda(Sym, Encoding); + getStreamer().EmitCFILsda(Sym, Encoding); } + return false; } /// ParseDirectiveCFIRememberState /// ::= .cfi_remember_state bool GenericAsmParser::ParseDirectiveCFIRememberState(StringRef IDVal, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIRememberState(); + getStreamer().EmitCFIRememberState(); + return false; } /// ParseDirectiveCFIRestoreState /// ::= .cfi_remember_state bool GenericAsmParser::ParseDirectiveCFIRestoreState(StringRef IDVal, SMLoc DirectiveLoc) { - return getStreamer().EmitCFIRestoreState(); + getStreamer().EmitCFIRestoreState(); + return false; +} + +/// ParseDirectiveCFISameValue +/// ::= .cfi_same_value register +bool GenericAsmParser::ParseDirectiveCFISameValue(StringRef IDVal, + SMLoc DirectiveLoc) { + int64_t Register = 0; + + if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + return true; + + getStreamer().EmitCFISameValue(Register); + + return false; } /// ParseDirectiveMacrosOnOff diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index 44f234566afd..3c092cdb19bb 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -100,6 +100,8 @@ class DarwinAsmParser : public MCAsmParserExtension { 
AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveText>(".text"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveThreadInitFunc>(".thread_init_func"); AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveTLV>(".tlv"); + + AddDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveIdent>(".ident"); } bool ParseDirectiveDesc(StringRef, SMLoc); @@ -277,6 +279,11 @@ class DarwinAsmParser : public MCAsmParserExtension { return ParseSectionSwitch("__DATA", "__thread_vars", MCSectionMachO::S_THREAD_LOCAL_VARIABLES); } + bool ParseSectionDirectiveIdent(StringRef, SMLoc) { + // Darwin silently ignores the .ident directive. + getParser().EatToEndOfStatement(); + return false; + } bool ParseSectionDirectiveThreadInitFunc(StringRef, SMLoc) { return ParseSectionSwitch("__DATA", "__thread_init", MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS); @@ -427,10 +434,12 @@ bool DarwinAsmParser::ParseDirectiveSection(StringRef, SMLoc) { StringRef Segment, Section; - unsigned TAA, StubSize; + unsigned StubSize; + unsigned TAA; + bool TAAParsed; std::string ErrorStr = MCSectionMachO::ParseSectionSpecifier(SectionSpec, Segment, Section, - TAA, StubSize); + TAA, TAAParsed, StubSize); if (!ErrorStr.empty()) return Error(Loc, ErrorStr.c_str()); diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp index d32aea144e6e..dfd77c3fe813 100644 --- a/lib/MC/MCSectionELF.cpp +++ b/lib/MC/MCSectionELF.cpp @@ -39,8 +39,28 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, return; } - OS << "\t.section\t" << getSectionName(); - + StringRef name = getSectionName(); + if (name.find_first_not_of("0123456789_." 
+ "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) { + OS << "\t.section\t" << name; + } else { + OS << "\t.section\t\""; + for (const char *b = name.begin(), *e = name.end(); b < e; ++b) { + if (*b == '"') // Unquoted " + OS << "\\\""; + else if (*b != '\\') // Neither " or backslash + OS << *b; + else if (b + 1 == e) // Trailing backslash + OS << "\\\\"; + else { + OS << b[0] << b[1]; // Quoted character + ++b; + } + } + OS << '"'; + } + // Handle the weird solaris syntax if desired. if (MAI.usesSunStyleELFSectionSwitchSyntax() && !(Flags & ELF::SHF_MERGE)) { diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp index 577e93aed6bc..e771556262a8 100644 --- a/lib/MC/MCSectionMachO.cpp +++ b/lib/MC/MCSectionMachO.cpp @@ -180,7 +180,9 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. StringRef &Segment, // Out. StringRef &Section, // Out. unsigned &TAA, // Out. + bool &TAAParsed, // Out. unsigned &StubSize) { // Out. + TAAParsed = false; // Find the first comma. std::pair Comma = Spec.split(','); @@ -211,6 +213,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. "between 1 and 16 characters"; // If there is no comma after the section, we're done. + TAA = 0; StubSize = 0; if (Comma.second.empty()) return ""; @@ -235,6 +238,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In. // Remember the TypeID. TAA = TypeID; + TAAParsed = true; // If we have no comma after the section type, there are no attributes. 
if (Comma.second.empty()) { diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 4b302c8602c9..fa245b11ade2 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -12,6 +12,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" @@ -27,6 +28,29 @@ MCStreamer::MCStreamer(MCContext &Ctx) : Context(Ctx) { MCStreamer::~MCStreamer() { } +const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context, + const MCSymbol *A, + const MCSymbol *B) { + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + const MCExpr *ARef = + MCSymbolRefExpr::Create(A, Variant, Context); + const MCExpr *BRef = + MCSymbolRefExpr::Create(B, Variant, Context); + const MCExpr *AddrDelta = + MCBinaryExpr::Create(MCBinaryExpr::Sub, ARef, BRef, Context); + return AddrDelta; +} + +const MCExpr *MCStreamer::ForceExpAbs(MCStreamer *Streamer, + MCContext &Context, const MCExpr* Expr) { + if (Context.getAsmInfo().hasAggressiveSymbolFolding()) + return Expr; + + MCSymbol *ABS = Context.CreateTempSymbol(); + Streamer->EmitAssignment(ABS, Expr); + return MCSymbolRefExpr::Create(ABS, Context); +} + raw_ostream &MCStreamer::GetCommentOS() { // By default, discard comments. 
return nulls(); @@ -90,30 +114,15 @@ void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size, void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, unsigned AddrSpace) { - EmitValueImpl(Value, Size, false, AddrSpace); -} - -void MCStreamer::EmitPCRelValue(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { - EmitValueImpl(Value, Size, true, AddrSpace); + EmitValueImpl(Value, Size, AddrSpace); } void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, - bool isPCRel, unsigned AddrSpace) { - EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, isPCRel, + unsigned AddrSpace) { + EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, AddrSpace); } -void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, - unsigned AddrSpace) { - EmitSymbolValue(Sym, Size, false, AddrSpace); -} - -void MCStreamer::EmitPCRelSymbolValue(const MCSymbol *Sym, unsigned Size, - unsigned AddrSpace) { - EmitSymbolValue(Sym, Size, true, AddrSpace); -} - void MCStreamer::EmitGPRel32Value(const MCExpr *Value) { report_fatal_error("unsupported directive in streamer"); } @@ -135,7 +144,8 @@ bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo, void MCStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column, unsigned Flags, unsigned Isa, - unsigned Discriminator) { + unsigned Discriminator, + StringRef FileName) { getContext().setCurrentDwarfLoc(FileNo, Line, Column, Flags, Isa, Discriminator); } @@ -152,28 +162,39 @@ void MCStreamer::EnsureValidFrame() { report_fatal_error("No open frame"); } -bool MCStreamer::EmitCFIStartProc() { - MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); - if (CurFrame && !CurFrame->End) { - report_fatal_error("Starting a frame before finishing the previous one!"); - return true; - } - MCDwarfFrameInfo Frame; - Frame.Begin = getContext().CreateTempSymbol(); - EmitLabel(Frame.Begin); - FrameInfos.push_back(Frame); - return false; +void MCStreamer::EmitEHSymAttributes(const 
MCSymbol *Symbol, + MCSymbol *EHSymbol) { } -bool MCStreamer::EmitCFIEndProc() { +void MCStreamer::EmitLabel(MCSymbol *Symbol) { + assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); + assert(getCurrentSection() && "Cannot emit before setting section!"); + Symbol->setSection(*getCurrentSection()); + + StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix(); + if (!Symbol->getName().startswith(Prefix)) + LastNonPrivate = Symbol; +} + +void MCStreamer::EmitCFIStartProc() { + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + if (CurFrame && !CurFrame->End) + report_fatal_error("Starting a frame before finishing the previous one!"); + MCDwarfFrameInfo Frame; + Frame.Begin = getContext().CreateTempSymbol(); + Frame.Function = LastNonPrivate; + EmitLabel(Frame.Begin); + FrameInfos.push_back(Frame); +} + +void MCStreamer::EmitCFIEndProc() { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); CurFrame->End = getContext().CreateTempSymbol(); EmitLabel(CurFrame->End); - return false; } -bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { +void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -182,10 +203,9 @@ bool MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) { MachineLocation Source(Register, -Offset); MCCFIInstruction Instruction(Label, Dest, Source); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { +void MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -194,10 +214,20 @@ bool MCStreamer::EmitCFIDefCfaOffset(int64_t Offset) { MachineLocation Source(MachineLocation::VirtualFP, -Offset); MCCFIInstruction Instruction(Label, Dest, Source); 
CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { +void MCStreamer::EmitCFIAdjustCfaOffset(int64_t Adjustment) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(MachineLocation::VirtualFP); + MachineLocation Source(MachineLocation::VirtualFP, Adjustment); + MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source); + CurFrame->Instructions.push_back(Instruction); +} + +void MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -206,10 +236,9 @@ bool MCStreamer::EmitCFIDefCfaRegister(int64_t Register) { MachineLocation Source(MachineLocation::VirtualFP); MCCFIInstruction Instruction(Label, Dest, Source); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { +void MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); @@ -218,37 +247,44 @@ bool MCStreamer::EmitCFIOffset(int64_t Register, int64_t Offset) { MachineLocation Source(Register, Offset); MCCFIInstruction Instruction(Label, Dest, Source); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIPersonality(const MCSymbol *Sym, +void MCStreamer::EmitCFIRelOffset(int64_t Register, int64_t Offset) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MachineLocation Dest(Register, Offset); + MachineLocation Source(Register, Offset); + MCCFIInstruction Instruction(MCCFIInstruction::RelMove, Label, Dest, Source); + 
CurFrame->Instructions.push_back(Instruction); +} + +void MCStreamer::EmitCFIPersonality(const MCSymbol *Sym, unsigned Encoding) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); CurFrame->Personality = Sym; CurFrame->PersonalityEncoding = Encoding; - return false; } -bool MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { +void MCStreamer::EmitCFILsda(const MCSymbol *Sym, unsigned Encoding) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); CurFrame->Lsda = Sym; CurFrame->LsdaEncoding = Encoding; - return false; } -bool MCStreamer::EmitCFIRememberState() { +void MCStreamer::EmitCFIRememberState() { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); MCSymbol *Label = getContext().CreateTempSymbol(); EmitLabel(Label); MCCFIInstruction Instruction(MCCFIInstruction::Remember, Label); CurFrame->Instructions.push_back(Instruction); - return false; } -bool MCStreamer::EmitCFIRestoreState() { +void MCStreamer::EmitCFIRestoreState() { // FIXME: Error if there is no matching cfi_remember_state. 
EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); @@ -256,7 +292,55 @@ bool MCStreamer::EmitCFIRestoreState() { EmitLabel(Label); MCCFIInstruction Instruction(MCCFIInstruction::Restore, Label); CurFrame->Instructions.push_back(Instruction); - return false; +} + +void MCStreamer::EmitCFISameValue(int64_t Register) { + EnsureValidFrame(); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + MCSymbol *Label = getContext().CreateTempSymbol(); + EmitLabel(Label); + MCCFIInstruction Instruction(MCCFIInstruction::SameValue, Label, Register); + CurFrame->Instructions.push_back(Instruction); +} + +void MCStreamer::EmitFnStart() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitFnEnd() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitCantUnwind() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitHandlerData() { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitPersonality(const MCSymbol *Personality) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitPad(int64_t Offset) { + errs() << "Not implemented yet\n"; + abort(); +} + +void MCStreamer::EmitRegSave(const SmallVectorImpl &RegList, bool) { + errs() << "Not implemented yet\n"; + abort(); } /// EmitRawText - If this file is backed by an assembly streamer, this dumps diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp index 1c71f267a4b5..c2fad1674aa4 100644 --- a/lib/MC/MCSymbol.cpp +++ b/lib/MC/MCSymbol.cpp @@ -58,9 +58,13 @@ void MCSymbol::setVariableValue(const MCExpr *Value) { "Invalid redefinition!"); this->Value = Value; - // Mark the variable as absolute as appropriate. - if (isa(Value)) - setAbsolute(); + // Variables should always be marked as in the same "section" as the value. 
+ const MCSection *Section = Value->FindAssociatedSection(); + if (Section) { + setSection(*Section); + } else { + setUndefined(); + } } void MCSymbol::print(raw_ostream &OS) const { diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index 8af07c74fdfe..f049b1c6e2a4 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -121,6 +121,33 @@ class MachObjectWriter : public MCObjectWriter { } uint64_t getSymbolAddress(const MCSymbolData* SD, const MCAsmLayout &Layout) const { + const MCSymbol &S = SD->getSymbol(); + + // If this is a variable, then recursively evaluate now. + if (S.isVariable()) { + MCValue Target; + if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout)) + report_fatal_error("unable to evaluate offset for variable '" + + S.getName() + "'"); + + // Verify that any used symbols are defined. + if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymA()->getSymbol().getName() + "'"); + if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) + report_fatal_error("unable to evaluate offset to undefined symbol '" + + Target.getSymB()->getSymbol().getName() + "'"); + + uint64_t Address = Target.getConstant(); + if (Target.getSymA()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymA()->getSymbol()), Layout); + if (Target.getSymB()) + Address += getSymbolAddress(&Layout.getAssembler().getSymbolData( + Target.getSymB()->getSymbol()), Layout); + return Address; + } + return getSectionAddress(SD->getFragment()->getParent()) + Layout.getSymbolOffset(SD); } @@ -274,8 +301,8 @@ class MachObjectWriter : public MCObjectWriter { if (is64Bit()) Write32(0); // reserved3 - assert(OS.tell() - Start == is64Bit() ? macho::Section64Size : - macho::Section32Size); + assert(OS.tell() - Start == (is64Bit() ? 
macho::Section64Size : + macho::Section32Size)); } void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, @@ -440,7 +467,7 @@ class MachObjectWriter : public MCObjectWriter { // Compensate for the relocation offset, Darwin x86_64 relocations only // have the addend and appear to have attempted to define it to be the // actual expression addend without the PCrel bias. However, instructions - // with data following the relocation are not accomodated for (see comment + // with data following the relocation are not accommodated for (see comment // below regarding SIGNED{1,2,4}), so it isn't exactly that either. Value += 1LL << Log2Size; } @@ -541,7 +568,7 @@ class MachObjectWriter : public MCObjectWriter { } // x86_64 almost always uses external relocations, except when there is no - // symbol to use as a base address (a local symbol with no preceeding + // symbol to use as a base address (a local symbol with no preceding // non-local symbol). if (Base) { Index = Base->getIndex(); @@ -550,7 +577,7 @@ class MachObjectWriter : public MCObjectWriter { // Add the local offset, if needed. if (Base != &SD) Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base); - } else if (Symbol->isInSection()) { + } else if (Symbol->isInSection() && !Symbol->isVariable()) { // The index is the section ordinal (1-based). Index = SD.getFragment()->getParent()->getOrdinal() + 1; IsExtern = 0; @@ -821,12 +848,12 @@ class MachObjectWriter : public MCObjectWriter { // 1 - :upper16: for movt instructions // high bit of r_length: // 0 - arm instructions - // 1 - thumb instructions + // 1 - thumb instructions // the other half of the relocated expression is in the following pair // relocation entry in the the low 16 bits of r_address field. 
unsigned ThumbBit = 0; unsigned MovtBit = 0; - switch (Fixup.getKind()) { + switch ((unsigned)Fixup.getKind()) { default: break; case ARM::fixup_arm_movt_hi16: case ARM::fixup_arm_movt_hi16_pcrel: @@ -952,15 +979,10 @@ class MachObjectWriter : public MCObjectWriter { RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); Log2Size = llvm::Log2_32(2); return true; - + case ARM::fixup_arm_thumb_bl: - RelocType = unsigned(macho::RIT_ARM_ThumbBranch32Bit); - Log2Size = llvm::Log2_32(4); - return true; - case ARM::fixup_arm_thumb_blx: RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit); - // Report as 'long', even though that is not quite accurate. Log2Size = llvm::Log2_32(4); return true; @@ -1033,17 +1055,17 @@ class MachObjectWriter : public MCObjectWriter { // FIXME! report_fatal_error("FIXME: relocations to absolute targets " "not yet implemented"); - } else if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, SectionAddress)) { + FixedValue = Res; + return; + } } - report_fatal_error("unsupported relocation of variable '" + - SD->getSymbol().getName() + "'"); - } else { // Check whether we need an external or internal relocation. if (doesSymbolRequireExternRelocation(SD)) { IsExtern = 1; @@ -1055,8 +1077,10 @@ class MachObjectWriter : public MCObjectWriter { FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). 
- Index = SD->getFragment()->getParent()->getOrdinal() + 1; - FixedValue += getSectionAddress(SD->getFragment()->getParent()); + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += getSectionAddress(&SymSD); } if (IsPCRel) FixedValue -= getSectionAddress(Fragment->getParent()); @@ -1132,17 +1156,17 @@ class MachObjectWriter : public MCObjectWriter { // FIXME: Currently, these are never generated (see code below). I cannot // find a case where they are actually emitted. Type = macho::RIT_Vanilla; - } else if (SD->getSymbol().isVariable()) { - int64_t Res; - if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( - Res, Layout, SectionAddress)) { - FixedValue = Res; - return; + } else { + // Resolve constant variables. + if (SD->getSymbol().isVariable()) { + int64_t Res; + if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute( + Res, Layout, SectionAddress)) { + FixedValue = Res; + return; + } } - report_fatal_error("unsupported relocation of variable '" + - SD->getSymbol().getName() + "'"); - } else { // Check whether we need an external or internal relocation. if (doesSymbolRequireExternRelocation(SD)) { IsExtern = 1; @@ -1154,8 +1178,10 @@ class MachObjectWriter : public MCObjectWriter { FixedValue -= Layout.getSymbolOffset(SD); } else { // The index is the section ordinal (1-based). 
- Index = SD->getFragment()->getParent()->getOrdinal() + 1; - FixedValue += getSectionAddress(SD->getFragment()->getParent()); + const MCSectionData &SymSD = Asm.getSectionData( + SD->getSymbol().getSection()); + Index = SymSD.getOrdinal() + 1; + FixedValue += getSectionAddress(&SymSD); } if (IsPCRel) FixedValue -= getSectionAddress(Fragment->getParent()); diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 6ca5d37fc32e..101237aabb02 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -647,22 +647,27 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, COFFSection *coff_section = SectionMap[&SectionData->getSection()]; COFFSymbol *coff_symbol = SymbolMap[&A_SD.getSymbol()]; + const MCSymbolRefExpr *SymA = Target.getSymA(); + const MCSymbolRefExpr *SymB = Target.getSymB(); + const bool CrossSection = SymB && + &SymA->getSymbol().getSection() != &SymB->getSymbol().getSection(); if (Target.getSymB()) { - if (&Target.getSymA()->getSymbol().getSection() - != &Target.getSymB()->getSymbol().getSection()) { - llvm_unreachable("Symbol relative relocations are only allowed between " - "symbols in the same section"); - } const MCSymbol *B = &Target.getSymB()->getSymbol(); MCSymbolData &B_SD = Asm.getSymbolData(*B); - FixedValue = Layout.getSymbolOffset(&A_SD) - Layout.getSymbolOffset(&B_SD); + // Offset of the symbol in the section + int64_t a = Layout.getSymbolOffset(&B_SD); + // Offset of the relocation in the section + int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); + + FixedValue = b - a; // In the case where we have SymbA and SymB, we just need to store the delta // between the two symbols. Update FixedValue to account for the delta, and // skip recording the relocation. 
- return; + if (!CrossSection) + return; } else { FixedValue = Target.getConstant(); } @@ -673,7 +678,7 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, Reloc.Data.VirtualAddress = Layout.getFragmentOffset(Fragment); // Turn relocations for temporary symbols into section relocations. - if (coff_symbol->MCData->getSymbol().isTemporary()) { + if (coff_symbol->MCData->getSymbol().isTemporary() || CrossSection) { Reloc.Symb = coff_symbol->Section->Symbol; FixedValue += Layout.getFragmentOffset(coff_symbol->MCData->Fragment) + coff_symbol->MCData->getOffset(); @@ -684,7 +689,12 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, Reloc.Data.VirtualAddress += Fixup.getOffset(); - switch ((unsigned)Fixup.getKind()) { + unsigned FixupKind = Fixup.getKind(); + + if (CrossSection) + FixupKind = FK_PCRel_4; + + switch (FixupKind) { case FK_PCRel_4: case X86::reloc_riprel_4byte: case X86::reloc_riprel_4byte_movq_load: diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index 6a6814fd37d9..642a8ece8b76 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -1,5 +1,7 @@ add_llvm_library(LLVMObject MachOObject.cpp + MachOObjectFile.cpp + Object.cpp ObjectFile.cpp COFFObjectFile.cpp ELFObjectFile.cpp diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index cfee82a0b217..86bf44baaeb6 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -91,6 +91,7 @@ extern char coff_coff_section_layout_static_assert namespace { class COFFObjectFile : public ObjectFile { private: + uint64_t HeaderOff; const coff_file_header *Header; const coff_section *SectionTable; const coff_symbol *SymbolTable; @@ -185,11 +186,8 @@ char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const { return ret; uint32_t Characteristics = 0; - uint32_t PointerToRawData = 0; - const coff_section *Section = getSection(symb->SectionNumber); - if (Section) { + if (const coff_section *Section = 
getSection(symb->SectionNumber)) { Characteristics = Section->Characteristics; - PointerToRawData = Section->PointerToRawData; } switch (symb->SectionNumber) { @@ -256,7 +254,7 @@ StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const { // Check for string table entry. First byte is '/'. if (name[0] == '/') { uint32_t Offset; - name.getAsInteger(10, Offset); + name.substr(1).getAsInteger(10, Offset); return StringRef(getString(Offset)); } @@ -287,9 +285,20 @@ bool COFFObjectFile::isSectionText(DataRefImpl Sec) const { COFFObjectFile::COFFObjectFile(MemoryBuffer *Object) : ObjectFile(Object) { - Header = reinterpret_cast(base); + + HeaderOff = 0; + + if (base[0] == 0x4d && base[1] == 0x5a) { + // PE/COFF, seek through MS-DOS compatibility stub and 4-byte + // PE signature to find 'normal' COFF header. + HeaderOff += *reinterpret_cast(base + 0x3c); + HeaderOff += 4; + } + + Header = reinterpret_cast(base + HeaderOff); SectionTable = reinterpret_cast( base + + HeaderOff + sizeof(coff_file_header) + Header->SizeOfOptionalHeader); SymbolTable = @@ -303,6 +312,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object) ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast(SymbolTable); return symbol_iterator(SymbolRef(ret, this)); } @@ -310,18 +320,21 @@ ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const { ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const { // The symbol table ends where the string table begins. 
DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast(StringTable); return symbol_iterator(SymbolRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::begin_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast(SectionTable); return section_iterator(SectionRef(ret, this)); } ObjectFile::section_iterator COFFObjectFile::end_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast(SectionTable + Header->NumberOfSections); return section_iterator(SectionRef(ret, this)); } diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 682be770f48f..d2a2726ce739 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -547,6 +547,7 @@ template ObjectFile::section_iterator ELFObjectFile ::begin_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast(base + Header->e_shoff); return section_iterator(SectionRef(ret, this)); } @@ -555,6 +556,7 @@ template ObjectFile::section_iterator ELFObjectFile ::end_sections() const { DataRefImpl ret; + memset(&ret, 0, sizeof(DataRefImpl)); ret.p = reinterpret_cast(base + Header->e_shoff + (Header->e_shentsize * Header->e_shnum)); diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp index 5e64d6323288..9890febfb616 100644 --- a/lib/Object/MachOObject.cpp +++ b/lib/Object/MachOObject.cpp @@ -12,6 +12,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Host.h" #include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" using namespace llvm; using namespace llvm::object; @@ -340,3 +342,29 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, Index * sizeof(macho::Symbol64TableEntry)); ReadInMemoryStruct(*this, Buffer->getBuffer(), Offset, Res); } + +/* ** */ +// Object Dumping Facilities +void MachOObject::dump() const { 
print(dbgs()); dbgs() << '\n'; } +void MachOObject::dumpHeader() const { printHeader(dbgs()); dbgs() << '\n'; } + +void MachOObject::printHeader(raw_ostream &O) const { + O << "('cputype', " << Header.CPUType << ")\n"; + O << "('cpusubtype', " << Header.CPUSubtype << ")\n"; + O << "('filetype', " << Header.FileType << ")\n"; + O << "('num_load_commands', " << Header.NumLoadCommands << ")\n"; + O << "('load_commands_size', " << Header.SizeOfLoadCommands << ")\n"; + O << "('flag', " << Header.Flags << ")\n"; + + // Print extended header if 64-bit. + if (is64Bit()) + O << "('reserved', " << Header64Ext.Reserved << ")\n"; +} + +void MachOObject::print(raw_ostream &O) const { + O << "Header:\n"; + printHeader(O); + O << "Load Commands:\n"; + + O << "Buffer:\n"; +} diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp new file mode 100644 index 000000000000..877cbfbdb808 --- /dev/null +++ b/lib/Object/MachOObjectFile.cpp @@ -0,0 +1,327 @@ +//===- MachOObjectFile.cpp - Mach-O object file binding ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOObjectFile class, which binds the MachOObject +// class to the generic ObjectFile wrapper. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Triple.h" +#include "llvm/Object/MachOFormat.h" +#include "llvm/Object/MachOObject.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/MachO.h" + +#include +#include +#include + +using namespace llvm; +using namespace object; + +namespace llvm { + +typedef MachOObject::LoadCommandInfo LoadCommandInfo; + +class MachOObjectFile : public ObjectFile { +public: + MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO) + : ObjectFile(Object), + MachOObj(MOO), + RegisteredStringTable(std::numeric_limits::max()) {} + + virtual symbol_iterator begin_symbols() const; + virtual symbol_iterator end_symbols() const; + virtual section_iterator begin_sections() const; + virtual section_iterator end_sections() const; + + virtual uint8_t getBytesInAddress() const; + virtual StringRef getFileFormatName() const; + virtual unsigned getArch() const; + +protected: + virtual SymbolRef getSymbolNext(DataRefImpl Symb) const; + virtual StringRef getSymbolName(DataRefImpl Symb) const; + virtual uint64_t getSymbolAddress(DataRefImpl Symb) const; + virtual uint64_t getSymbolSize(DataRefImpl Symb) const; + virtual char getSymbolNMTypeChar(DataRefImpl Symb) const; + virtual bool isSymbolInternal(DataRefImpl Symb) const; + + virtual SectionRef getSectionNext(DataRefImpl Sec) const; + virtual StringRef getSectionName(DataRefImpl Sec) const; + virtual uint64_t getSectionAddress(DataRefImpl Sec) const; + virtual uint64_t getSectionSize(DataRefImpl Sec) const; + virtual StringRef getSectionContents(DataRefImpl Sec) const; + virtual bool isSectionText(DataRefImpl Sec) const; + +private: + MachOObject *MachOObj; + mutable uint32_t RegisteredStringTable; + + void moveToNextSection(DataRefImpl &DRI) const; + void getSymbolTableEntry(DataRefImpl DRI, + InMemoryStruct &Res) const; + void moveToNextSymbol(DataRefImpl &DRI) const; + void 
getSection(DataRefImpl DRI, InMemoryStruct &Res) const; +}; + +ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { + std::string Err; + MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err); + if (!MachOObj) + return NULL; + return new MachOObjectFile(Buffer, MachOObj); +} + +/*===-- Symbols -----------------------------------------------------------===*/ + +void MachOObjectFile::moveToNextSymbol(DataRefImpl &DRI) const { + uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; + while (DRI.d.a < LoadCommandCount) { + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + if (LCI.Command.Type == macho::LCT_Symtab) { + InMemoryStruct SymtabLoadCmd; + MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); + if (DRI.d.b < SymtabLoadCmd->NumSymbolTableEntries) + return; + } + + DRI.d.a++; + DRI.d.b = 0; + } +} + +void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI, + InMemoryStruct &Res) const { + InMemoryStruct SymtabLoadCmd; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd); + + if (RegisteredStringTable != DRI.d.a) { + MachOObj->RegisterStringTable(*SymtabLoadCmd); + RegisteredStringTable = DRI.d.a; + } + + MachOObj->ReadSymbolTableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b, + Res); +} + + +SymbolRef MachOObjectFile::getSymbolNext(DataRefImpl DRI) const { + DRI.d.b++; + moveToNextSymbol(DRI); + return SymbolRef(DRI, this); +} + +StringRef MachOObjectFile::getSymbolName(DataRefImpl DRI) const { + InMemoryStruct Entry; + getSymbolTableEntry(DRI, Entry); + return MachOObj->getStringAtIndex(Entry->StringIndex); +} + +uint64_t MachOObjectFile::getSymbolAddress(DataRefImpl DRI) const { + InMemoryStruct Entry; + getSymbolTableEntry(DRI, Entry); + return Entry->Value; +} + +uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const { + return UnknownAddressOrSize; +} + +char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const { + 
InMemoryStruct Entry; + getSymbolTableEntry(DRI, Entry); + + char Char; + switch (Entry->Type & macho::STF_TypeMask) { + case macho::STT_Undefined: + Char = 'u'; + break; + case macho::STT_Absolute: + case macho::STT_Section: + Char = 's'; + break; + default: + Char = '?'; + break; + } + + if (Entry->Flags & (macho::STF_External | macho::STF_PrivateExtern)) + Char = toupper(Char); + return Char; +} + +bool MachOObjectFile::isSymbolInternal(DataRefImpl DRI) const { + InMemoryStruct Entry; + getSymbolTableEntry(DRI, Entry); + return Entry->Flags & macho::STF_StabsEntryMask; +} + +ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const { + // DRI.d.a = segment number; DRI.d.b = symbol index. + DataRefImpl DRI; + DRI.d.a = DRI.d.b = 0; + moveToNextSymbol(DRI); + return symbol_iterator(SymbolRef(DRI, this)); +} + +ObjectFile::symbol_iterator MachOObjectFile::end_symbols() const { + DataRefImpl DRI; + DRI.d.a = MachOObj->getHeader().NumLoadCommands; + DRI.d.b = 0; + return symbol_iterator(SymbolRef(DRI, this)); +} + + +/*===-- Sections ----------------------------------------------------------===*/ + +void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const { + uint32_t LoadCommandCount = MachOObj->getHeader().NumLoadCommands; + while (DRI.d.a < LoadCommandCount) { + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + if (LCI.Command.Type == macho::LCT_Segment) { + InMemoryStruct SegmentLoadCmd; + MachOObj->ReadSegmentLoadCommand(LCI, SegmentLoadCmd); + if (DRI.d.b < SegmentLoadCmd->NumSections) + return; + } else if (LCI.Command.Type == macho::LCT_Segment64) { + InMemoryStruct Segment64LoadCmd; + MachOObj->ReadSegment64LoadCommand(LCI, Segment64LoadCmd); + if (DRI.d.b < Segment64LoadCmd->NumSections) + return; + } + + DRI.d.a++; + DRI.d.b = 0; + } +} + +SectionRef MachOObjectFile::getSectionNext(DataRefImpl DRI) const { + DRI.d.b++; + moveToNextSection(DRI); + return SectionRef(DRI, this); +} + +void +MachOObjectFile::getSection(DataRefImpl 
DRI, + InMemoryStruct &Res) const { + InMemoryStruct SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegmentLoadCommand(LCI, SLC); + MachOObj->ReadSection(LCI, DRI.d.b, Res); +} + +StringRef MachOObjectFile::getSectionName(DataRefImpl DRI) const { + InMemoryStruct SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegmentLoadCommand(LCI, SLC); + InMemoryStruct Sect; + MachOObj->ReadSection(LCI, DRI.d.b, Sect); + + static char Result[34]; + strcpy(Result, SLC->Name); + strcat(Result, ","); + strcat(Result, Sect->Name); + return StringRef(Result); +} + +uint64_t MachOObjectFile::getSectionAddress(DataRefImpl DRI) const { + InMemoryStruct Sect; + getSection(DRI, Sect); + return Sect->Address; +} + +uint64_t MachOObjectFile::getSectionSize(DataRefImpl DRI) const { + InMemoryStruct Sect; + getSection(DRI, Sect); + return Sect->Size; +} + +StringRef MachOObjectFile::getSectionContents(DataRefImpl DRI) const { + InMemoryStruct Sect; + getSection(DRI, Sect); + return MachOObj->getData(Sect->Offset, Sect->Size); +} + +bool MachOObjectFile::isSectionText(DataRefImpl DRI) const { + InMemoryStruct SLC; + LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a); + MachOObj->ReadSegmentLoadCommand(LCI, SLC); + return !strcmp(SLC->Name, "__TEXT"); +} + +ObjectFile::section_iterator MachOObjectFile::begin_sections() const { + DataRefImpl DRI; + DRI.d.a = DRI.d.b = 0; + moveToNextSection(DRI); + return section_iterator(SectionRef(DRI, this)); +} + +ObjectFile::section_iterator MachOObjectFile::end_sections() const { + DataRefImpl DRI; + DRI.d.a = MachOObj->getHeader().NumLoadCommands; + DRI.d.b = 0; + return section_iterator(SectionRef(DRI, this)); +} + +/*===-- Miscellaneous -----------------------------------------------------===*/ + +uint8_t MachOObjectFile::getBytesInAddress() const { + return MachOObj->is64Bit() ? 
8 : 4; +} + +StringRef MachOObjectFile::getFileFormatName() const { + if (!MachOObj->is64Bit()) { + switch (MachOObj->getHeader().CPUType) { + case llvm::MachO::CPUTypeI386: + return "Mach-O 32-bit i386"; + case llvm::MachO::CPUTypeARM: + return "Mach-O arm"; + case llvm::MachO::CPUTypePowerPC: + return "Mach-O 32-bit ppc"; + default: + assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 0 && + "64-bit object file when we're not 64-bit?"); + return "Mach-O 32-bit unknown"; + } + } + + switch (MachOObj->getHeader().CPUType) { + case llvm::MachO::CPUTypeX86_64: + return "Mach-O 64-bit x86-64"; + case llvm::MachO::CPUTypePowerPC64: + return "Mach-O 64-bit ppc64"; + default: + assert((MachOObj->getHeader().CPUType & llvm::MachO::CPUArchABI64) == 1 && + "32-bit object file when we're 64-bit?"); + return "Mach-O 64-bit unknown"; + } +} + +unsigned MachOObjectFile::getArch() const { + switch (MachOObj->getHeader().CPUType) { + case llvm::MachO::CPUTypeI386: + return Triple::x86; + case llvm::MachO::CPUTypeX86_64: + return Triple::x86_64; + case llvm::MachO::CPUTypeARM: + return Triple::arm; + case llvm::MachO::CPUTypePowerPC: + return Triple::ppc; + case llvm::MachO::CPUTypePowerPC64: + return Triple::ppc64; + default: + return Triple::UnknownArch; + } +} + +} // end namespace llvm + diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp new file mode 100644 index 000000000000..603b23c74e93 --- /dev/null +++ b/lib/Object/Object.cpp @@ -0,0 +1,59 @@ +//===- Object.cpp - C bindings to the object file library--------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the C bindings to the file-format-independent object +// library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ObjectFile.h" +#include "llvm-c/Object.h" + +using namespace llvm; +using namespace object; + +LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { + return wrap(ObjectFile::createObjectFile(unwrap(MemBuf))); +} + +void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) { + delete unwrap(ObjectFile); +} + +LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) { + ObjectFile::section_iterator SI = unwrap(ObjectFile)->begin_sections(); + return wrap(new ObjectFile::section_iterator(SI)); +} + +void LLVMDisposeSectionIterator(LLVMSectionIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile, + LLVMSectionIteratorRef SI) { + return (*unwrap(SI) == unwrap(ObjectFile)->end_sections()) ? 1 : 0; +} + +void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { + // We can't use unwrap() here because the argument to ++ must be an lvalue. 
+ ++*reinterpret_cast(SI); +} + +const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getName().data(); +} + +uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getSize(); +} + +const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { + return (*unwrap(SI))->getContents().data(); +} + diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index 161ae3a083f1..47b63115a94c 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -55,7 +55,7 @@ ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { case sys::Mach_O_DynamicLinker_FileType: case sys::Mach_O_Bundle_FileType: case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: - return 0; + return createMachOObjectFile(Object); case sys::COFF_FileType: return createCOFFObjectFile(Object); default: diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index e765ba0a27bb..c3169acabbc7 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -726,7 +726,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { } APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) -{ + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); sign = 0; @@ -736,14 +736,15 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) normalize(rmNearestTiesToEven, lfExactlyZero); } -APFloat::APFloat(const fltSemantics &ourSemantics) { +APFloat::APFloat(const fltSemantics &ourSemantics) : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); category = fcZero; sign = false; } -APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { +APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); // Allocates storage if necessary but does not initialize it. 
initialize(&ourSemantics); @@ -751,7 +752,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { APFloat::APFloat(const fltSemantics &ourSemantics, fltCategory ourCategory, bool negative) -{ + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); category = ourCategory; @@ -763,14 +764,13 @@ APFloat::APFloat(const fltSemantics &ourSemantics, } APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) -{ + : exponent2(0), sign2(0) { assertArithmeticOK(ourSemantics); initialize(&ourSemantics); convertFromString(text, rmNearestTiesToEven); } -APFloat::APFloat(const APFloat &rhs) -{ +APFloat::APFloat(const APFloat &rhs) : exponent2(0), sign2(0) { initialize(rhs.semantics); assign(rhs); } @@ -3257,18 +3257,15 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { return Val; } -APFloat::APFloat(const APInt& api, bool isIEEE) -{ +APFloat::APFloat(const APInt& api, bool isIEEE) : exponent2(0), sign2(0) { initFromAPInt(api, isIEEE); } -APFloat::APFloat(float f) -{ +APFloat::APFloat(float f) : exponent2(0), sign2(0) { initFromAPInt(APInt::floatToBits(f)); } -APFloat::APFloat(double d) -{ +APFloat::APFloat(double d) : exponent2(0), sign2(0) { initFromAPInt(APInt::doubleToBits(d)); } @@ -3565,3 +3562,37 @@ void APFloat::toString(SmallVectorImpl &Str, for (; I != NDigits; ++I) Str.push_back(buffer[NDigits-I-1]); } + +bool APFloat::getExactInverse(APFloat *inv) const { + // We can only guarantee the existence of an exact inverse for IEEE floats. + if (semantics != &IEEEhalf && semantics != &IEEEsingle && + semantics != &IEEEdouble && semantics != &IEEEquad) + return false; + + // Special floats and denormals have no exact inverse. + if (category != fcNormal) + return false; + + // Check that the number is a power of two by making sure that only the + // integer bit is set in the significand. + if (significandLSB() != semantics->precision - 1) + return false; + + // Get the inverse. 
+ APFloat reciprocal(*semantics, 1ULL); + if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK) + return false; + + // Avoid multiplication with a denormal, it is not safe on all platforms and + // may be slower than a normal division. + if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision) + return false; + + assert(reciprocal.category == fcNormal && + reciprocal.significandLSB() == reciprocal.semantics->precision - 1); + + if (inv) + *inv = reciprocal; + + return true; +} diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 08f36d2af3a1..23a22ac68f3f 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1517,13 +1517,15 @@ APInt::ms APInt::magic() const { /// division by a constant as a sequence of multiplies, adds and shifts. /// Requires that the divisor not be 0. Taken from "Hacker's Delight", Henry /// S. Warren, Jr., chapter 10. -APInt::mu APInt::magicu() const { +/// LeadingZeros can be used to simplify the calculation if the upper bits +/// of the divided value are known zero. 
+APInt::mu APInt::magicu(unsigned LeadingZeros) const { const APInt& d = *this; unsigned p; APInt nc, delta, q1, r1, q2, r2; struct mu magu; magu.a = 0; // initialize "add" indicator - APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()); + APInt allOnes = APInt::getAllOnesValue(d.getBitWidth()).lshr(LeadingZeros); APInt signedMin = APInt::getSignedMinValue(d.getBitWidth()); APInt signedMax = APInt::getSignedMaxValue(d.getBitWidth()); @@ -2076,6 +2078,16 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { return Res; } +APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { + APInt Res = *this * RHS; + + if (*this != 0 && RHS != 0) + Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; + else + Overflow = false; + return Res; +} + APInt APInt::sshl_ov(unsigned ShAmt, bool &Overflow) const { Overflow = ShAmt >= getBitWidth(); if (Overflow) diff --git a/lib/Support/Allocator.cpp b/lib/Support/Allocator.cpp index 5e27df6628eb..215b0f249d96 100644 --- a/lib/Support/Allocator.cpp +++ b/lib/Support/Allocator.cpp @@ -136,6 +136,14 @@ unsigned BumpPtrAllocator::GetNumSlabs() const { return NumSlabs; } +size_t BumpPtrAllocator::getTotalMemory() const { + size_t TotalMemory = 0; + for (MemSlab *Slab = CurSlab; Slab != 0; Slab = Slab->NextPtr) { + TotalMemory += Slab->Size; + } + return TotalMemory; +} + void BumpPtrAllocator::PrintStats() const { unsigned NumSlabs = 0; size_t TotalMemory = 0; diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 7e744993a7cb..7f1c0d320b11 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -186,12 +186,14 @@ static Option *LookupOption(StringRef &Arg, StringRef &Value, /// have already been stripped. static Option *LookupNearestOption(StringRef Arg, const StringMap &OptionsMap, - const char *&NearestString) { + std::string &NearestString) { // Reject all dashes. if (Arg.empty()) return 0; // Split on any equal sign. 
- StringRef LHS = Arg.split('=').first; + std::pair SplitArg = Arg.split('='); + StringRef &LHS = SplitArg.first; // LHS == Arg when no '=' is present. + StringRef &RHS = SplitArg.second; // Find the closest match. Option *Best = 0; @@ -204,14 +206,19 @@ static Option *LookupNearestOption(StringRef Arg, if (O->ArgStr[0]) OptionNames.push_back(O->ArgStr); + bool PermitValue = O->getValueExpectedFlag() != cl::ValueDisallowed; + StringRef Flag = PermitValue ? LHS : Arg; for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { StringRef Name = OptionNames[i]; unsigned Distance = StringRef(Name).edit_distance( - Arg, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); + Flag, /*AllowReplacements=*/true, /*MaxEditDistance=*/BestDistance); if (!Best || Distance < BestDistance) { Best = O; - NearestString = OptionNames[i]; BestDistance = Distance; + if (RHS.empty() || !PermitValue) + NearestString = OptionNames[i]; + else + NearestString = std::string(OptionNames[i]) + "=" + RHS.str(); } } } @@ -611,7 +618,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv, for (int i = 1; i < argc; ++i) { Option *Handler = 0; Option *NearestHandler = 0; - const char *NearestHandlerString = 0; + std::string NearestHandlerString; StringRef Value; StringRef ArgName = ""; @@ -908,8 +915,6 @@ void alias::printOptionInfo(size_t GlobalWidth) const { errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; } - - //===----------------------------------------------------------------------===// // Parser Implementation code... 
// @@ -939,7 +944,11 @@ void basic_parser_impl::printOptionInfo(const Option &O, outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n'; } - +void basic_parser_impl::printOptionName(const Option &O, + size_t GlobalWidth) const { + outs() << " -" << O.ArgStr; + outs().indent(GlobalWidth-std::strlen(O.ArgStr)); +} // parser implementation @@ -1083,6 +1092,89 @@ void generic_parser_base::printOptionInfo(const Option &O, } } +static const size_t MaxOptWidth = 8; // arbitrary spacing for printOptionDiff + +// printGenericOptionDiff - Print the value of this option and it's default. +// +// "Generic" options have each value mapped to a name. +void generic_parser_base:: +printGenericOptionDiff(const Option &O, const GenericOptionValue &Value, + const GenericOptionValue &Default, + size_t GlobalWidth) const { + outs() << " -" << O.ArgStr; + outs().indent(GlobalWidth-std::strlen(O.ArgStr)); + + unsigned NumOpts = getNumOptions(); + for (unsigned i = 0; i != NumOpts; ++i) { + if (Value.compare(getOptionValue(i))) + continue; + + outs() << "= " << getOption(i); + size_t L = std::strlen(getOption(i)); + size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0; + outs().indent(NumSpaces) << " (default: "; + for (unsigned j = 0; j != NumOpts; ++j) { + if (Default.compare(getOptionValue(j))) + continue; + outs() << getOption(j); + break; + } + outs() << ")\n"; + return; + } + outs() << "= *unknown option value*\n"; +} + +// printOptionDiff - Specializations for printing basic value types. +// +#define PRINT_OPT_DIFF(T) \ + void parser:: \ + printOptionDiff(const Option &O, T V, OptionValue D, \ + size_t GlobalWidth) const { \ + printOptionName(O, GlobalWidth); \ + std::string Str; \ + { \ + raw_string_ostream SS(Str); \ + SS << V; \ + } \ + outs() << "= " << Str; \ + size_t NumSpaces = MaxOptWidth > Str.size() ? 
MaxOptWidth - Str.size() : 0;\ + outs().indent(NumSpaces) << " (default: "; \ + if (D.hasValue()) \ + outs() << D.getValue(); \ + else \ + outs() << "*no default*"; \ + outs() << ")\n"; \ + } \ + +PRINT_OPT_DIFF(bool) +PRINT_OPT_DIFF(boolOrDefault) +PRINT_OPT_DIFF(int) +PRINT_OPT_DIFF(unsigned) +PRINT_OPT_DIFF(double) +PRINT_OPT_DIFF(float) +PRINT_OPT_DIFF(char) + +void parser:: +printOptionDiff(const Option &O, StringRef V, OptionValue D, + size_t GlobalWidth) const { + printOptionName(O, GlobalWidth); + outs() << "= " << V; + size_t NumSpaces = MaxOptWidth > V.size() ? MaxOptWidth - V.size() : 0; + outs().indent(NumSpaces) << " (default: "; + if (D.hasValue()) + outs() << D.getValue(); + else + outs() << "*no default*"; + outs() << ")\n"; +} + +// Print a placeholder for options that don't yet support printOptionDiff(). +void basic_parser_impl:: +printOptionNoValue(const Option &O, size_t GlobalWidth) const { + printOptionName(O, GlobalWidth); + outs() << "= *cannot print option value*\n"; +} //===----------------------------------------------------------------------===// // -help and -help-hidden option implementation @@ -1094,6 +1186,35 @@ static int OptNameCompare(const void *LHS, const void *RHS) { return strcmp(((pair_ty*)LHS)->first, ((pair_ty*)RHS)->first); } +// Copy Options into a vector so we can sort them as we like. +static void +sortOpts(StringMap &OptMap, + SmallVectorImpl< std::pair > &Opts, + bool ShowHidden) { + SmallPtrSet OptionSet; // Duplicate option detection. + + for (StringMap::iterator I = OptMap.begin(), E = OptMap.end(); + I != E; ++I) { + // Ignore really-hidden options. + if (I->second->getOptionHiddenFlag() == ReallyHidden) + continue; + + // Unless showhidden is set, ignore hidden flags. + if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) + continue; + + // If we've already seen this option, don't add it to the list again. 
+ if (!OptionSet.insert(I->second)) + continue; + + Opts.push_back(std::pair(I->getKey().data(), + I->second)); + } + + // Sort the options list alphabetically. + qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); +} + namespace { class HelpPrinter { @@ -1115,30 +1236,8 @@ class HelpPrinter { StringMap OptMap; GetOptionInfo(PositionalOpts, SinkOpts, OptMap); - // Copy Options into a vector so we can sort them as we like. SmallVector, 128> Opts; - SmallPtrSet OptionSet; // Duplicate option detection. - - for (StringMap::iterator I = OptMap.begin(), E = OptMap.end(); - I != E; ++I) { - // Ignore really-hidden options. - if (I->second->getOptionHiddenFlag() == ReallyHidden) - continue; - - // Unless showhidden is set, ignore hidden flags. - if (I->second->getOptionHiddenFlag() == Hidden && !ShowHidden) - continue; - - // If we've already seen this option, don't add it to the list again. - if (!OptionSet.insert(I->second)) - continue; - - Opts.push_back(std::pair(I->getKey().data(), - I->second)); - } - - // Sort the options list alphabetically. - qsort(Opts.data(), Opts.size(), sizeof(Opts[0]), OptNameCompare); + sortOpts(OptMap, Opts, ShowHidden); if (ProgramOverview) outs() << "OVERVIEW: " << ProgramOverview << "\n"; @@ -1197,6 +1296,38 @@ static cl::opt > HHOp("help-hidden", cl::desc("Display all available options"), cl::location(HiddenPrinter), cl::Hidden, cl::ValueDisallowed); +static cl::opt +PrintOptions("print-options", + cl::desc("Print non-default options after command line parsing"), + cl::Hidden, cl::init(false)); + +static cl::opt +PrintAllOptions("print-all-options", + cl::desc("Print all option values after command line parsing"), + cl::Hidden, cl::init(false)); + +// Print the value of each option. +void cl::PrintOptionValues() { + if (!PrintOptions && !PrintAllOptions) return; + + // Get all the options. 
+ SmallVector PositionalOpts; + SmallVector SinkOpts; + StringMap OptMap; + GetOptionInfo(PositionalOpts, SinkOpts, OptMap); + + SmallVector, 128> Opts; + sortOpts(OptMap, Opts, /*ShowHidden*/true); + + // Compute the maximum argument length... + size_t MaxArgLen = 0; + for (size_t i = 0, e = Opts.size(); i != e; ++i) + MaxArgLen = std::max(MaxArgLen, Opts[i].second->getOptionWidth()); + + for (size_t i = 0, e = Opts.size(); i != e; ++i) + Opts[i].second->printOptionValue(MaxArgLen, PrintAllOptions); +} + static void (*OverrideVersionPrinter)() = 0; static int TargetArraySortFn(const void *LHS, const void *RHS) { diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index bf8ca3f844b4..899c3890d78a 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -57,12 +57,36 @@ struct CrashRecoveryContextImpl { static sys::Mutex gCrashRecoveryContexMutex; static bool gCrashRecoveryEnabled = false; +static sys::ThreadLocal + tlIsRecoveringFromCrash; + +CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} + CrashRecoveryContext::~CrashRecoveryContext() { + // Reclaim registered resources. 
+ CrashRecoveryContextCleanup *i = head; + tlIsRecoveringFromCrash.set(head); + while (i) { + CrashRecoveryContextCleanup *tmp = i; + i = tmp->next; + tmp->cleanupFired = true; + tmp->recoverResources(); + delete tmp; + } + tlIsRecoveringFromCrash.erase(); + CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; delete CRCI; } +bool CrashRecoveryContext::isRecoveringFromCrash() { + return tlIsRecoveringFromCrash.get() != 0; +} + CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { + if (!gCrashRecoveryEnabled) + return 0; + const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); if (!CRCI) return 0; @@ -70,6 +94,33 @@ CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { return CRCI->CRC; } +void CrashRecoveryContext::registerCleanup(CrashRecoveryContextCleanup *cleanup) +{ + if (!cleanup) + return; + if (head) + head->prev = cleanup; + cleanup->next = head; + head = cleanup; +} + +void +CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { + if (!cleanup) + return; + if (cleanup == head) { + head = cleanup->next; + if (head) + head->prev = 0; + } + else { + cleanup->prev->next = cleanup->next; + if (cleanup->next) + cleanup->next->prev = cleanup->prev; + } + delete cleanup; +} + #ifdef LLVM_ON_WIN32 // FIXME: No real Win32 implementation currently. 
diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index 9799ef54792b..74a9fda7ab64 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -203,6 +203,10 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers"; case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class"; case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr"; + case DW_AT_APPLE_property_name: return "DW_AT_APPLE_property_name"; + case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter"; + case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter"; + case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute"; } return 0; } @@ -391,6 +395,7 @@ const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) { case DW_OP_call_ref: return "DW_OP_call_ref"; case DW_OP_form_tls_address: return "DW_OP_form_tls_address"; case DW_OP_call_frame_cfa: return "DW_OP_call_frame_cfa"; + case DW_OP_bit_piece: return "DW_OP_bit_piece"; case DW_OP_lo_user: return "DW_OP_lo_user"; case DW_OP_hi_user: return "DW_OP_hi_user"; } diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 3579546d757d..e6cc57db8243 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -32,7 +32,6 @@ #endif using namespace llvm; -using namespace std; static fatal_error_handler_t ErrorHandler = 0; static void *ErrorHandlerUserData = 0; diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index 5dbabee7a7ed..4c8c0c63ffc4 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -198,7 +198,7 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA, return 1; } - // Now its safe to mmap the files into memory becasue both files + // Now its safe to mmap the files into memory because both files // have a non-zero size. 
error_code ec; OwningPtr F1; diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index a4f80a90d6d0..d2e35b8eb676 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -147,6 +147,11 @@ void FoldingSetNodeID::AddString(StringRef String) { Bits.push_back(V); } +// AddNodeID - Adds the Bit data of another ID to *this. +void FoldingSetNodeID::AddNodeID(const FoldingSetNodeID &ID) { + Bits.append(ID.Bits.begin(), ID.Bits.end()); +} + /// ComputeHash - Compute a strong hash value for this FoldingSetNodeID, used to /// lookup the node in the FoldingSetImpl. unsigned FoldingSetNodeID::ComputeHash() const { diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index 4dacf9691d6e..911c64accec3 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -214,6 +214,8 @@ std::string sys::getHostCPUName() { // As found in a Summer 2010 model iMac. case 37: // Intel Core i7, laptop version. return "corei7"; + case 42: // SandyBridge + return "sandybridge"; case 28: // Intel Atom processor. All processors are manufactured using // the 45 nm process diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index a0c650d6820b..e2b5b7a58523 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -46,8 +46,10 @@ MemoryBuffer::~MemoryBuffer() { } /// init - Initialize this MemoryBuffer as a reference to externally allocated /// memory, memory that we know is already null terminated. -void MemoryBuffer::init(const char *BufStart, const char *BufEnd) { - assert(BufEnd[0] == 0 && "Buffer is not null terminated!"); +void MemoryBuffer::init(const char *BufStart, const char *BufEnd, + bool RequiresNullTerminator) { + assert((!RequiresNullTerminator || BufEnd[0] == 0) && + "Buffer is not null terminated!"); BufferStart = BufStart; BufferEnd = BufEnd; } @@ -65,32 +67,39 @@ static void CopyStringRef(char *Memory, StringRef Data) { /// GetNamedBuffer - Allocates a new MemoryBuffer with Name copied after it. 
template -static T* GetNamedBuffer(StringRef Buffer, StringRef Name) { +static T* GetNamedBuffer(StringRef Buffer, StringRef Name, + bool RequiresNullTerminator) { char *Mem = static_cast(operator new(sizeof(T) + Name.size() + 1)); CopyStringRef(Mem + sizeof(T), Name); - return new (Mem) T(Buffer); + return new (Mem) T(Buffer, RequiresNullTerminator); } namespace { /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory. class MemoryBufferMem : public MemoryBuffer { public: - MemoryBufferMem(StringRef InputData) { - init(InputData.begin(), InputData.end()); + MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) { + init(InputData.begin(), InputData.end(), RequiresNullTerminator); } virtual const char *getBufferIdentifier() const { // The name is stored after the class itself. return reinterpret_cast(this + 1); } + + virtual BufferKind getBufferKind() const { + return MemoryBuffer_Malloc; + } }; } /// getMemBuffer - Open the specified memory range as a MemoryBuffer. Note /// that EndPtr[0] must be a null byte and be accessible! MemoryBuffer *MemoryBuffer::getMemBuffer(StringRef InputData, - StringRef BufferName) { - return GetNamedBuffer(InputData, BufferName); + StringRef BufferName, + bool RequiresNullTerminator) { + return GetNamedBuffer(InputData, BufferName, + RequiresNullTerminator); } /// getMemBufferCopy - Open the specified memory range as a MemoryBuffer, @@ -127,7 +136,7 @@ MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size, char *Buf = Mem + AlignedStringLen; Buf[Size] = 0; // Null terminate buffer. - return new (Mem) MemoryBufferMem(StringRef(Buf, Size)); + return new (Mem) MemoryBufferMem(StringRef(Buf, Size), true); } /// getNewMemBuffer - Allocate a new MemoryBuffer of the specified size that @@ -172,26 +181,41 @@ namespace { /// sys::Path::UnMapFilePages method. 
class MemoryBufferMMapFile : public MemoryBufferMem { public: - MemoryBufferMMapFile(StringRef Buffer) - : MemoryBufferMem(Buffer) { } + MemoryBufferMMapFile(StringRef Buffer, bool RequiresNullTerminator) + : MemoryBufferMem(Buffer, RequiresNullTerminator) { } ~MemoryBufferMMapFile() { - sys::Path::UnMapFilePages(getBufferStart(), getBufferSize()); + static int PageSize = sys::Process::GetPageSize(); + + uintptr_t Start = reinterpret_cast(getBufferStart()); + size_t Size = getBufferSize(); + uintptr_t RealStart = Start & ~(PageSize - 1); + size_t RealSize = Size + (Start - RealStart); + + sys::Path::UnMapFilePages(reinterpret_cast(RealStart), + RealSize); + } + + virtual BufferKind getBufferKind() const { + return MemoryBuffer_MMap; } }; } error_code MemoryBuffer::getFile(StringRef Filename, OwningPtr &result, - int64_t FileSize) { + int64_t FileSize, + bool RequiresNullTerminator) { // Ensure the path is null terminated. SmallString<256> PathBuf(Filename.begin(), Filename.end()); - return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize); + return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize, + RequiresNullTerminator); } error_code MemoryBuffer::getFile(const char *Filename, OwningPtr &result, - int64_t FileSize) { + int64_t FileSize, + bool RequiresNullTerminator) { int OpenFlags = O_RDONLY; #ifdef O_BINARY OpenFlags |= O_BINARY; // Open input file in binary mode on win32. 
@@ -200,17 +224,32 @@ error_code MemoryBuffer::getFile(const char *Filename, if (FD == -1) { return error_code(errno, posix_category()); } - error_code ret = getOpenFile(FD, Filename, result, FileSize); + error_code ret = getOpenFile(FD, Filename, result, FileSize, FileSize, + 0, RequiresNullTerminator); close(FD); return ret; } -error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, - OwningPtr &result, - int64_t FileSize) { +static bool shouldUseMmap(int FD, + size_t FileSize, + size_t MapSize, + off_t Offset, + bool RequiresNullTerminator, + int PageSize) { + // We don't use mmap for small files because this can severely fragment our + // address space. + if (MapSize < 4096*4) + return false; + + if (!RequiresNullTerminator) + return true; + + // If we don't know the file size, use fstat to find out. fstat on an open // file descriptor is cheaper than stat on a random path. - if (FileSize == -1) { + // FIXME: this chunk of code is duplicated, but it avoids a fstat when + // RequiresNullTerminator = false and MapSize != -1. + if (FileSize == size_t(-1)) { struct stat FileInfo; // TODO: This should use fstat64 when available. if (fstat(FD, &FileInfo) == -1) { @@ -219,23 +258,59 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, FileSize = FileInfo.st_size; } + // If we need a null terminator and the end of the map is inside the file, + // we cannot use mmap. + size_t End = Offset + MapSize; + assert(End <= FileSize); + if (End != FileSize) + return false; - // If the file is large, try to use mmap to read it in. We don't use mmap - // for small files, because this can severely fragment our address space. Also - // don't try to map files that are exactly a multiple of the system page size, - // as the file would not have the required null terminator. - // - // FIXME: Can we just mmap an extra page in the latter case? 
- if (FileSize >= 4096*4 && - (FileSize & (sys::Process::GetPageSize()-1)) != 0) { - if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) { + // Don't try to map files that are exactly a multiple of the system page size + // if we need a null terminator. + if ((FileSize & (PageSize -1)) == 0) + return false; + + return true; +} + +error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, + OwningPtr &result, + size_t FileSize, size_t MapSize, + off_t Offset, + bool RequiresNullTerminator) { + static int PageSize = sys::Process::GetPageSize(); + + // Default is to map the full file. + if (MapSize == size_t(-1)) { + // If we don't know the file size, use fstat to find out. fstat on an open + // file descriptor is cheaper than stat on a random path. + if (FileSize == size_t(-1)) { + struct stat FileInfo; + // TODO: This should use fstat64 when available. + if (fstat(FD, &FileInfo) == -1) { + return error_code(errno, posix_category()); + } + FileSize = FileInfo.st_size; + } + MapSize = FileSize; + } + + if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, + PageSize)) { + off_t RealMapOffset = Offset & ~(PageSize - 1); + off_t Delta = Offset - RealMapOffset; + size_t RealMapSize = MapSize + Delta; + + if (const char *Pages = sys::Path::MapInFilePages(FD, + RealMapSize, + RealMapOffset)) { result.reset(GetNamedBuffer( - StringRef(Pages, FileSize), Filename)); + StringRef(Pages + Delta, MapSize), Filename, RequiresNullTerminator)); return success; } } - MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(FileSize, Filename); + MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); if (!Buf) { // Failed to create a buffer. The only way it can fail is if // new(std::nothrow) returns 0. 
@@ -245,7 +320,10 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, OwningPtr SB(Buf); char *BufPtr = const_cast(SB->getBufferStart()); - size_t BytesLeft = FileSize; + size_t BytesLeft = MapSize; + if (lseek(FD, Offset, SEEK_SET) == -1) + return error_code(errno, posix_category()); + while (BytesLeft) { ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); if (NumRead == -1) { diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index e5e875bc54d7..8fbaf2d42bf9 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -15,11 +15,15 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Config/config.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Endian.h" #include #include #include using namespace llvm; using namespace sys; +namespace { +using support::ulittle32_t; +} //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only TRULY operating system @@ -88,15 +92,21 @@ sys::IdentifyFileType(const char *magic, unsigned length) { } break; + // The two magic numbers for mach-o are: + // 0xfeedface - 32-bit mach-o + // 0xfeedfacf - 64-bit mach-o case 0xFE: - case 0xCE: { + case 0xCE: + case 0xCF: { uint16_t type = 0; if (magic[0] == char(0xFE) && magic[1] == char(0xED) && - magic[2] == char(0xFA) && magic[3] == char(0xCE)) { + magic[2] == char(0xFA) && + (magic[3] == char(0xCE) || magic[3] == char(0xCF))) { /* Native endian */ if (length >= 16) type = magic[14] << 8 | magic[15]; - } else if (magic[0] == char(0xCE) && magic[1] == char(0xFA) && - magic[2] == char(0xED) && magic[3] == char(0xFE)) { + } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) && + magic[1] == char(0xFA) && magic[2] == char(0xED) && + magic[3] == char(0xFE)) { /* Reverse endian */ if (length >= 14) type = magic[13] << 8 | magic[12]; } @@ -129,6 +139,16 @@ sys::IdentifyFileType(const char *magic, unsigned length) { if (magic[1] == 0x02) return COFF_FileType; break; + + case 
0x4d: // Possible MS-DOS stub on Windows PE file + if (magic[1] == 0x5a) { + uint32_t off = *reinterpret_cast(magic + 0x3c); + // PE/COFF file, either EXE or DLL. + if (off < length && memcmp(magic + off, "PE\0\0",4) == 0) + return COFF_FileType; + } + break; + case 0x64: // x86-64 Windows. if (magic[1] == char(0x86)) return COFF_FileType; diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp index a9f4709e4b93..082b7012eb23 100644 --- a/lib/Support/PrettyStackTrace.cpp +++ b/lib/Support/PrettyStackTrace.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of -// Unix signals occuring while your program is running. +// Unix signals occurring while your program is running. // //===----------------------------------------------------------------------===// diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 309ffb02dec6..d293da07d684 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -82,7 +82,7 @@ bool Regex::match(StringRef String, SmallVectorImpl *Matches){ Matches->push_back(StringRef()); continue; } - assert(pm[i].rm_eo > pm[i].rm_so); + assert(pm[i].rm_eo >= pm[i].rm_so); Matches->push_back(StringRef(String.data()+pm[i].rm_so, pm[i].rm_eo-pm[i].rm_so)); } diff --git a/lib/Support/Signals.cpp b/lib/Support/Signals.cpp index a3af37d5fe6a..a11789372d93 100644 --- a/lib/Support/Signals.cpp +++ b/lib/Support/Signals.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of -// Unix signals occuring while your program is running. +// Unix signals occurring while your program is running. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Support/SmallPtrSet.cpp b/lib/Support/SmallPtrSet.cpp index 504e6497a3cb..997ce0b74cd2 100644 --- a/lib/Support/SmallPtrSet.cpp +++ b/lib/Support/SmallPtrSet.cpp @@ -52,10 +52,14 @@ bool SmallPtrSetImpl::insert_imp(const void * Ptr) { // Otherwise, hit the big set case, which will call grow. } - // If more than 3/4 of the array is full, grow. - if (NumElements*4 >= CurArraySize*3 || - CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) - Grow(); + if (NumElements*4 >= CurArraySize*3) { + // If more than 3/4 of the array is full, grow. + Grow(CurArraySize < 64 ? 128 : CurArraySize*2); + } else if (CurArraySize-(NumElements+NumTombstones) < CurArraySize/8) { + // If fewer of 1/8 of the array is empty (meaning that many are filled with + // tombstones), rehash. + Grow(CurArraySize); + } // Okay, we know we have space. Find a hash bucket. const void **Bucket = const_cast(FindBucketFor(Ptr)); @@ -125,10 +129,9 @@ const void * const *SmallPtrSetImpl::FindBucketFor(const void *Ptr) const { /// Grow - Allocate a larger backing store for the buckets and move it over. /// -void SmallPtrSetImpl::Grow() { +void SmallPtrSetImpl::Grow(unsigned NewSize) { // Allocate at twice as many buckets, but at least 128. unsigned OldSize = CurArraySize; - unsigned NewSize = OldSize < 64 ? 
128 : OldSize*2; const void **OldBuckets = CurArray; bool WasSmall = isSmall(); diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp index f0ed62690fd3..1e733d92e610 100644 --- a/lib/Support/Statistic.cpp +++ b/lib/Support/Statistic.cpp @@ -101,6 +101,10 @@ void llvm::EnableStatistics() { Enabled.setValue(true); } +bool llvm::AreStatisticsEnabled() { + return Enabled; +} + void llvm::PrintStatistics(raw_ostream &OS) { StatisticInfo &Stats = *StatInfo; diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 90ec29950262..a1ac512fa244 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -169,6 +169,8 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { TheTable[Bucket].Item = getTombstoneVal(); --NumItems; ++NumTombstones; + assert(NumItems + NumTombstones <= NumBuckets); + return Result; } @@ -177,7 +179,19 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// RehashTable - Grow the table, redistributing values into the buckets with /// the appropriate mod-of-hashtable-size. void StringMapImpl::RehashTable() { - unsigned NewSize = NumBuckets*2; + unsigned NewSize; + + // If the hash table is now more than 3/4 full, or if fewer than 1/8 of + // the buckets are empty (meaning that many are filled with tombstones), + // grow/rehash the table. + if (NumItems*4 > NumBuckets*3) { + NewSize = NumBuckets*2; + } else if (NumBuckets-(NumItems+NumTombstones) < NumBuckets/8) { + NewSize = NumBuckets; + } else { + return; + } + // Allocate one extra bucket which will always be non-empty. This allows the // iterators to stop at end. 
ItemBucket *NewTableArray =(ItemBucket*)calloc(NewSize+1, sizeof(ItemBucket)); @@ -212,4 +226,5 @@ void StringMapImpl::RehashTable() { TheTable = NewTableArray; NumBuckets = NewSize; + NumTombstones = 0; } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 539805196450..8c3fc094cd11 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -131,7 +131,7 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, /// find - Search for the first string \arg Str in the string. /// -/// \return - The index of the first occurence of \arg Str, or npos if not +/// \return - The index of the first occurrence of \arg Str, or npos if not /// found. size_t StringRef::find(StringRef Str, size_t From) const { size_t N = Str.size(); @@ -145,7 +145,7 @@ size_t StringRef::find(StringRef Str, size_t From) const { /// rfind - Search for the last string \arg Str in the string. /// -/// \return - The index of the last occurence of \arg Str, or npos if not +/// \return - The index of the last occurrence of \arg Str, or npos if not /// found. 
size_t StringRef::rfind(StringRef Str) const { size_t N = Str.size(); diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 36edf6eefa70..dbdb303a4fdd 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -41,7 +41,8 @@ const char *Triple::getArchTypeName(ArchType Kind) { case x86_64: return "x86_64"; case xcore: return "xcore"; case mblaze: return "mblaze"; - case ptx: return "ptx"; + case ptx32: return "ptx32"; + case ptx64: return "ptx64"; } return ""; @@ -74,7 +75,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case xcore: return "xcore"; - case ptx: return "ptx"; + case ptx32: return "ptx"; + case ptx64: return "ptx"; } } @@ -84,6 +86,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case Apple: return "apple"; case PC: return "pc"; + case SCEI: return "scei"; } return ""; @@ -98,8 +101,10 @@ const char *Triple::getOSTypeName(OSType Kind) { case Darwin: return "darwin"; case DragonFly: return "dragonfly"; case FreeBSD: return "freebsd"; + case IOS: return "ios"; case Linux: return "linux"; case Lv2: return "lv2"; + case MacOSX: return "macosx"; case MinGW32: return "mingw32"; case NetBSD: return "netbsd"; case OpenBSD: return "openbsd"; @@ -162,8 +167,10 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return x86_64; if (Name == "xcore") return xcore; - if (Name == "ptx") - return ptx; + if (Name == "ptx32") + return ptx32; + if (Name == "ptx64") + return ptx64; return UnknownArch; } @@ -202,15 +209,17 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) { Str == "armv6" || Str == "armv7") return Triple::arm; - if (Str == "ptx") - return Triple::ptx; + if (Str == "ptx32") + return Triple::ptx32; + if (Str == "ptx64") + return Triple::ptx64; return Triple::UnknownArch; } // Returns architecture name that is understood by the target assembler. 
const char *Triple::getArchNameForAssembler() { - if (getOS() != Triple::Darwin && getVendor() != Triple::Apple) + if (!isOSDarwin() && getVendor() != Triple::Apple) return NULL; StringRef Str = getArchName(); @@ -235,8 +244,10 @@ const char *Triple::getArchNameForAssembler() { return "armv6"; if (Str == "armv7" || Str == "thumbv7") return "armv7"; - if (Str == "ptx") - return "ptx"; + if (Str == "ptx32") + return "ptx32"; + if (Str == "ptx64") + return "ptx64"; return NULL; } @@ -285,8 +296,10 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { return tce; else if (ArchName == "xcore") return xcore; - else if (ArchName == "ptx") - return ptx; + else if (ArchName == "ptx32") + return ptx32; + else if (ArchName == "ptx64") + return ptx64; else return UnknownArch; } @@ -296,6 +309,8 @@ Triple::VendorType Triple::ParseVendor(StringRef VendorName) { return Apple; else if (VendorName == "pc") return PC; + else if (VendorName == "scei") + return SCEI; else return UnknownVendor; } @@ -311,10 +326,14 @@ Triple::OSType Triple::ParseOS(StringRef OSName) { return DragonFly; else if (OSName.startswith("freebsd")) return FreeBSD; + else if (OSName.startswith("ios")) + return IOS; else if (OSName.startswith("linux")) return Linux; else if (OSName.startswith("lv2")) return Lv2; + else if (OSName.startswith("macosx")) + return MacOSX; else if (OSName.startswith("mingw32")) return MinGW32; else if (OSName.startswith("netbsd")) @@ -523,67 +542,44 @@ StringRef Triple::getOSAndEnvironmentName() const { static unsigned EatNumber(StringRef &Str) { assert(!Str.empty() && Str[0] >= '0' && Str[0] <= '9' && "Not a number"); - unsigned Result = Str[0]-'0'; + unsigned Result = 0; - // Eat the digit. - Str = Str.substr(1); - - // Handle "darwin11". - if (Result == 1 && !Str.empty() && Str[0] >= '0' && Str[0] <= '9') { + do { + // Consume the leading digit. Result = Result*10 + (Str[0] - '0'); + // Eat the digit. 
Str = Str.substr(1); - } + } while (!Str.empty() && Str[0] >= '0' && Str[0] <= '9'); return Result; } -/// getDarwinNumber - Parse the 'darwin number' out of the specific target -/// triple. For example, if we have darwin8.5 return 8,5,0. If any entry is -/// not defined, return 0's. This requires that the triple have an OSType of -/// darwin before it is called. -void Triple::getDarwinNumber(unsigned &Maj, unsigned &Min, - unsigned &Revision) const { - assert(getOS() == Darwin && "Not a darwin target triple!"); +void Triple::getOSVersion(unsigned &Major, unsigned &Minor, + unsigned &Micro) const { StringRef OSName = getOSName(); - assert(OSName.startswith("darwin") && "Unknown darwin target triple!"); - // Strip off "darwin". - OSName = OSName.substr(6); + // Assume that the OS portion of the triple starts with the canonical name. + StringRef OSTypeName = getOSTypeName(getOS()); + if (OSName.startswith(OSTypeName)) + OSName = OSName.substr(OSTypeName.size()); - Maj = Min = Revision = 0; + // Any unset version defaults to 0. + Major = Minor = Micro = 0; - if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') - return; + // Parse up to three components. + unsigned *Components[3] = { &Major, &Minor, &Micro }; + for (unsigned i = 0; i != 3; ++i) { + if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') + break; - // The major version is the first digit. - Maj = EatNumber(OSName); - if (OSName.empty()) return; + // Consume the leading number. + *Components[i] = EatNumber(OSName); - // Handle minor version: 10.4.9 -> darwin8.9. - if (OSName[0] != '.') - return; - - // Eat the '.'. - OSName = OSName.substr(1); - - if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') - return; - - Min = EatNumber(OSName); - if (OSName.empty()) return; - - // Handle revision darwin8.9.1 - if (OSName[0] != '.') - return; - - // Eat the '.'. 
- OSName = OSName.substr(1); - - if (OSName.empty() || OSName[0] < '0' || OSName[0] > '9') - return; - - Revision = EatNumber(OSName); + // Consume the separator, if present. + if (OSName.startswith(".")) + OSName = OSName.substr(1); + } } void Triple::setTriple(const Twine &Str) { diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index ed74b6759901..8cbec8cd7ee8 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -87,10 +87,7 @@ std::string sys::getHostTriple() { std::string::size_type DarwinDashIdx = Triple.find("-darwin"); if (DarwinDashIdx != std::string::npos) { Triple.resize(DarwinDashIdx + strlen("-darwin")); - - // Only add the major part of the os version. - std::string Version = getOSVersion(); - Triple += Version.substr(0, Version.find('.')); + Triple += getOSVersion(); } return Triple; diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 4312d67183c4..5a57a2870636 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -124,7 +124,7 @@ bool llvm::sys::Memory::setExecutable (MemoryBlock &M, std::string *ErrMsg) { (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); return KERN_SUCCESS == kr; #else - return false; + return true; #endif } diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 0f6e800505e1..430cf2ed8e8f 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -869,18 +869,18 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { return false; } -const char *Path::MapInFilePages(int FD, uint64_t FileSize) { +const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) { int Flags = MAP_PRIVATE; #ifdef MAP_FILE Flags |= MAP_FILE; #endif - void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, 0); + void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, Offset); if (BasePtr == MAP_FAILED) return 0; return (const char*)BasePtr; } -void Path::UnMapFilePages(const char *BasePtr, uint64_t 
FileSize) { +void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) { ::munmap((void*)BasePtr, FileSize); } diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 1104bc7503e1..9f0a9ef0523f 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -132,7 +132,7 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { #ifdef HAVE_POSIX_SPAWN static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg, - posix_spawn_file_actions_t &FileActions) { + posix_spawn_file_actions_t *FileActions) { if (Path == 0) // Noop return false; const char *File; @@ -142,7 +142,7 @@ static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg, else File = Path->c_str(); - if (int Err = posix_spawn_file_actions_addopen(&FileActions, FD, + if (int Err = posix_spawn_file_actions_addopen(FileActions, FD, File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666)) return MakeErrMsg(ErrMsg, "Cannot dup2", Err); return false; @@ -185,10 +185,13 @@ Program::Execute(const Path &path, const char **args, const char **envp, // posix_spawn. It is more efficient than fork/exec. #ifdef HAVE_POSIX_SPAWN if (memoryLimit == 0) { - posix_spawn_file_actions_t FileActions; - posix_spawn_file_actions_init(&FileActions); + posix_spawn_file_actions_t FileActionsStore; + posix_spawn_file_actions_t *FileActions = 0; if (redirects) { + FileActions = &FileActionsStore; + posix_spawn_file_actions_init(FileActions); + // Redirect stdin/stdout. if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) || RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions)) @@ -200,7 +203,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, } else { // If stdout and stderr should go to the same place, redirect stderr // to the FD already open for stdout. 
- if (int Err = posix_spawn_file_actions_adddup2(&FileActions, 1, 2)) + if (int Err = posix_spawn_file_actions_adddup2(FileActions, 1, 2)) return !MakeErrMsg(ErrMsg, "Can't redirect stderr to stdout", Err); } } @@ -216,10 +219,11 @@ Program::Execute(const Path &path, const char **args, const char **envp, // Explicitly initialized to prevent what appears to be a valgrind false // positive. pid_t PID = 0; - int Err = posix_spawn(&PID, path.c_str(), &FileActions, /*attrp*/0, + int Err = posix_spawn(&PID, path.c_str(), FileActions, /*attrp*/0, const_cast(args), const_cast(envp)); - posix_spawn_file_actions_destroy(&FileActions); + if (FileActions) + posix_spawn_file_actions_destroy(FileActions); if (Err) return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err); @@ -232,7 +236,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, // Create a child process. int child = fork(); switch (child) { - // An error occured: Return to the caller. + // An error occurred: Return to the caller. case -1: MakeErrMsg(ErrMsg, "Couldn't fork"); return false; diff --git a/lib/Support/Unix/Signals.inc b/lib/Support/Unix/Signals.inc index 0a617591551d..e286869e775d 100644 --- a/lib/Support/Unix/Signals.inc +++ b/lib/Support/Unix/Signals.inc @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file defines some helpful functions for dealing with the possibility of -// Unix signals occuring while your program is running. +// Unix signals occurring while your program is running. 
// //===----------------------------------------------------------------------===// @@ -274,6 +274,9 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { #ifdef __APPLE__ +#include +#include + int raise(int sig) { return pthread_kill(pthread_self(), sig); } @@ -291,9 +294,6 @@ void __assert_rtn(const char *func, abort(); } -#include -#include - void abort() { raise(SIGABRT); usleep(1000); diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 2c14366c0761..4227844ae506 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -41,41 +41,12 @@ using namespace sys; static std::vector OpenedHandles; -#ifdef _WIN64 - typedef DWORD64 ModuleBaseType; -#else - typedef ULONG ModuleBaseType; -#endif - extern "C" { -// Use old callback if: -// - Not using Visual Studio -// - Visual Studio 2005 or earlier but only if we are not using the Windows SDK -// or Windows SDK version is older than 6.0 -// Use new callback if: -// - Newer Visual Studio (comes with newer SDK). -// - Visual Studio 2005 with Windows SDK 6.0+ -#if defined(_MSC_VER) - #if _MSC_VER < 1500 && (!defined(VER_PRODUCTBUILD) || VER_PRODUCTBUILD < 6000) - #define OLD_ELM_CALLBACK_DECL 1 - #endif -#elif defined(__MINGW64__) - // Use new callback. -#elif defined(__MINGW32__) - #define OLD_ELM_CALLBACK_DECL 1 -#endif -#ifdef OLD_ELM_CALLBACK_DECL - static BOOL CALLBACK ELM_Callback(PSTR ModuleName, - ModuleBaseType ModuleBase, + static BOOL CALLBACK ELM_Callback(WIN32_ELMCB_PCSTR ModuleName, + ULONG_PTR ModuleBase, ULONG ModuleSize, PVOID UserContext) -#else - static BOOL CALLBACK ELM_Callback(PCSTR ModuleName, - ModuleBaseType ModuleBase, - ULONG ModuleSize, - PVOID UserContext) -#endif { // Ignore VC++ runtimes prior to 7.1. Somehow some of them get loaded // into the process. 
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 625f67aa912a..42a92f9c6dfe 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -882,7 +882,17 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) { // Find a numeric suffix that isn't used by an existing file. Assume there // won't be more than 1 million files with the same prefix. Probably a safe // bet. - static unsigned FCounter = 0; + static int FCounter = -1; + if (FCounter < 0) { + // Give arbitrary initial seed. + // FIXME: We should use sys::fs::unique_file() in future. + LARGE_INTEGER cnt64; + DWORD x = GetCurrentProcessId(); + x = (x << 16) | (x >> 16); + if (QueryPerformanceCounter(&cnt64)) // RDTSC + x ^= cnt64.HighPart ^ cnt64.LowPart; + FCounter = x % 1000000; + } do { sprintf(FNBuffer+offset, "-%06u", FCounter); if (++FCounter > 999999) @@ -908,12 +918,12 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { } /// MapInFilePages - Not yet implemented on win32. -const char *Path::MapInFilePages(int FD, uint64_t FileSize) { +const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) { return 0; } /// MapInFilePages - Not yet implemented on win32. -void Path::UnMapFilePages(const char *Base, uint64_t FileSize) { +void Path::UnMapFilePages(const char *Base, size_t FileSize) { assert(0 && "NOT IMPLEMENTED"); } diff --git a/lib/Support/Windows/PathV2.inc b/lib/Support/Windows/PathV2.inc index 8effb0c737dd..af71b73cd693 100644 --- a/lib/Support/Windows/PathV2.inc +++ b/lib/Support/Windows/PathV2.inc @@ -449,7 +449,14 @@ error_code status(const Twine &path, file_status &result) { SmallString<128> path_storage; SmallVector path_utf16; - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + StringRef path8 = path.toStringRef(path_storage); + // FIXME: We should detect as many "special file name" as possible. 
+ if (path8.compare_lower("nul") == 0) { + result = file_status(file_type::character_file); + return success; + } + + if (error_code ec = UTF8ToUTF16(path8, path_utf16)) return ec; @@ -649,7 +656,7 @@ error_code get_magic(const Twine &path, uint32_t len, ::CloseHandle(file); if (!read_success || (bytes_read != len)) { // Set result size to the number of bytes read if it's valid. - if (bytes_read >= 0 && bytes_read <= len) + if (bytes_read <= len) result.set_size(bytes_read); // ERROR_HANDLE_EOF is mapped to errc::value_too_large. return ec; diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index 80ea7407b44e..5a71fa3d8cea 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -220,6 +220,36 @@ raw_ostream &raw_ostream::operator<<(const void *P) { } raw_ostream &raw_ostream::operator<<(double N) { +#ifdef _WIN32 + // On MSVCRT and compatible, output of %e is incompatible to Posix + // by default. Number of exponent digits should be at least 2. "%+03d" + // FIXME: Implement our formatter to here or Support/Format.h! + int fpcl = _fpclass(N); + + // negative zero + if (fpcl == _FPCLASS_NZ) + return *this << "-0.000000e+00"; + + char buf[16]; + unsigned len; + len = snprintf(buf, sizeof(buf), "%e", N); + if (len <= sizeof(buf) - 2) { + if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') { + int cs = buf[len - 4]; + if (cs == '+' || cs == '-') { + int c1 = buf[len - 2]; + int c0 = buf[len - 1]; + if (isdigit(c1) && isdigit(c0)) { + // Trim leading '0': "...e+012" -> "...e+12\0" + buf[len - 3] = c1; + buf[len - 2] = c0; + buf[--len] = 0; + } + } + } + return this->operator<<(buf); + } +#endif return this->operator<<(format("%e", N)); } @@ -265,15 +295,23 @@ raw_ostream &raw_ostream::write(const char *Ptr, size_t Size) { return write(Ptr, Size); } - // Write out the data in buffer-sized blocks until the remainder - // fits within the buffer. 
- do { - size_t NumBytes = OutBufEnd - OutBufCur; - copy_to_buffer(Ptr, NumBytes); - flush_nonempty(); - Ptr += NumBytes; - Size -= NumBytes; - } while (OutBufCur+Size > OutBufEnd); + size_t NumBytes = OutBufEnd - OutBufCur; + + // If the buffer is empty at this point we have a string that is larger + // than the buffer. Directly write the chunk that is a multiple of the + // preferred buffer size and put the remainder in the buffer. + if (BUILTIN_EXPECT(OutBufCur == OutBufStart, false)) { + size_t BytesToWrite = Size - (Size % NumBytes); + write_impl(Ptr, BytesToWrite); + copy_to_buffer(Ptr + BytesToWrite, Size - BytesToWrite); + return *this; + } + + // We don't have enough space in the buffer to fit the string in. Insert as + // much as possible, flush and start over with the remainder. + copy_to_buffer(Ptr, NumBytes); + flush_nonempty(); + return write(Ptr + NumBytes, Size - NumBytes); } copy_to_buffer(Ptr, Size); @@ -458,6 +496,14 @@ raw_fd_ostream::~raw_fd_ostream() { } } +#ifdef __MINGW32__ + // On mingw, global dtors should not call exit(). + // report_fatal_error() invokes exit(). We know report_fatal_error() + // might not write messages to stderr when any errors were detected + // on FD == 2. + if (FD == 2) return; +#endif + // If there are any pending errors, report them now. 
Clients wishing // to avoid report_fatal_error calls should check for errors with // has_error() and clear the error flag with clear_error() before diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c index cd018d5dc5bc..46c91a9c497c 100644 --- a/lib/Support/regcomp.c +++ b/lib/Support/regcomp.c @@ -780,7 +780,7 @@ p_b_cclass(struct parse *p, cset *cs) const char *u; char c; - while (MORE() && isalpha(PEEK())) + while (MORE() && isalpha((uch)PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index bf4315fc6c3e..6af5f85e8a85 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -51,6 +51,12 @@ def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", // to just not use them. def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true", "Disable VFP / NEON MAC instructions">; + +// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding. +def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding", + "HasVMLxForwarding", "true", + "Has multiplier accumulator forwarding">; + // Some processors benefit from using NEON instructions for scalar // single-precision FP operations. def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", @@ -61,6 +67,14 @@ def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP", def FeaturePref32BitThumb : SubtargetFeature<"32bit", "Pref32BitThumb", "true", "Prefer 32-bit Thumb instrs">; +/// Some instructions update CPSR partially, which can add false dependency for +/// out-of-order implementation, e.g. Cortex-A9, unless each individual bit is +/// mapped to a separate physical register. Avoid partial CPSR update for these +/// processors. +def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr", + "AvoidCPSRPartialUpdate", "true", + "Avoid CPSR partial update for OOO execution">; + // Multiprocessing extension. 
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; @@ -100,11 +114,13 @@ def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others", def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", "Cortex-A8 ARM processors", [FeatureSlowFPBrcc, FeatureNEONForFP, - FeatureHasSlowFPVMLx, FeatureT2XtPk]>; + FeatureHasSlowFPVMLx, FeatureVMLxForwarding, + FeatureT2XtPk]>; def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", "Cortex-A9 ARM processors", - [FeatureHasSlowFPVMLx, FeatureT2XtPk, - FeatureFP16]>; + [FeatureVMLxForwarding, + FeatureT2XtPk, FeatureFP16, + FeatureAvoidPartialCPSR]>; class ProcNoItin Features> : Processor; @@ -171,6 +187,8 @@ def : Processor<"cortex-a8", CortexA8Itineraries, [ArchV7A, ProcA8]>; def : Processor<"cortex-a9", CortexA9Itineraries, [ArchV7A, ProcA9]>; +def : Processor<"cortex-a9-mp", CortexA9Itineraries, + [ArchV7A, ProcA9, FeatureMP]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [ArchV7M]>; diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index 19fbf0548b02..595708fa7881 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -408,16 +408,18 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand is a reg if in // reg/reg form, otherwise it's reg#0. The third field encodes the operation - // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. + // in bit 12, the immediate in bits 0-11, and the shift op in 13-15. The + // fourth operand 16-17 encodes the index mode. // // If this addressing mode is a frame index (before prolog/epilog insertion // and code rewriting), this operand will have the form: FI#, reg0, // with no shift amount for the frame offset. 
// - static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO) { + static inline unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, + unsigned IdxMode = 0) { assert(Imm12 < (1 << 12) && "Imm too large!"); bool isSub = Opc == sub; - return Imm12 | ((int)isSub << 12) | (SO << 13); + return Imm12 | ((int)isSub << 12) | (SO << 13) | (IdxMode << 16) ; } static inline unsigned getAM2Offset(unsigned AM2Opc) { return AM2Opc & ((1 << 12)-1); @@ -426,7 +428,10 @@ namespace ARM_AM { return ((AM2Opc >> 12) & 1) ? sub : add; } static inline ShiftOpc getAM2ShiftOpc(unsigned AM2Opc) { - return (ShiftOpc)(AM2Opc >> 13); + return (ShiftOpc)((AM2Opc >> 13) & 7); + } + static inline unsigned getAM2IdxMode(unsigned AM2Opc) { + return (AM2Opc >> 16); } @@ -441,12 +446,14 @@ namespace ARM_AM { // // The first operand is always a Reg. The second operand is a reg if in // reg/reg form, otherwise it's reg#0. The third field encodes the operation - // in bit 8, the immediate in bits 0-7. + // in bit 8, the immediate in bits 0-7. The fourth operand 9-10 encodes the + // index mode. /// getAM3Opc - This function encodes the addrmode3 opc field. - static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset) { + static inline unsigned getAM3Opc(AddrOpc Opc, unsigned char Offset, + unsigned IdxMode = 0) { bool isSub = Opc == sub; - return ((int)isSub << 8) | Offset; + return ((int)isSub << 8) | Offset | (IdxMode << 9); } static inline unsigned char getAM3Offset(unsigned AM3Opc) { return AM3Opc & 0xFF; @@ -454,6 +461,9 @@ namespace ARM_AM { static inline AddrOpc getAM3Op(unsigned AM3Opc) { return ((AM3Opc >> 8) & 1) ? 
sub : add; } + static inline unsigned getAM3IdxMode(unsigned AM3Opc) { + return (AM3Opc >> 9); + } //===--------------------------------------------------------------------===// // Addressing Mode #4 diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp index ec23449d7d42..f0628192308f 100644 --- a/lib/Target/ARM/ARMAsmBackend.cpp +++ b/lib/Target/ARM/ARMAsmBackend.cpp @@ -246,7 +246,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { } uint32_t out = (opc << 21); - out |= (Value & 0x800) << 14; + out |= (Value & 0x800) << 15; out |= (Value & 0x700) << 4; out |= (Value & 0x0FF); @@ -416,21 +416,22 @@ void ELFARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, // FIXME: This should be in a separate file. class DarwinARMAsmBackend : public ARMAsmBackend { public: - DarwinARMAsmBackend(const Target &T) : ARMAsmBackend(T) { } - - void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, - uint64_t Value) const; + const object::mach::CPUSubtypeARM Subtype; + DarwinARMAsmBackend(const Target &T, object::mach::CPUSubtypeARM st) + : ARMAsmBackend(T), Subtype(st) { } MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - // FIXME: Subtarget info should be derived. Force v7 for now. 
return createMachObjectWriter(new ARMMachObjectWriter( /*Is64Bit=*/false, object::mach::CTM_ARM, - object::mach::CSARM_V7), + Subtype), OS, /*IsLittleEndian=*/true); } + void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, + uint64_t Value) const; + virtual bool doesSectionRequireSymbols(const MCSection &Section) const { return false; } @@ -499,14 +500,17 @@ void DarwinARMAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, TargetAsmBackend *llvm::createARMAsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: - return new DarwinARMAsmBackend(T); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - assert(0 && "Windows not supported on ARM"); - default: - return new ELFARMAsmBackend(T, Triple(TT).getOS()); + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) { + if (TheTriple.getArchName() == "armv6" || + TheTriple.getArchName() == "thumbv6") + return new DarwinARMAsmBackend(T, object::mach::CSARM_V6); + return new DarwinARMAsmBackend(T, object::mach::CSARM_V7); } + + if (TheTriple.isOSWindows()) + assert(0 && "Windows not supported on ARM"); + + return new ELFARMAsmBackend(T, Triple(TT).getOS()); } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index db12b8e4fc2d..c428e1852a46 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -88,6 +88,11 @@ namespace { case ARMBuildAttrs::CPU_name: Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String)); break; + /* GAS requires .fpu to be emitted regardless of EABI attribute */ + case ARMBuildAttrs::Advanced_SIMD_arch: + case ARMBuildAttrs::VFP_arch: + Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String)); + break; default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } } @@ -167,6 +172,117 @@ getDebugValueLocation(const MachineInstr *MI) const { return Location; } +/// getDwarfRegOpSize - get size required to emit given 
machine location using +/// dwarf encoding. +unsigned ARMAsmPrinter::getDwarfRegOpSize(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) + return AsmPrinter::getDwarfRegOpSize(MLoc); + else { + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + unsigned Rx = 256 + (SReg >> 1); + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB + // 1 + ULEB(Rx) + 1 + 1 + 1 + return 4 + MCAsmInfo::getULEB128Size(Rx); + } + + if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) + + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8); + // 6 + ULEB(D1) + ULEB(D2) + return 6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2); + } + } + return 0; +} + +/// EmitDwarfRegOp - Emit dwarf register operation. 
+void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { + const TargetRegisterInfo *RI = TM.getRegisterInfo(); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) + AsmPrinter::EmitDwarfRegOp(MLoc); + else { + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + bool odd = SReg & 0x1; + unsigned Rx = 256 + (SReg >> 1); + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_bit_piece + ULEB + ULEB + // 1 + ULEB(Rx) + 1 + 1 + 1 + EmitInt16(4 + MCAsmInfo::getULEB128Size(Rx)); + + OutStreamer.AddComment("DW_OP_regx for S register"); + EmitInt8(dwarf::DW_OP_regx); + + OutStreamer.AddComment(Twine(SReg)); + EmitULEB128(Rx); + + if (odd) { + OutStreamer.AddComment("DW_OP_bit_piece 32 32"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(32); + } else { + OutStreamer.AddComment("DW_OP_bit_piece 32 0"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(0); + } + } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. 
+ // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("Loc expr size"); + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8) + + // DW_OP_regx + ULEB + DW_OP_piece + ULEB(8); + // 6 + ULEB(D1) + ULEB(D2) + EmitInt16(6 + MCAsmInfo::getULEB128Size(D1) + MCAsmInfo::getULEB128Size(D2)); + + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D1); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + + OutStreamer.AddComment("DW_OP_regx for Q register: D2"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D2); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + } + } +} + void ARMAsmPrinter::EmitFunctionEntryLabel() { if (AFI->isThumbFunction()) { OutStreamer.EmitAssemblerFlag(MCAF_Code16); @@ -453,10 +569,13 @@ void ARMAsmPrinter::emitAttributes() { emitARMAttributeSection(); + /* GAS expect .fpu to be emitted, regardless of VFP build attribute */ + bool emitFPU = false; AttributeEmitter *AttrEmitter; - if (OutStreamer.hasRawTextSupport()) + if (OutStreamer.hasRawTextSupport()) { AttrEmitter = new AsmAttributeEmitter(OutStreamer); - else { + emitFPU = true; + } else { MCObjectStreamer &O = static_cast(OutStreamer); AttrEmitter = new ObjectAttributeEmitter(O); } @@ -490,10 +609,36 @@ void ARMAsmPrinter::emitAttributes() { ARMBuildAttrs::Allowed); } - // FIXME: Emit FPU type - if (Subtarget->hasVFP2()) + if (Subtarget->hasNEON() && emitFPU) { + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/vfpv3/vfpv2 for .fpu parameters */ + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); + /* If emitted for NEON, omit from VFP below, since you can have both + * NEON and VFP in build attributes but only one .fpu */ + emitFPU = false; + } + + /* VFPv3 + .fpu */ + if 
(Subtarget->hasVFP3()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, + ARMBuildAttrs::AllowFPv3A); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3"); + + /* VFPv2 + .fpu */ + } else if (Subtarget->hasVFP2()) { AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, ARMBuildAttrs::AllowFPv2); + if (emitFPU) + AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2"); + } + + /* TODO: ARMBuildAttrs::Allowed is not completely accurate, + * since NEON can have 1 (allowed) or 2 (fused MAC operations) */ + if (Subtarget->hasNEON()) { + AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::Allowed); + } // Signal various FP modes. if (!UnsafeFPMath) { @@ -777,10 +922,161 @@ void ARMAsmPrinter::EmitPatchedInstruction(const MachineInstr *MI, OutStreamer.EmitInstruction(TmpInst); } +void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { + assert(MI->getFlag(MachineInstr::FrameSetup) && + "Only instruction which are involved into frame setup code are allowed"); + + const MachineFunction &MF = *MI->getParent()->getParent(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + const ARMFunctionInfo &AFI = *MF.getInfo(); + + unsigned FramePtr = RegInfo->getFrameRegister(MF); + unsigned Opc = MI->getOpcode(); + unsigned SrcReg, DstReg; + + if (Opc == ARM::tPUSH || Opc == ARM::tLDRpci) { + // Two special cases: + // 1) tPUSH does not have src/dst regs. + // 2) for Thumb1 code we sometimes materialize the constant via constpool + // load. Yes, this is pretty fragile, but for now I don't see better + // way... :( + SrcReg = DstReg = ARM::SP; + } else { + SrcReg = MI->getOperand(1).getReg(); + DstReg = MI->getOperand(0).getReg(); + } + + // Try to figure out the unwinding opcode out of src / dst regs. + if (MI->getDesc().mayStore()) { + // Register saves. 
+ assert(DstReg == ARM::SP && + "Only stack pointer as a destination reg is supported"); + + SmallVector RegList; + // Skip src & dst reg, and pred ops. + unsigned StartOp = 2 + 2; + // Use all the operands. + unsigned NumOffset = 0; + + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::tPUSH: + // Special case here: no src & dst reg, but two extra imp ops. + StartOp = 2; NumOffset = 2; + case ARM::STMDB_UPD: + case ARM::t2STMDB_UPD: + case ARM::VSTMDDB_UPD: + assert(SrcReg == ARM::SP && + "Only stack pointer as a source reg is supported"); + for (unsigned i = StartOp, NumOps = MI->getNumOperands() - NumOffset; + i != NumOps; ++i) + RegList.push_back(MI->getOperand(i).getReg()); + break; + case ARM::STR_PRE: + assert(MI->getOperand(2).getReg() == ARM::SP && + "Only stack pointer as a source reg is supported"); + RegList.push_back(SrcReg); + break; + } + OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + } else { + // Changes of stack / frame pointer. + if (SrcReg == ARM::SP) { + int64_t Offset = 0; + switch (Opc) { + default: + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + case ARM::MOVr: + case ARM::tMOVgpr2gpr: + case ARM::tMOVgpr2tgpr: + Offset = 0; + break; + case ARM::ADDri: + Offset = -MI->getOperand(2).getImm(); + break; + case ARM::SUBri: + case ARM::t2SUBrSPi: + Offset = MI->getOperand(2).getImm(); + break; + case ARM::tSUBspi: + Offset = MI->getOperand(2).getImm()*4; + break; + case ARM::tADDspi: + case ARM::tADDrSPi: + Offset = -MI->getOperand(2).getImm()*4; + break; + case ARM::tLDRpci: { + // Grab the constpool index and check, whether it corresponds to + // original or cloned constpool entry. 
+ unsigned CPI = MI->getOperand(1).getIndex(); + const MachineConstantPool *MCP = MF.getConstantPool(); + if (CPI >= MCP->getConstants().size()) + CPI = AFI.getOriginalCPIdx(CPI); + assert(CPI != -1U && "Invalid constpool index"); + + // Derive the actual offset. + const MachineConstantPoolEntry &CPE = MCP->getConstants()[CPI]; + assert(!CPE.isMachineConstantPoolEntry() && "Invalid constpool entry"); + // FIXME: Check for user, it should be "add" instruction! + Offset = -cast(CPE.Val.ConstVal)->getSExtValue(); + break; + } + } + + if (DstReg == FramePtr && FramePtr != ARM::SP) + // Set-up of the frame pointer. Positive values correspond to "add" + // instruction. + OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset); + else if (DstReg == ARM::SP) { + // Change of SP by an offset. Positive values correspond to "sub" + // instruction. + OutStreamer.EmitPad(Offset); + } else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } else if (DstReg == ARM::SP) { + // FIXME: .movsp goes here + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + else { + MI->dump(); + assert(0 && "Unsupported opcode for unwinding information"); + } + } +} + +extern cl::opt EnableARMEHABI; + void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); switch (Opc) { default: break; + case ARM::B: { + // B is just a Bcc with an 'always' predicate. + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::Bcc); + // Add predicate operands. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::LDMIA_RET: { + // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as + // such has additional code-gen properties and scheduling information. + // To emit it, we just construct as normal and set the opcode to LDMIA_UPD. 
+ MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(ARM::LDMIA_UPD); + OutStreamer.EmitInstruction(TmpInst); + return; + } case ARM::t2ADDrSPi: case ARM::t2ADDrSPi12: case ARM::t2SUBrSPi: @@ -850,6 +1146,26 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { OutStreamer.EmitInstruction(TmpInst); return; } + // Darwin call instructions are just normal call instructions with different + // clobber semantics (they clobber R9). + case ARM::BLr9: + case ARM::BLr9_pred: + case ARM::BLXr9: + case ARM::BLXr9_pred: { + unsigned newOpc; + switch (Opc) { + default: assert(0); + case ARM::BLr9: newOpc = ARM::BL; break; + case ARM::BLr9_pred: newOpc = ARM::BL_pred; break; + case ARM::BLXr9: newOpc = ARM::BLX; break; + case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break; + } + MCInst TmpInst; + LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + TmpInst.setOpcode(newOpc); + OutStreamer.EmitInstruction(TmpInst); + return; + } case ARM::BXr9_CALL: case ARM::BX_CALL: { { @@ -1502,6 +1818,49 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { } return; } + // Tail jump branches are really just branch instructions with additional + // code-gen attributes. Convert them to the canonical form here. + case ARM::TAILJMPd: + case ARM::TAILJMPdND: { + MCInst TmpInst, TmpInst2; + // Lower the instruction as-is to get the operands properly converted. + LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); + TmpInst.setOpcode(ARM::Bcc); + TmpInst.addOperand(TmpInst2.getOperand(0)); + // Add predicate operands. 
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::tTAILJMPd: + case ARM::tTAILJMPdND: { + MCInst TmpInst, TmpInst2; + LowerARMMachineInstrToMCInst(MI, TmpInst2, *this); + TmpInst.setOpcode(ARM::tB); + TmpInst.addOperand(TmpInst2.getOperand(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + case ARM::TAILJMPrND: + case ARM::tTAILJMPrND: + case ARM::TAILJMPr: + case ARM::tTAILJMPr: { + unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND) + ? ARM::BX : ARM::tBX; + MCInst TmpInst; + TmpInst.setOpcode(newOpc); + TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg())); + // Predicate. + TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL)); + TmpInst.addOperand(MCOperand::CreateReg(0)); + OutStreamer.AddComment("TAILCALL"); + OutStreamer.EmitInstruction(TmpInst); + return; + } + // These are the pseudos created to comply with stricter operand restrictions // on ARMv5. Lower them now to "normal" instructions, since all the // restrictions are already satisfied. 
@@ -1530,6 +1889,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; LowerARMMachineInstrToMCInst(MI, TmpInst, *this); + + // Emit unwinding stuff for frame-related instructions + if (EnableARMEHABI && MI->getFlag(MachineInstr::FrameSetup)) + EmitUnwindingInstruction(MI); + OutStreamer.EmitInstruction(TmpInst); } @@ -1538,10 +1902,11 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { //===----------------------------------------------------------------------===// static MCInstPrinter *createARMMCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new ARMInstPrinter(MAI); + return new ARMInstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index 585268442ce4..1ee1b7024d15 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -82,11 +82,20 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { // Generic helper used to emit e.g. ARMv5 mul pseudos void EmitPatchedInstruction(const MachineInstr *MI, unsigned TargetOpc); + void EmitUnwindingInstruction(const MachineInstr *MI); + public: void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); MachineLocation getDebugValueLocation(const MachineInstr *MI) const; + /// getDwarfRegOpSize - get size required to emit given machine location + /// using dwarf encoding. + virtual unsigned getDwarfRegOpSize(const MachineLocation &MLoc) const; + + /// EmitDwarfRegOp - Emit dwarf register operation. + virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const; + virtual unsigned getISAEncoding() { // ARM/Darwin adds ISA to the DWARF info for each function. 
if (!Subtarget->isTargetDarwin()) diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h index a56cc1a9f249..36edbad7a601 100644 --- a/lib/Target/ARM/ARMBaseInfo.h +++ b/lib/Target/ARM/ARMBaseInfo.h @@ -200,6 +200,59 @@ inline static unsigned getARMRegisterNumbering(unsigned Reg) { } namespace ARMII { + + /// ARM Index Modes + enum IndexMode { + IndexModeNone = 0, + IndexModePre = 1, + IndexModePost = 2, + IndexModeUpd = 3 + }; + + /// ARM Addressing Modes + enum AddrMode { + AddrModeNone = 0, + AddrMode1 = 1, + AddrMode2 = 2, + AddrMode3 = 3, + AddrMode4 = 4, + AddrMode5 = 5, + AddrMode6 = 6, + AddrModeT1_1 = 7, + AddrModeT1_2 = 8, + AddrModeT1_4 = 9, + AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data + AddrModeT2_i12 = 11, + AddrModeT2_i8 = 12, + AddrModeT2_so = 13, + AddrModeT2_pc = 14, // +/- i12 for pc relative data + AddrModeT2_i8s4 = 15, // i8 * 4 + AddrMode_i12 = 16 + }; + + inline static const char *AddrModeToString(AddrMode addrmode) { + switch (addrmode) { + default: llvm_unreachable("Unknown memory operation"); + case AddrModeNone: return "AddrModeNone"; + case AddrMode1: return "AddrMode1"; + case AddrMode2: return "AddrMode2"; + case AddrMode3: return "AddrMode3"; + case AddrMode4: return "AddrMode4"; + case AddrMode5: return "AddrMode5"; + case AddrMode6: return "AddrMode6"; + case AddrModeT1_1: return "AddrModeT1_1"; + case AddrModeT1_2: return "AddrModeT1_2"; + case AddrModeT1_4: return "AddrModeT1_4"; + case AddrModeT1_s: return "AddrModeT1_s"; + case AddrModeT2_i12: return "AddrModeT2_i12"; + case AddrModeT2_i8: return "AddrModeT2_i8"; + case AddrModeT2_so: return "AddrModeT2_so"; + case AddrModeT2_pc: return "AddrModeT2_pc"; + case AddrModeT2_i8s4: return "AddrModeT2_i8s4"; + case AddrMode_i12: return "AddrMode_i12"; + } + } + /// Target Operand Flag enum. 
enum TOF { //===------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 2268e59ea7b1..44a397611526 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1021,7 +1021,7 @@ reMaterialize(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), DestReg) .addConstantPoolIndex(CPI).addImm(PCLabelId); - (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); + MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); break; } } @@ -1080,11 +1080,18 @@ bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0, int CPI1 = MO1.getIndex(); const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; - ARMConstantPoolValue *ACPV0 = - static_cast(MCPE0.Val.MachineCPVal); - ARMConstantPoolValue *ACPV1 = - static_cast(MCPE1.Val.MachineCPVal); - return ACPV0->hasSameValue(ACPV1); + bool isARMCP0 = MCPE0.isMachineConstantPoolEntry(); + bool isARMCP1 = MCPE1.isMachineConstantPoolEntry(); + if (isARMCP0 && isARMCP1) { + ARMConstantPoolValue *ACPV0 = + static_cast(MCPE0.Val.MachineCPVal); + ARMConstantPoolValue *ACPV1 = + static_cast(MCPE1.Val.MachineCPVal); + return ACPV0->hasSameValue(ACPV1); + } else if (!isARMCP0 && !isARMCP1) { + return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal; + } + return false; } else if (Opcode == ARM::PICLDR) { if (MI1->getOpcode() != Opcode) return false; @@ -1194,7 +1201,7 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, } /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to -/// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should +/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. 
On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. This function takes two integers that represent the load offsets @@ -1263,19 +1270,19 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI, } bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, + unsigned NumCycles, unsigned ExtraPredCycles, float Probability, float Confidence) const { - if (!NumCyles) + if (!NumCycles) return false; // Attempt to estimate the relative costs of predication versus branching. - float UnpredCost = Probability * NumCyles; + float UnpredCost = Probability * NumCycles; UnpredCost += 1.0; // The branch itself UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty(); - return (float)(NumCyles + ExtraPredCycles) < UnpredCost; + return (float)(NumCycles + ExtraPredCycles) < UnpredCost; } bool ARMBaseInstrInfo:: @@ -1328,7 +1335,7 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII) { + const ARMBaseInstrInfo &TII, unsigned MIFlags) { bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -1346,7 +1353,8 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri; BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); BaseReg = DestReg; } } @@ -1610,18 +1618,84 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask, // Set the "zero" bit in CPSR. 
switch (MI->getOpcode()) { default: break; + case ARM::RSBrr: + case ARM::RSBri: + case ARM::RSCrr: + case ARM::RSCri: + case ARM::ADDrr: case ARM::ADDri: - case ARM::ANDri: - case ARM::t2ANDri: + case ARM::ADCrr: + case ARM::ADCri: + case ARM::SUBrr: case ARM::SUBri: + case ARM::SBCrr: + case ARM::SBCri: + case ARM::t2RSBri: + case ARM::t2ADDrr: case ARM::t2ADDri: + case ARM::t2ADCrr: + case ARM::t2ADCri: + case ARM::t2SUBrr: case ARM::t2SUBri: + case ARM::t2SBCrr: + case ARM::t2SBCri: + case ARM::ANDrr: + case ARM::ANDri: + case ARM::t2ANDrr: + case ARM::t2ANDri: + case ARM::ORRrr: + case ARM::ORRri: + case ARM::t2ORRrr: + case ARM::t2ORRri: + case ARM::EORrr: + case ARM::EORri: + case ARM::t2EORrr: + case ARM::t2EORri: { + // Scan forward for the use of CPSR, if it's a conditional code requires + // checking of V bit, then this is not safe to do. If we can't find the + // CPSR use (i.e. used in another block), then it's not safe to perform + // the optimization. + bool isSafe = false; + I = CmpInstr; + E = MI->getParent()->end(); + while (!isSafe && ++I != E) { + const MachineInstr &Instr = *I; + for (unsigned IO = 0, EO = Instr.getNumOperands(); + !isSafe && IO != EO; ++IO) { + const MachineOperand &MO = Instr.getOperand(IO); + if (!MO.isReg() || MO.getReg() != ARM::CPSR) + continue; + if (MO.isDef()) { + isSafe = true; + break; + } + // Condition code is after the operand before CPSR. + ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + switch (CC) { + default: + isSafe = true; + break; + case ARMCC::VS: + case ARMCC::VC: + case ARMCC::GE: + case ARMCC::LT: + case ARMCC::GT: + case ARMCC::LE: + return false; + } + } + } + + if (!isSafe) + return false; + // Toggle the optional operand to CPSR. 
MI->getOperand(5).setReg(ARM::CPSR); MI->getOperand(5).setIsDef(true); CmpInstr->eraseFromParent(); return true; } + } return false; } @@ -1741,9 +1815,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, llvm_unreachable("Unexpected multi-uops instruction!"); break; case ARM::VLDMQIA: - case ARM::VLDMQDB: case ARM::VSTMQIA: - case ARM::VSTMQDB: return 2; // The number of uOps for load / store multiple are determined by the number @@ -1757,19 +1829,15 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, // is not 64-bit aligned, then AGU would take an extra cycle. For VFP / NEON // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1. case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VLDMDIA_UPD: case ARM::VLDMDDB_UPD: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: case ARM::VSTMDIA: - case ARM::VSTMDDB: case ARM::VSTMDIA_UPD: case ARM::VSTMDDB_UPD: case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: { unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); @@ -1859,7 +1927,6 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData, switch (DefTID.getOpcode()) { default: break; case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: isSLoad = true; @@ -1935,7 +2002,6 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData, switch (UseTID.getOpcode()) { default: break; case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: isSStore = true; @@ -2006,11 +2072,9 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VLDMDIA_UPD: case ARM::VLDMDDB_UPD: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VLDMSIA_UPD: case ARM::VLDMSDB_UPD: DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign); @@ -2049,11 +2113,9 @@ 
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, break; case ARM::VSTMDIA: - case ARM::VSTMDDB: case ARM::VSTMDIA_UPD: case ARM::VSTMDDB_UPD: case ARM::VSTMSIA: - case ARM::VSTMSDB: case ARM::VSTMSIA_UPD: case ARM::VSTMSDB_UPD: UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign); @@ -2160,6 +2222,101 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } + if (DefAlign < 8 && Subtarget.isCortexA9()) + switch (DefTID.getOpcode()) { + default: break; + case ARM::VLD1q8: + case ARM::VLD1q16: + case ARM::VLD1q32: + case ARM::VLD1q64: + case ARM::VLD1q8_UPD: + case ARM::VLD1q16_UPD: + case ARM::VLD1q32_UPD: + case ARM::VLD1q64_UPD: + case ARM::VLD2d8: + case ARM::VLD2d16: + case ARM::VLD2d32: + case ARM::VLD2q8: + case ARM::VLD2q16: + case ARM::VLD2q32: + case ARM::VLD2d8_UPD: + case ARM::VLD2d16_UPD: + case ARM::VLD2d32_UPD: + case ARM::VLD2q8_UPD: + case ARM::VLD2q16_UPD: + case ARM::VLD2q32_UPD: + case ARM::VLD3d8: + case ARM::VLD3d16: + case ARM::VLD3d32: + case ARM::VLD1d64T: + case ARM::VLD3d8_UPD: + case ARM::VLD3d16_UPD: + case ARM::VLD3d32_UPD: + case ARM::VLD1d64T_UPD: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: + case ARM::VLD4d8: + case ARM::VLD4d16: + case ARM::VLD4d32: + case ARM::VLD1d64Q: + case ARM::VLD4d8_UPD: + case ARM::VLD4d16_UPD: + case ARM::VLD4d32_UPD: + case ARM::VLD1d64Q_UPD: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: + case ARM::VLD1DUPq8: + case ARM::VLD1DUPq16: + case ARM::VLD1DUPq32: + case ARM::VLD1DUPq8_UPD: + case ARM::VLD1DUPq16_UPD: + case ARM::VLD1DUPq32_UPD: + case ARM::VLD2DUPd8: + case ARM::VLD2DUPd16: + case ARM::VLD2DUPd32: + case ARM::VLD2DUPd8_UPD: + case ARM::VLD2DUPd16_UPD: + case ARM::VLD2DUPd32_UPD: + case ARM::VLD4DUPd8: + case ARM::VLD4DUPd16: + case ARM::VLD4DUPd32: + case ARM::VLD4DUPd8_UPD: + case ARM::VLD4DUPd16_UPD: + case ARM::VLD4DUPd32_UPD: + case ARM::VLD1LNd8: + case ARM::VLD1LNd16: + 
case ARM::VLD1LNd32: + case ARM::VLD1LNd8_UPD: + case ARM::VLD1LNd16_UPD: + case ARM::VLD1LNd32_UPD: + case ARM::VLD2LNd8: + case ARM::VLD2LNd16: + case ARM::VLD2LNd32: + case ARM::VLD2LNq16: + case ARM::VLD2LNq32: + case ARM::VLD2LNd8_UPD: + case ARM::VLD2LNd16_UPD: + case ARM::VLD2LNd32_UPD: + case ARM::VLD2LNq16_UPD: + case ARM::VLD2LNq32_UPD: + case ARM::VLD4LNd8: + case ARM::VLD4LNd16: + case ARM::VLD4LNd32: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: + case ARM::VLD4LNd8_UPD: + case ARM::VLD4LNd16_UPD: + case ARM::VLD4LNd32_UPD: + case ARM::VLD4LNq16_UPD: + case ARM::VLD4LNq32_UPD: + // If the address is not 64-bit aligned, the latencies of these + // instructions increases by one. + ++Latency; + break; + } + return Latency; } @@ -2226,6 +2383,113 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, } } + if (DefAlign < 8 && Subtarget.isCortexA9()) + switch (DefTID.getOpcode()) { + default: break; + case ARM::VLD1q8Pseudo: + case ARM::VLD1q16Pseudo: + case ARM::VLD1q32Pseudo: + case ARM::VLD1q64Pseudo: + case ARM::VLD1q8Pseudo_UPD: + case ARM::VLD1q16Pseudo_UPD: + case ARM::VLD1q32Pseudo_UPD: + case ARM::VLD1q64Pseudo_UPD: + case ARM::VLD2d8Pseudo: + case ARM::VLD2d16Pseudo: + case ARM::VLD2d32Pseudo: + case ARM::VLD2q8Pseudo: + case ARM::VLD2q16Pseudo: + case ARM::VLD2q32Pseudo: + case ARM::VLD2d8Pseudo_UPD: + case ARM::VLD2d16Pseudo_UPD: + case ARM::VLD2d32Pseudo_UPD: + case ARM::VLD2q8Pseudo_UPD: + case ARM::VLD2q16Pseudo_UPD: + case ARM::VLD2q32Pseudo_UPD: + case ARM::VLD3d8Pseudo: + case ARM::VLD3d16Pseudo: + case ARM::VLD3d32Pseudo: + case ARM::VLD1d64TPseudo: + case ARM::VLD3d8Pseudo_UPD: + case ARM::VLD3d16Pseudo_UPD: + case ARM::VLD3d32Pseudo_UPD: + case ARM::VLD1d64TPseudo_UPD: + case ARM::VLD3q8Pseudo_UPD: + case ARM::VLD3q16Pseudo_UPD: + case ARM::VLD3q32Pseudo_UPD: + case ARM::VLD3q8oddPseudo: + case ARM::VLD3q16oddPseudo: + case ARM::VLD3q32oddPseudo: + case ARM::VLD3q8oddPseudo_UPD: + case ARM::VLD3q16oddPseudo_UPD: + 
case ARM::VLD3q32oddPseudo_UPD: + case ARM::VLD4d8Pseudo: + case ARM::VLD4d16Pseudo: + case ARM::VLD4d32Pseudo: + case ARM::VLD1d64QPseudo: + case ARM::VLD4d8Pseudo_UPD: + case ARM::VLD4d16Pseudo_UPD: + case ARM::VLD4d32Pseudo_UPD: + case ARM::VLD1d64QPseudo_UPD: + case ARM::VLD4q8Pseudo_UPD: + case ARM::VLD4q16Pseudo_UPD: + case ARM::VLD4q32Pseudo_UPD: + case ARM::VLD4q8oddPseudo: + case ARM::VLD4q16oddPseudo: + case ARM::VLD4q32oddPseudo: + case ARM::VLD4q8oddPseudo_UPD: + case ARM::VLD4q16oddPseudo_UPD: + case ARM::VLD4q32oddPseudo_UPD: + case ARM::VLD1DUPq8Pseudo: + case ARM::VLD1DUPq16Pseudo: + case ARM::VLD1DUPq32Pseudo: + case ARM::VLD1DUPq8Pseudo_UPD: + case ARM::VLD1DUPq16Pseudo_UPD: + case ARM::VLD1DUPq32Pseudo_UPD: + case ARM::VLD2DUPd8Pseudo: + case ARM::VLD2DUPd16Pseudo: + case ARM::VLD2DUPd32Pseudo: + case ARM::VLD2DUPd8Pseudo_UPD: + case ARM::VLD2DUPd16Pseudo_UPD: + case ARM::VLD2DUPd32Pseudo_UPD: + case ARM::VLD4DUPd8Pseudo: + case ARM::VLD4DUPd16Pseudo: + case ARM::VLD4DUPd32Pseudo: + case ARM::VLD4DUPd8Pseudo_UPD: + case ARM::VLD4DUPd16Pseudo_UPD: + case ARM::VLD4DUPd32Pseudo_UPD: + case ARM::VLD1LNq8Pseudo: + case ARM::VLD1LNq16Pseudo: + case ARM::VLD1LNq32Pseudo: + case ARM::VLD1LNq8Pseudo_UPD: + case ARM::VLD1LNq16Pseudo_UPD: + case ARM::VLD1LNq32Pseudo_UPD: + case ARM::VLD2LNd8Pseudo: + case ARM::VLD2LNd16Pseudo: + case ARM::VLD2LNd32Pseudo: + case ARM::VLD2LNq16Pseudo: + case ARM::VLD2LNq32Pseudo: + case ARM::VLD2LNd8Pseudo_UPD: + case ARM::VLD2LNd16Pseudo_UPD: + case ARM::VLD2LNd32Pseudo_UPD: + case ARM::VLD2LNq16Pseudo_UPD: + case ARM::VLD2LNq32Pseudo_UPD: + case ARM::VLD4LNd8Pseudo: + case ARM::VLD4LNd16Pseudo: + case ARM::VLD4LNd32Pseudo: + case ARM::VLD4LNq16Pseudo: + case ARM::VLD4LNq32Pseudo: + case ARM::VLD4LNd8Pseudo_UPD: + case ARM::VLD4LNd16Pseudo_UPD: + case ARM::VLD4LNd32Pseudo_UPD: + case ARM::VLD4LNq16Pseudo_UPD: + case ARM::VLD4LNq32Pseudo_UPD: + // If the address is not 64-bit aligned, the latencies of these + // instructions 
increases by one. + ++Latency; + break; + } + return Latency; } @@ -2264,9 +2528,7 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, default: return ItinData->getStageLatency(get(Opcode).getSchedClass()); case ARM::VLDMQIA: - case ARM::VLDMQDB: case ARM::VSTMQIA: - case ARM::VSTMQDB: return 2; } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 7e2183d7cd5e..9a2faf8f9aae 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -34,25 +34,7 @@ namespace ARMII { //===------------------------------------------------------------------===// // This four-bit field describes the addressing mode used. - - AddrModeMask = 0x1f, - AddrModeNone = 0, - AddrMode1 = 1, - AddrMode2 = 2, - AddrMode3 = 3, - AddrMode4 = 4, - AddrMode5 = 5, - AddrMode6 = 6, - AddrModeT1_1 = 7, - AddrModeT1_2 = 8, - AddrModeT1_4 = 9, - AddrModeT1_s = 10, // i8 * 4 for pc and sp relative data - AddrModeT2_i12 = 11, - AddrModeT2_i8 = 12, - AddrModeT2_so = 13, - AddrModeT2_pc = 14, // +/- i12 for pc relative data - AddrModeT2_i8s4 = 15, // i8 * 4 - AddrMode_i12 = 16, + AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h // Size* - Flags to keep track of the size of an instruction. SizeShift = 5, @@ -64,11 +46,9 @@ namespace ARMII { // IndexMode - Unindex, pre-indexed, or post-indexed are valid for load // and store ops only. Generic "updating" flag is used for ld/st multiple. + // The index mode enums are declared in ARMBaseInfo.h IndexModeShift = 8, IndexModeMask = 3 << IndexModeShift, - IndexModePre = 1, - IndexModePost = 2, - IndexModeUpd = 3, //===------------------------------------------------------------------===// // Instruction encoding formats. 
@@ -311,7 +291,7 @@ class ARMBaseInstrInfo : public TargetInstrInfoImpl { int64_t &Offset1, int64_t &Offset2)const; /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to - /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. This function takes two integers that represent the load offsets @@ -327,7 +307,7 @@ class ARMBaseInstrInfo : public TargetInstrInfoImpl { const MachineFunction &MF) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, unsigned ExtraPredCycles, + unsigned NumCycles, unsigned ExtraPredCycles, float Prob, float Confidence) const; virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB, @@ -337,10 +317,10 @@ class ARMBaseInstrInfo : public TargetInstrInfoImpl { float Probability, float Confidence) const; virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB, - unsigned NumCyles, + unsigned NumCycles, float Probability, float Confidence) const { - return NumCyles == 1; + return NumCycles == 1; } /// AnalyzeCompare - For a comparison instruction, return the source register @@ -496,19 +476,19 @@ void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII); + const ARMBaseInstrInfo &TII, unsigned MIFlags = 0); void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator 
&MBBI, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl); + unsigned MIFlags = 0); /// rewriteARMFrameIndex / rewriteT2FrameIndex - diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 67a4b7d49398..ea1f08a7da8d 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -88,7 +88,7 @@ BitVector ARMBaseRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - // FIXME: avoid re-calculating this everytime. + // FIXME: avoid re-calculating this every time. BitVector Reserved(getNumRegs()); Reserved.set(ARM::SP); Reserved.set(ARM::PC); @@ -342,12 +342,51 @@ ARMBaseRegisterInfo::canCombineSubRegIndices(const TargetRegisterClass *RC, return false; } +const TargetRegisterClass* +ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) + const { + const TargetRegisterClass *Super = RC; + TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + do { + switch (Super->getID()) { + case ARM::GPRRegClassID: + case ARM::SPRRegClassID: + case ARM::DPRRegClassID: + case ARM::QPRRegClassID: + case ARM::QQPRRegClassID: + case ARM::QQQQPRRegClassID: + return Super; + } + Super = *I++; + } while (Super); + return RC; +} const TargetRegisterClass * ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const { return ARM::GPRRegisterClass; } +unsigned +ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + switch (RC->getID()) { + default: + return 0; + case ARM::tGPRRegClassID: + return TFI->hasFP(MF) ? 4 : 5; + case ARM::GPRRegClassID: { + unsigned FP = TFI->hasFP(MF) ? 1 : 0; + return 10 - FP - (STI.isR9Reserved() ? 
1 : 0); + } + case ARM::SPRRegClassID: // Currently not used as 'rep' register class. + case ARM::DPRRegClassID: + return 32 - 10; + } +} + /// getAllocationOrder - Returns the register allocation order for a specified /// register class in the form of a pair of TargetRegisterClass iterators. std::pair @@ -428,6 +467,10 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 }; + // We only support even/odd hints for GPR and rGPR. + if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass) + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); if (HintType == ARMRI::RegPairEven) { if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0) @@ -530,6 +573,29 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } } +bool +ARMBaseRegisterInfo::avoidWriteAfterWrite(const TargetRegisterClass *RC) const { + // CortexA9 has a Write-after-write hazard for NEON registers. + if (!STI.isCortexA9()) + return false; + + switch (RC->getID()) { + case ARM::DPRRegClassID: + case ARM::DPR_8RegClassID: + case ARM::DPR_VFP2RegClassID: + case ARM::QPRRegClassID: + case ARM::QPR_8RegClassID: + case ARM::QPR_VFP2RegClassID: + case ARM::SPRRegClassID: + case ARM::SPR_8RegClassID: + // Avoid reusing S, D, and Q registers. + // Don't increase register pressure for QQ and QQQQ. 
+ return true; + default: + return false; + } +} + bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo(); @@ -806,7 +872,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred, - unsigned PredReg) const { + unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = @@ -816,7 +882,8 @@ emitLoadConstPool(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::LDRcp)) .addReg(DestReg, getDefRegState(true), SubIdx) .addConstantPoolIndex(Idx) - .addImm(0).addImm(Pred).addReg(PredReg); + .addImm(0).addImm(Pred).addReg(PredReg) + .setMIFlags(MIFlags); } bool ARMBaseRegisterInfo:: diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index ba6bd2b32082..9edf72df2158 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -128,6 +128,12 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + std::pair getAllocationOrder(const TargetRegisterClass *RC, unsigned HintType, unsigned HintReg, @@ -139,6 +145,8 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { void UpdateRegAllocHint(unsigned Reg, unsigned NewReg, MachineFunction &MF) const; + virtual bool avoidWriteAfterWrite(const TargetRegisterClass *RC) const; + bool hasBasePointer(const MachineFunction &MF) const; bool canRealignStack(const MachineFunction &MF) const; @@ -176,7 +184,8 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo { unsigned DestReg, unsigned SubIdx, int Val, 
ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags)const; /// Code Generation virtual methods... virtual bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 426ba13a8e11..d2981c0af8ca 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -22,6 +22,9 @@ class CCIfAlign: //===----------------------------------------------------------------------===// def CC_ARM_APCS : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + CCIfType<[i8, i16], CCPromoteToType>, // Handle all vector types as either f64 or v2f64. diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 9bbf6a030687..fa7371626f29 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -312,6 +312,15 @@ namespace { unsigned getRegisterListOpValue(const MachineInstr &MI, unsigned Op) const { return 0; } + unsigned getShiftRight8Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight16Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight32Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + unsigned getShiftRight64Imm(const MachineInstr &MI, unsigned Op) + const { return 0; } + /// getMovi32Value - Return binary encoding of operand for movw/movt. If the /// machine operand requires relocation, record the relocation and return /// zero. 
@@ -969,7 +978,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, const TargetInstrDesc &TID) const { - for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ + for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) return 1 << ARMII::S_BitShift; diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 13d1b33d1165..baf95a33dd4b 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -1650,24 +1650,27 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { unsigned BrOffset = GetOffsetOf(Br.MI) + 4 - 2; unsigned DestOffset = BBOffsets[DestBB->getNumber()]; if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) { - MachineBasicBlock::iterator CmpMI = Br.MI; --CmpMI; - if (CmpMI->getOpcode() == ARM::tCMPi8) { - unsigned Reg = CmpMI->getOperand(0).getReg(); - Pred = llvm::getInstrPredicate(CmpMI, PredReg); - if (Pred == ARMCC::AL && - CmpMI->getOperand(1).getImm() == 0 && - isARMLowRegister(Reg)) { - MachineBasicBlock *MBB = Br.MI->getParent(); - MachineInstr *NewBR = - BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) - .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags()); - CmpMI->eraseFromParent(); - Br.MI->eraseFromParent(); - Br.MI = NewBR; - BBSizes[MBB->getNumber()] -= 2; - AdjustBBOffsetsAfter(MBB, -2); - ++NumCBZ; - MadeChange = true; + MachineBasicBlock::iterator CmpMI = Br.MI; + if (CmpMI != Br.MI->getParent()->begin()) { + --CmpMI; + if (CmpMI->getOpcode() == ARM::tCMPi8) { + unsigned Reg = CmpMI->getOperand(0).getReg(); + Pred = llvm::getInstrPredicate(CmpMI, PredReg); + if (Pred == ARMCC::AL && + CmpMI->getOperand(1).getImm() == 0 && + isARMLowRegister(Reg)) { + MachineBasicBlock 
*MBB = Br.MI->getParent(); + MachineInstr *NewBR = + BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg).addMBB(DestBB,Br.MI->getOperand(0).getTargetFlags()); + CmpMI->eraseFromParent(); + Br.MI->eraseFromParent(); + Br.MI = NewBR; + BBSizes[MBB->getNumber()] -= 2; + AdjustBBOffsetsAfter(MBB, -2); + ++NumCBZ; + MadeChange = true; + } } } } diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index bd753d29abde..b6b3c75943b5 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -455,6 +455,10 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { // Add an implicit def for the super-register. MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); TransferImpOps(MI, MIB, MIB); + + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MI.eraseFromParent(); } @@ -496,10 +500,13 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) - // Add an implicit kill for the super-reg. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the super-reg. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); + + // Transfer memoperands. + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MI.eraseFromParent(); } @@ -622,9 +629,8 @@ void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); - if (SrcIsKill) - // Add an implicit kill for the super-reg. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the super-reg. 
+ MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); } @@ -655,8 +661,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, unsigned SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); LO16 = LO16.addImm(SOImmValV1); HI16 = HI16.addImm(SOImmValV2); - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg).addReg(0); HI16.addImm(Pred).addReg(PredReg).addReg(0); TransferImpOps(MI, LO16, HI16); @@ -692,8 +698,8 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); } - (*LO16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); - (*HI16).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + LO16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + HI16->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); LO16.addImm(Pred).addReg(PredReg); HI16.addImm(Pred).addReg(PredReg); @@ -708,6 +714,78 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, switch (Opcode) { default: return false; + case ARM::VMOVScc: + case ARM::VMOVDcc: { + unsigned newOpc = Opcode == ARM::VMOVScc ? 
ARM::VMOVS : ARM::VMOVD; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), + MI.getOperand(1).getReg()) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()); + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCr: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr), + MI.getOperand(1).getReg()) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCs: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVs), + (MI.getOperand(1).getReg())) + .addReg(MI.getOperand(2).getReg(), + getKillRegState(MI.getOperand(2).isKill())) + .addReg(MI.getOperand(3).getReg(), + getKillRegState(MI.getOperand(3).isKill())) + .addImm(MI.getOperand(4).getImm()) + .addImm(MI.getOperand(5).getImm()) // 'pred' + .addReg(MI.getOperand(6).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCi16: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()); + + MI.eraseFromParent(); + return true; + } + case ARM::MOVCCi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + MI.eraseFromParent(); + return true; + } + case ARM::MVNCCi: { + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), + MI.getOperand(1).getReg()) + .addImm(MI.getOperand(2).getImm()) + .addImm(MI.getOperand(3).getImm()) // 'pred' + .addReg(MI.getOperand(4).getReg()) + .addReg(0); // 's' bit + + 
MI.eraseFromParent(); + return true; + } case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = @@ -726,9 +804,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, llvm::emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, *TII); } else if (AFI->isThumbFunction()) { - llvm::emitThumbRegPlusImmediate(MBB, MBBI, ARM::R6, - FramePtr, -NumBytes, - *TII, RI, MI.getDebugLoc()); + llvm::emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, + FramePtr, -NumBytes, *TII, RI); } else { llvm::emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, FramePtr, -NumBytes, ARMCC::AL, 0, @@ -785,7 +862,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, TII->get(ARM::BL)) .addExternalSymbol("__aeabi_read_tp", 0); - (*MIB).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); return true; @@ -800,7 +877,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) .addOperand(MI.getOperand(1))); - (*MIB1).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB1->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); MachineInstrBuilder MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) @@ -823,7 +900,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, const MachineOperand &MO1 = MI.getOperand(1); const GlobalValue *GV = MO1.getGlobal(); unsigned TF = MO1.getTargetFlags(); - bool isARM = Opcode != ARM::t2MOV_ga_pcrel; + bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn); bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn); unsigned LO16Opc = isARM ? 
ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel; @@ -856,7 +933,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, if (isARM) { AddDefaultPred(MIB3); if (Opcode == ARM::MOV_ga_pcrel_ldr) - (*MIB2).setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + MIB2->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); } TransferImpOps(MI, MIB1, MIB3); MI.eraseFromParent(); @@ -896,9 +973,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VLDMQIA: - case ARM::VLDMQDB: { - unsigned NewOpc = (Opcode == ARM::VLDMQIA) ? ARM::VLDMDIA : ARM::VLDMDDB; + case ARM::VLDMQIA: { + unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; @@ -927,9 +1003,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, return true; } - case ARM::VSTMQIA: - case ARM::VSTMQDB: { - unsigned NewOpc = (Opcode == ARM::VSTMQIA) ? ARM::VSTMDIA : ARM::VSTMDDB; + case ARM::VSTMQIA: { + unsigned NewOpc = ARM::VSTMDIA; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; @@ -950,9 +1025,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); MIB.addReg(D0).addReg(D1); - if (SrcIsKill) - // Add an implicit kill for the Q register. - (*MIB).addRegisterKilled(SrcReg, TRI, true); + if (SrcIsKill) // Add an implicit kill for the Q register. + MIB->addRegisterKilled(SrcReg, TRI, true); TransferImpOps(MI, MIB, MIB); MI.eraseFromParent(); @@ -960,14 +1034,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::VDUPfqf: case ARM::VDUPfdf:{ - unsigned NewOpc = Opcode == ARM::VDUPfqf ? ARM::VDUPLNfq : ARM::VDUPLNfd; + unsigned NewOpc = Opcode == ARM::VDUPfqf ? 
ARM::VDUPLN32q : + ARM::VDUPLN32d; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); unsigned OpIdx = 0; unsigned SrcReg = MI.getOperand(1).getReg(); unsigned Lane = getARMRegisterNumbering(SrcReg) & 1; unsigned DReg = TRI->getMatchingSuperReg(SrcReg, - Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, &ARM::DPR_VFP2RegClass); + Lane & 1 ? ARM::ssub_1 : ARM::ssub_0, + &ARM::DPR_VFP2RegClass); // The lane is [0,1] for the containing DReg superregister. // Copy the dst/src register operands. MIB.addOperand(MI.getOperand(OpIdx++)); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 26f48b308316..3baf274b76b8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "ARM.h" +#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" #include "ARMCallingConv.h" #include "ARMRegisterInfo.h" @@ -26,6 +27,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -115,6 +117,11 @@ class ARMFastISel : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill); + virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill); virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -123,14 +130,18 @@ class ARMFastISel : public FastISel { const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, const ConstantFP *FPImm); - virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, - const TargetRegisterClass *RC, - uint64_t Imm); virtual unsigned 
FastEmitInst_rri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, unsigned Op1, bool Op1IsKill, uint64_t Imm); + virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm); + virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2); + virtual unsigned FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx); @@ -193,6 +204,7 @@ class ARMFastISel : public FastISel { // OptionalDef handling routines. private: + bool isARMNEONPred(const MachineInstr *MI); bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR); const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); void AddLoadStoreOperands(EVT VT, Address &Addr, @@ -221,6 +233,21 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { return true; } +bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { + const TargetInstrDesc &TID = MI->getDesc(); + + // If we're a thumb2 or not NEON function we were handled via isPredicable. + if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || + AFI->isThumb2Function()) + return false; + + for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) + if (TID.OpInfo[i].isPredicate()) + return true; + + return false; +} + // If the machine is predicable go ahead and add the predicate operands, if // it needs default CC operands add those. // TODO: If we want to support thumb1 then we'll need to deal with optional @@ -230,8 +257,10 @@ const MachineInstrBuilder & ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { MachineInstr *MI = &*MIB; - // Do we use a predicate? - if (TII.isPredicable(MI)) + // Do we use a predicate? or... + // Are we NEON in ARM mode and have a predicate operand? If so, I know + // we're not predicable but add it anyways. 
+ if (TII.isPredicable(MI) || isARMNEONPred(MI)) AddDefaultPred(MIB); // Do we optionally set a predicate? Preds is size > 0 iff the predicate @@ -296,6 +325,31 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, return ResultReg; } +unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, + unsigned Op1, bool Op1IsKill, + unsigned Op2, bool Op2IsKill) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill) + .addReg(Op1, Op1IsKill * RegState::Kill) + .addReg(Op2, Op2IsKill * RegState::Kill)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetRegisterClass *RC, unsigned Op0, bool Op0IsKill, @@ -384,6 +438,26 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode, return ResultReg; } +unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode, + const TargetRegisterClass *RC, + uint64_t Imm1, uint64_t Imm2) { + unsigned ResultReg = createResultReg(RC); + const TargetInstrDesc &II = TII.get(MachineInstOpcode); + + if (II.getNumDefs() >= 1) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addImm(Imm1).addImm(Imm2)); + else { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(Imm1).addImm(Imm2)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), + ResultReg) + 
.addReg(II.ImplicitDefs[0])); + } + return ResultReg; +} + unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { @@ -667,24 +741,29 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { TmpOffset += SL->getElementOffset(Idx); } else { uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - SmallVector Worklist; - Worklist.push_back(Op); - do { - Op = Worklist.pop_back_val(); + for (;;) { if (const ConstantInt *CI = dyn_cast(Op)) { // Constant-offset addressing. TmpOffset += CI->getSExtValue() * S; - } else if (isa(Op) && - isa(cast(Op)->getOperand(1))) { - // An add with a constant operand. Fold the constant. + break; + } + if (isa(Op) && + (!isa(Op) || + FuncInfo.MBBMap[cast(Op)->getParent()] + == FuncInfo.MBB) && + isa(cast(Op)->getOperand(1))) { + // An add (in the same block) with a constant operand. Fold the + // constant. ConstantInt *CI = - cast(cast(Op)->getOperand(1)); + cast(cast(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; - // Add the other operand back to the work list. - Worklist.push_back(cast(Op)->getOperand(0)); - } else - goto unsupported_gep; - } while (!Worklist.empty()); + // Iterate on the other operand. + Op = cast(Op)->getOperand(0); + continue; + } + // Unsupported + goto unsupported_gep; + } } } @@ -767,26 +846,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { // Since the offset is too large for the load/store instruction // get the reg+offset into a register. if (needsLowering) { - ARMCC::CondCodes Pred = ARMCC::AL; - unsigned PredReg = 0; - - TargetRegisterClass *RC = isThumb ? 
ARM::tGPRRegisterClass : - ARM::GPRRegisterClass; - unsigned BaseReg = createResultReg(RC); - - if (!isThumb) - emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - BaseReg, Addr.Base.Reg, Addr.Offset, - Pred, PredReg, - static_cast(TII)); - else { - assert(AFI->isThumb2Function()); - emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg, - static_cast(TII)); - } + Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg, + /*Op0IsKill*/false, Addr.Offset, MVT::i32); Addr.Offset = 0; - Addr.Base.Reg = BaseReg; } } @@ -797,7 +859,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, if (VT.getSimpleVT().SimpleTy == MVT::f32 || VT.getSimpleVT().SimpleTy == MVT::f64) Addr.Offset /= 4; - + // Frame base works a bit differently. Handle it separately. if (Addr.BaseType == Address::FrameIndexBase) { int FI = Addr.Base.FI; @@ -819,7 +881,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, } else { // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - + // ARM halfword load/stores need an additional operand. if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); @@ -1007,18 +1069,16 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // behavior. // TODO: Factor this out. if (const CmpInst *CI = dyn_cast(BI->getCondition())) { - if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { - MVT VT; - const Type *Ty = CI->getOperand(0)->getType(); - if (!isTypeLegal(Ty, VT)) - return false; - + MVT SourceVT; + const Type *Ty = CI->getOperand(0)->getType(); + if (CI->hasOneUse() && (CI->getParent() == I->getParent()) + && isTypeLegal(Ty, SourceVT)) { bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); if (isFloat && !Subtarget->hasVFP2()) return false; unsigned CmpOpc; - switch (VT.SimpleTy) { + switch (SourceVT.SimpleTy) { default: return false; // TODO: Verify compares. 
case MVT::f32: @@ -1033,7 +1093,14 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { } // Get the compare predicate. - ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); + // Try to take advantage of fallthrough opportunities. + CmpInst::Predicate Predicate = CI->getPredicate(); + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + Predicate = CmpInst::getInversePredicate(Predicate); + } + + ARMCC::CondCodes ARMPred = getComparePred(Predicate); // We may not handle every CC for now. if (ARMPred == ARMCC::AL) return false; @@ -1057,6 +1124,30 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); + FastEmitBranch(FBB, DL); + FuncInfo.MBB->addSuccessor(TBB); + return true; + } + } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { + MVT SourceVT; + if (TI->hasOneUse() && TI->getParent() == I->getParent() && + (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { + unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + unsigned OpReg = getRegForValue(TI->getOperand(0)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TstOpc)) + .addReg(OpReg).addImm(1)); + + unsigned CCMode = ARMCC::NE; + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + CCMode = ARMCC::EQ; + } + + unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) + .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); + FastEmitBranch(FBB, DL); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -1066,14 +1157,26 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { unsigned CmpReg = getRegForValue(BI->getCondition()); if (CmpReg == 0) return false; - // Re-set the flags just in case. - unsigned CmpOpc = isThumb ? 
ARM::t2CMPri : ARM::CMPri; - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CmpReg).addImm(0)); + // We've been divorced from our compare! Our block was split, and + // now our compare lives in a predecessor block. We mustn't + // re-compare here, as the children of the compare aren't guaranteed + // live across the block boundary (we *could* check for this). + // Regardless, the compare has been done in the predecessor block, + // and it left a value for us in a virtual register. Ergo, we test + // the one-bit value left in the virtual register. + unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) + .addReg(CmpReg).addImm(1)); + + unsigned CCMode = ARMCC::NE; + if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { + std::swap(TBB, FBB); + CCMode = ARMCC::EQ; + } unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) - .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); FuncInfo.MBB->addSuccessor(TBB); return true; @@ -1636,17 +1739,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) { unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { - // Depend our opcode for thumb on whether or not we're targeting an - // externally callable function. For libcalls we'll just pass a NULL GV - // in here. - bool isExternal = false; - if (!GV || GV->hasExternalLinkage()) isExternal = true; - // Darwin needs the r9 versions of the opcodes. bool isDarwin = Subtarget->isTargetDarwin(); - if (isThumb && isExternal) { - return isDarwin ? ARM::tBLXi_r9 : ARM::tBLXi; - } else if (isThumb) { + if (isThumb) { return isDarwin ? ARM::tBLr9 : ARM::tBL; } else { return isDarwin ? 
ARM::BLr9 : ARM::BL; @@ -1671,9 +1766,6 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // For now we're using BLX etc on the assumption that we have v5t ops. - if (!Subtarget->hasV5TOps()) return false; - // TODO: For now if we have long calls specified we don't handle the call. if (EnableARMLongCalls) return false; @@ -1711,7 +1803,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // Issue the call, BLr9 for darwin, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); @@ -1772,13 +1864,9 @@ bool ARMFastISel::SelectCall(const Instruction *I) { else if (!isTypeLegal(RetTy, RetVT)) return false; - // For now we're using BLX etc on the assumption that we have v5t ops. - // TODO: Maybe? - if (!Subtarget->hasV5TOps()) return false; - // TODO: For now if we have long calls specified we don't handle the call. if (EnableARMLongCalls) return false; - + // Set up the argument vectors. SmallVector Args; SmallVector ArgRegs; @@ -1827,7 +1915,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) return false; - // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. + // Issue the call, BLr9 for darwin, BL otherwise. // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); @@ -1842,7 +1930,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) .addGlobalAddress(GV, 0, 0)); - + // Add implicit physical register uses to the call. 
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 68c33f098ec9..e2e95d47b37b 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -106,14 +106,13 @@ static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { if (isARM) emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); + ARMCC::AL, 0, TII, MIFlags); else emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, - Pred, PredReg, TII); + ARMCC::AL, 0, TII, MIFlags); } void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -141,11 +140,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // Allocate the vararg register save area. This is not counted in NumBytes. if (VARegSaveSize) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize, + MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); return; } @@ -196,7 +197,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned ADDriOpc = !AFI->isThumbFunction() ? 
ARM::ADDri : ARM::t2ADDri; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); + .addFrameIndex(FramePtrSpillFI).addImm(0) + .setMIFlag(MachineInstr::FrameSetup); AddDefaultCC(AddDefaultPred(MIB)); } @@ -226,7 +228,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 // Note it's not safe to do this in Thumb2 mode because it would have @@ -282,6 +285,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. + // FIXME: Clarify FrameSetup flags here. if (RegInfo->hasBasePointer(MF)) { if (isARM) BuildMI(MBB, MBBI, dl, @@ -396,8 +400,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, // Jump to label or value in register. if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) - ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd) - : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND); + ? (STI.isThumb() ? ARM::tTAILJMPd : ARM::TAILJMPd) + : (STI.isThumb() ? ARM::tTAILJMPdND : ARM::TAILJMPdND); MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); if (JumpTarget.isGlobal()) MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), @@ -408,10 +412,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, JumpTarget.getTargetFlags()); } } else if (RetOpcode == ARM::TCRETURNri) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::tTAILJMPr : ARM::TAILJMPr)). 
addReg(JumpTarget.getReg(), RegState::Kill); } else if (RetOpcode == ARM::TCRETURNriND) { - BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). + BuildMI(MBB, MBBI, dl, + TII.get(STI.isThumb() ? ARM::tTAILJMPrND : ARM::TAILJMPrND)). addReg(JumpTarget.getReg(), RegState::Kill); } @@ -439,8 +445,7 @@ ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, - int FI, - unsigned &FrameReg, + int FI, unsigned &FrameReg, int SPAdj) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMBaseRegisterInfo *RegInfo = @@ -484,19 +489,23 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, return FPOffset; } else if (MFI->hasVarSizedObjects()) { assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); - // Try to use the frame pointer if we can, else use the base pointer - // since it's available. This is handy for the emergency spill slot, in - // particular. if (AFI->isThumb2Function()) { + // Try to use the frame pointer if we can, else use the base pointer + // since it's available. This is handy for the emergency spill slot, in + // particular. if (FPOffset >= -255 && FPOffset < 0) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; } - } else - FrameReg = RegInfo->getBaseRegister(); + } } else if (AFI->isThumb2Function()) { + // Use add , sp, # + // ldr , [sp, #] + // if at all possible to save space. + if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020) + return Offset; // In Thumb2 mode, the negative offset is very limited. Try to avoid - // out of range references. + // out of range references. 
ldr ,[, #-] if (FPOffset >= -255 && FPOffset < 0) { FrameReg = RegInfo->getFrameRegister(MF); return FPOffset; @@ -524,7 +533,8 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, const std::vector &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool)) const { + bool(*Func)(unsigned, bool), + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); @@ -567,14 +577,14 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, if (Regs.size() > 1 || StrOpc== 0) { MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) - .addReg(ARM::SP)); + .addReg(ARM::SP).setMIFlags(MIFlags)); for (unsigned i = 0, e = Regs.size(); i < e; ++i) MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); } else if (Regs.size() == 1) { MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP) .addReg(Regs[0].first, getKillRegState(Regs[0].second)) - .addReg(ARM::SP); + .addReg(ARM::SP).setMIFlags(MIFlags); // ARM mode needs an extra reg0 here due to addrmode2. Will go away once // that refactoring is complete (eventually). if (StrOpc == ARM::STR_PRE) { @@ -676,9 +686,12 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; unsigned PushOneOpc = AFI->isThumbFunction() ? 
ARM::t2STR_PRE : ARM::STR_PRE; unsigned FltOpc = ARM::VSTMDDB_UPD; - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register); - emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + MachineInstr::FrameSetup); return true; } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 1288b706c599..61bb8afa40f2 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -51,7 +51,8 @@ class ARMFrameLowering : public TargetFrameLowering { bool canSimplifyCallFramePseudos(const MachineFunction &MF) const; int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const; - int ResolveFrameIndexReference(const MachineFunction &MF, int FI, + int ResolveFrameIndexReference(const MachineFunction &MF, + int FI, unsigned &FrameReg, int SPAdj) const; int getFrameIndexOffset(const MachineFunction &MF, int FI) const; @@ -62,7 +63,8 @@ class ARMFrameLowering : public TargetFrameLowering { void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, unsigned StmOpc, unsigned StrOpc, bool NoGap, - bool(*Func)(unsigned, bool)) const; + bool(*Func)(unsigned, bool), + unsigned MIFlags = 0) const; void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector &CSI, unsigned LdmOpc, unsigned LdrOpc, bool isVarArg, bool NoGap, diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp index e97ce50bc429..517bba8cee8e 100644 --- a/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -49,6 
+49,8 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { const TargetInstrDesc &LastTID = LastMI->getDesc(); // Skip over one non-VFP / NEON instruction. if (!LastTID.isBarrier() && + // On A9, AGU and NEON/FPU are muxed. + !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) && (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index f0d5a7d7c2e7..abe5a316a45b 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -45,7 +45,7 @@ DisableShifterOp("disable-shifter-op", cl::Hidden, static cl::opt CheckVMLxHazard("check-vmlx-hazard", cl::Hidden, cl::desc("Check fp vmla / vmls hazard at isel time"), - cl::init(false)); + cl::init(true)); //===--------------------------------------------------------------------===// /// ARMDAGToDAGISel - ARM specific code to select ARM machine @@ -91,9 +91,14 @@ class ARMDAGToDAGISel : public SelectionDAGISel { bool isShifterOpProfitable(const SDValue &Shift, ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); bool SelectShifterOperandReg(SDValue N, SDValue &A, - SDValue &B, SDValue &C); + SDValue &B, SDValue &C, + bool CheckProfitability = true); bool SelectShiftShifterOperandReg(SDValue N, SDValue &A, - SDValue &B, SDValue &C); + SDValue &B, SDValue &C) { + // Don't apply the profitability check + return SelectShifterOperandReg(N, A, B, C, false); + } + bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); @@ -174,16 +179,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel { return ARM_AM::getT2SOImmVal(~Imm) != -1; } - inline bool Pred_so_imm(SDNode *inN) const { - ConstantSDNode *N = cast(inN); - return is_so_imm(N->getZExtValue()); - } - - inline bool Pred_t2_so_imm(SDNode *inN) const { - ConstantSDNode *N = 
cast(inN); - return is_t2_so_imm(N->getZExtValue()); - } - // Include the pieces autogenerated from the target description. #include "ARMGenDAGISel.inc" @@ -373,7 +368,8 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, SDValue &BaseReg, SDValue &ShReg, - SDValue &Opc) { + SDValue &Opc, + bool CheckProfitability) { if (DisableShifterOp) return false; @@ -390,7 +386,7 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, ShImmVal = RHS->getZExtValue() & 31; } else { ShReg = N.getOperand(1); - if (!isShifterOpProfitable(N, ShOpcVal, ShImmVal)) + if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) return false; } Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), @@ -398,30 +394,6 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue N, return true; } -bool ARMDAGToDAGISel::SelectShiftShifterOperandReg(SDValue N, - SDValue &BaseReg, - SDValue &ShReg, - SDValue &Opc) { - ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N); - - // Don't match base register only case. That is matched to a separate - // lower complexity pattern with explicit register operand. - if (ShOpcVal == ARM_AM::no_shift) return false; - - BaseReg = N.getOperand(0); - unsigned ShImmVal = 0; - // Do not check isShifterOpProfitable. This must return true. 
- if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { - ShReg = CurDAG->getRegister(0, MVT::i32); - ShImmVal = RHS->getZExtValue() & 31; - } else { - ShReg = N.getOperand(1); - } - Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), - MVT::i32); - return true; -} - bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm) { @@ -437,7 +409,7 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -1138,7 +1110,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -1183,7 +1155,7 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && !CurDAG->isBaseWithConstantOffset(N)) return false; - + if (ConstantSDNode *RHS = dyn_cast(N.getOperand(1))) { int RHSC = (int)RHS->getSExtValue(); if (N.getOpcode() == ISD::SUB) @@ -1571,6 +1543,11 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.data(), Ops.size()); } + // Transfer memoperands. 
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(VLd)->setMemRefs(MemOp, MemOp + 1); + if (NumVecs == 1) return VLd; @@ -1600,6 +1577,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); EVT VT = N->getOperand(Vec0Idx).getValueType(); bool is64BitVector = VT.is64BitVector(); @@ -1672,7 +1652,13 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + SDNode *VSt = + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + + // Transfer memoperands. + cast(VSt)->setMemRefs(MemOp, MemOp + 1); + + return VSt; } // Otherwise, quad registers are stored with two separate instructions, @@ -1693,6 +1679,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, MemAddr.getValueType(), MVT::Other, OpsA, 7); + cast(VStA)->setMemRefs(MemOp, MemOp + 1); Chain = SDValue(VStA, 1); // Store the odd D registers. 
@@ -1709,8 +1696,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Pred); Ops.push_back(Reg0); Ops.push_back(Chain); - return CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, - Ops.data(), Ops.size()); + SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, + Ops.data(), Ops.size()); + cast(VStB)->setMemRefs(MemOp, MemOp + 1); + return VStB; } SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, @@ -1726,6 +1715,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); unsigned Lane = cast(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); @@ -1812,6 +1804,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, QOpcodes[OpcodeIndex]); SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + cast(VLdLn)->setMemRefs(MemOp, MemOp + 1); if (!IsLoad) return VLdLn; @@ -1838,6 +1831,9 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) return NULL; + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + SDValue Chain = N->getOperand(0); EVT VT = N->getValueType(0); @@ -1882,12 +1878,13 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; std::vector ResTys; - ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts)); + ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts)); if (isUpdating) ResTys.push_back(MVT::i32); ResTys.push_back(MVT::Other); SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), Ops.size()); + cast(VLdDup)->setMemRefs(MemOp, MemOp + 1); SuperReg = SDValue(VLdDup, 0); // Extract the subregisters. @@ -2168,7 +2165,7 @@ SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) // Pattern complexity = 6 cost = 11 size = 0 // - // Also FCPYScc and FCPYDcc. + // Also VMOVScc and VMOVDcc. SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; unsigned Opc = 0; @@ -2450,34 +2447,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::CMOV: return SelectCMOVOp(N); - case ARMISD::CNEG: { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); - SDValue N3 = N->getOperand(3); - SDValue InFlag = N->getOperand(4); - assert(N2.getOpcode() == ISD::Constant); - assert(N3.getOpcode() == ISD::Register); - - SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned) - cast(N2)->getZExtValue()), - MVT::i32); - SDValue Ops[] = { N0, N1, Tmp2, N3, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: assert(false && "Illegal conditional move type!"); - break; - case MVT::f32: - Opc = ARM::VNEGScc; - break; - case MVT::f64: - Opc = ARM::VNEGDcc; - break; - } - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); - } - case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -2870,6 +2839,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { break; } + case ARMISD::VTBL1: { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + SmallVector Ops; + + Ops.push_back(N->getOperand(0)); + 
Ops.push_back(N->getOperand(1)); + Ops.push_back(getAL(CurDAG)); // Predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register + return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops.data(), Ops.size()); + } + case ARMISD::VTBL2: { + DebugLoc dl = N->getDebugLoc(); + EVT VT = N->getValueType(0); + + // Form a REG_SEQUENCE to force register allocation. + SDValue V0 = N->getOperand(0); + SDValue V1 = N->getOperand(1); + SDValue RegSeq = SDValue(PairDRegs(MVT::v16i8, V0, V1), 0); + + SmallVector Ops; + Ops.push_back(RegSeq); + Ops.push_back(N->getOperand(2)); + Ops.push_back(getAL(CurDAG)); // Predicate + Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register + return CurDAG->getMachineNode(ARM::VTBL2Pseudo, dl, VT, + Ops.data(), Ops.size()); + } + case ISD::CONCAT_VECTORS: return SelectConcatVector(N); } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index ab9f9e1571e3..0a31b87c4b56 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -72,6 +72,11 @@ ARMInterworking("arm-interworking", cl::Hidden, cl::desc("Enable / disable ARM interworking (for debugging only)"), cl::init(true)); +// The APCS parameter registers. +static const unsigned GPRArgRegs[] = { + ARM::R0, ARM::R1, ARM::R2, ARM::R3 +}; + void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT PromotedBitwiseVT) { if (VT != PromotedLdStVT) { @@ -393,6 +398,12 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); } + // Use divmod iOS compiler-rt calls. 
+ if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); + setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); + } + if (Subtarget->isThumb1Only()) addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); else @@ -461,6 +472,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::UDIV, MVT::v8i8, Custom); setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); + // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with + // a destination type that is wider than the source. + setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); + setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); setTargetDAGCombine(ISD::INTRINSIC_VOID); setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); @@ -502,18 +517,15 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // i64 operation support. + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i32, Expand); if (Subtarget->isThumb1Only()) { - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - } else { - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - if (!Subtarget->hasV6Ops()) - setOperationAction(ISD::MULHS, MVT::i32, Expand); } + if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()) + setOperationAction(ISD::MULHS, MVT::i32, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); @@ -597,6 +609,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, 
Expand); setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand); + setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); // Since the libcalls include locking, fold in the fences setShouldFoldAtomicFences(true); } @@ -716,7 +740,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) // pressure of the register class's representative and all of it's super // classes' representatives transitively. We have not implemented this because // of the difficulty prior to coalescing of modeling operand register classes -// due to the common occurence of cross class copies and subregister insertions +// due to the common occurrence of cross class copies and subregister insertions // and extractions. 
std::pair ARMTargetLowering::findRepresentativeClass(EVT VT) const{ @@ -778,7 +802,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; case ARMISD::CMOV: return "ARMISD::CMOV"; - case ARMISD::CNEG: return "ARMISD::CNEG"; case ARMISD::RBIT: return "ARMISD::RBIT"; @@ -853,6 +876,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::VTBL1: return "ARMISD::VTBL1"; + case ARMISD::VTBL2: return "ARMISD::VTBL2"; case ARMISD::VMULLs: return "ARMISD::VMULLs"; case ARMISD::VMULLu: return "ARMISD::VMULLu"; case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; @@ -861,6 +886,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::BFI: return "ARMISD::BFI"; case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; + case ARMISD::VBSL: return "ARMISD::VBSL"; case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; @@ -946,27 +972,6 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; } -// FIXME: Move to RegInfo -unsigned -ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - switch (RC->getID()) { - default: - return 0; - case ARM::tGPRRegClassID: - return TFI->hasFP(MF) ? 4 : 5; - case ARM::GPRRegClassID: { - unsigned FP = TFI->hasFP(MF) ? 1 : 0; - return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); - } - case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 
- case ARM::DPRRegClassID: - return 32 - 10; - } -} - //===----------------------------------------------------------------------===// // Lowering Code //===----------------------------------------------------------------------===// @@ -1130,22 +1135,6 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, return Chain; } -/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified -/// by "Src" to address "Dst" of size "Size". Alignment information is -/// specified by the specific parameter attribute. The copy will be passed as -/// a byval function parameter. -/// Sometimes what we are copying is the end of a larger object, the part that -/// does not fit in registers. -static SDValue -CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, - ISD::ArgFlagsTy Flags, SelectionDAG &DAG, - DebugLoc dl) { - SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); - return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - /*isVolatile=*/false, /*AlwaysInline=*/false, - MachinePointerInfo(0), MachinePointerInfo(0)); -} - /// LowerMemOpCallTo - Store the argument to the stack. 
SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, @@ -1156,9 +1145,6 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, unsigned LocMemOffset = VA.getLocMemOffset(); SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); - if (Flags.isByVal()) - return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); - return DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo::getStack(LocMemOffset), false, false, 0); @@ -1224,6 +1210,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.setCallOrPrologue(Call); CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); @@ -1253,6 +1240,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[realArgIdx]; ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; + bool isByVal = Flags.isByVal(); // Promote the value if needed. switch (VA.getLocInfo()) { @@ -1299,6 +1287,43 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, } } else if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); + } else if (isByVal) { + assert(VA.isMemLoc()); + unsigned offset = 0; + + // True if this byval aggregate will be split between registers + // and memory. 
+ if (CCInfo.isFirstByValRegValid()) { + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + unsigned int i, j; + for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { + SDValue Const = DAG.getConstant(4*i, MVT::i32); + SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); + SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, + MachinePointerInfo(), + false, false, 0); + MemOpChains.push_back(Load.getValue(1)); + RegsToPass.push_back(std::make_pair(j, Load)); + } + offset = ARM::R4 - CCInfo.getFirstByValReg(); + CCInfo.clearFirstByValReg(); + } + + unsigned LocMemOffset = VA.getLocMemOffset(); + SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); + SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, + StkPtrOff); + SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); + SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); + SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, + MVT::i32); + MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, + Flags.getByValAlign(), + /*isVolatile=*/false, + /*AlwaysInline=*/false, + MachinePointerInfo(0), + MachinePointerInfo(0))); + } else if (!IsSibCall) { assert(VA.isMemLoc()); @@ -1332,7 +1357,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // than necessary, because it means that each store effectively depends // on every argument instead of just those arguments it would clobber. - // Do not flag preceeding copytoreg stuff together with the following stuff. + // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, @@ -1492,6 +1517,35 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, dl, DAG, InVals); } +/// HandleByVal - Every parameter *after* a byval parameter is passed +/// on the stack. 
Remember the next parameter register to allocate, +/// and then confiscate the rest of the parameter registers to insure +/// this. +void +llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { + unsigned reg = State->AllocateReg(GPRArgRegs, 4); + assert((State->getCallOrPrologue() == Prologue || + State->getCallOrPrologue() == Call) && + "unhandled ParmContext"); + if ((!State->isFirstByValRegValid()) && + (ARM::R0 <= reg) && (reg <= ARM::R3)) { + State->setFirstByValReg(reg); + // At a call site, a byval parameter that is split between + // registers and memory needs its size truncated here. In a + // function prologue, such byval parameters are reassembled in + // memory, and are not truncated. + if (State->getCallOrPrologue() == Call) { + unsigned excess = 4 * (ARM::R4 - reg); + assert(size >= excess && "expected larger existing stack allocation"); + size -= excess; + } + } + // Confiscate any remaining parameter registers to preclude their + // assignment to subsequent parameters. + while (State->AllocateReg(GPRArgRegs, 4)) + ; +} + /// MatchingStackOffset - Return true if the given stack call argument is /// already available in the same position (relatively) of the caller's /// incoming argument stack. @@ -1813,6 +1867,16 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { return HasRet; } +bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + if (!EnableARMTailCalls) + return false; + + if (!CI->isTailCall()) + return false; + + return !Subtarget->isThumb1Only(); +} + // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is // one of the above mentioned nodes. 
It has to be wrapped because otherwise @@ -2096,7 +2160,7 @@ ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) const { DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, - Op.getOperand(0), Op.getOperand(1)); + Op.getOperand(0)); } SDValue @@ -2151,6 +2215,13 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } return Result; } + case Intrinsic::arm_neon_vmulls: + case Intrinsic::arm_neon_vmullu: { + unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) + ? ARMISD::VMULLs : ARMISD::VMULLu; + return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + } } } @@ -2257,6 +2328,88 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } +void +ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, + unsigned &VARegSize, unsigned &VARegSaveSize) + const { + unsigned NumGPRs; + if (CCInfo.isFirstByValRegValid()) + NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); + else { + unsigned int firstUnalloced; + firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, + sizeof(GPRArgRegs) / + sizeof(GPRArgRegs[0])); + NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; + } + + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + VARegSize = NumGPRs * 4; + VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); +} + +// The remaining GPRs hold either the beginning of variable-argument +// data, or the beginning of an aggregate passed by value (usuall +// byval). Either way, we allocate stack slots adjacent to the data +// provided by our caller, and store the unallocated registers there. +// If this is a variadic function, the va_list pointer will begin with +// these values; otherwise, this reassembles a (byval) structure that +// was split between registers and memory. 
+void +ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, + unsigned ArgOffset) const { + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + ARMFunctionInfo *AFI = MF.getInfo(); + unsigned firstRegToSaveIndex; + if (CCInfo.isFirstByValRegValid()) + firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; + else { + firstRegToSaveIndex = CCInfo.getFirstUnallocated + (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + } + + unsigned VARegSize, VARegSaveSize; + computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); + if (VARegSaveSize) { + // If this function is vararg, store any remaining integer argument regs + // to their spots on the stack so that they may be loaded by deferencing + // the result of va_next. + AFI->setVarArgsRegSaveSize(VARegSaveSize); + AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, + ArgOffset + VARegSaveSize + - VARegSize, + false)); + SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), + getPointerTy()); + + SmallVector MemOps; + for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { + TargetRegisterClass *RC; + if (AFI->isThumb1OnlyFunction()) + RC = ARM::tGPRRegisterClass; + else + RC = ARM::GPRRegisterClass; + + unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); + SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); + SDValue Store = + DAG.getStore(Val.getValue(1), dl, Val, FIN, + MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, + DAG.getConstant(4, getPointerTy())); + } + if (!MemOps.empty()) + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &MemOps[0], MemOps.size()); + } else + // This will point to the next argument passed via stack. 
+ AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); +} + SDValue ARMTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -2265,7 +2418,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -2275,12 +2427,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector ArgLocs; CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, *DAG.getContext()); + CCInfo.setCallOrPrologue(Prologue); CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv, /* Return*/ false, isVarArg)); SmallVector ArgValues; + int lastInsIndex = -1; + SDValue ArgValue; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -2288,7 +2443,6 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); - SDValue ArgValue; if (VA.needsCustom()) { // f64 and vector types are split up into multiple registers or // combinations of registers and stack slots. @@ -2364,67 +2518,45 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.isMemLoc()); assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); - unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); + int index = ArgLocs[i].getValNo(); - // Create load nodes to retrieve arguments from the stack. - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + // Some Ins[] entries become multiple ArgLoc[] entries. + // Process them only once. + if (index != lastInsIndex) + { + ISD::ArgFlagsTy Flags = Ins[index].Flags; + // FIXME: For now, all byval parameter objects are marked mutable. 
+ // This can be changed with more analysis. + // In case of tail call optimization mark all arguments mutable. + // Since they could be overwritten by lowering of arguments in case of + // a tail call. + if (Flags.isByVal()) { + unsigned VARegSize, VARegSaveSize; + computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); + unsigned Bytes = Flags.getByValSize() - VARegSize; + if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. + int FI = MFI->CreateFixedObject(Bytes, + VA.getLocMemOffset(), false); + InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); + } else { + int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, + VA.getLocMemOffset(), true); + + // Create load nodes to retrieve arguments from the stack. + SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); + InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + MachinePointerInfo::getFixedStack(FI), + false, false, 0)); + } + lastInsIndex = index; + } } } // varargs - if (isVarArg) { - static const unsigned GPRArgRegs[] = { - ARM::R0, ARM::R1, ARM::R2, ARM::R3 - }; - - unsigned NumGPRs = CCInfo.getFirstUnallocated - (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); - - unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); - unsigned VARegSize = (4 - NumGPRs) * 4; - unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); - unsigned ArgOffset = CCInfo.getNextStackOffset(); - if (VARegSaveSize) { - // If this function is vararg, store any remaining integer argument regs - // to their spots on the stack so that they may be loaded by deferencing - // the result of va_next. 
- AFI->setVarArgsRegSaveSize(VARegSaveSize); - AFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(VARegSaveSize, - ArgOffset + VARegSaveSize - VARegSize, - false)); - SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), - getPointerTy()); - - SmallVector MemOps; - for (; NumGPRs < 4; ++NumGPRs) { - TargetRegisterClass *RC; - if (AFI->isThumb1OnlyFunction()) - RC = ARM::tGPRRegisterClass; - else - RC = ARM::GPRRegisterClass; - - unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); - SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); - SDValue Store = - DAG.getStore(Val.getValue(1), dl, Val, FIN, - MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, - DAG.getConstant(4, getPointerTy())); - } - if (!MemOps.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOps[0], MemOps.size()); - } else - // This will point to the next argument passed via stack. - AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); - } + if (isVarArg) + VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); return Chain; } @@ -2517,6 +2649,27 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); } +/// duplicateCmp - Glue values can have only one use, so this function +/// duplicates a comparison node. 
+SDValue +ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { + unsigned Opc = Cmp.getOpcode(); + DebugLoc DL = Cmp.getDebugLoc(); + if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) + return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + + assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); + Cmp = Cmp.getOperand(0); + Opc = Cmp.getOpcode(); + if (Opc == ARMISD::CMPFP) + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); + else { + assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); + Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); + } + return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); +} + SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); @@ -2552,7 +2705,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { EVT VT = Cond.getValueType(); SDValue ARMcc = Cond.getOperand(2); SDValue CCR = Cond.getOperand(3); - SDValue Cmp = Cond.getOperand(4); + SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); } } @@ -2681,8 +2834,8 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { // If one of the operand is zero, it's safe to ignore the NaN case since // we only care about equality comparisons. (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { - // If unsafe fp math optimization is enabled and there are no othter uses of - // the CMP operands, and the condition code is EQ oe NE, we can optimize it + // If unsafe fp math optimization is enabled and there are no other uses of + // the CMP operands, and the condition code is EQ or NE, we can optimize it // to an integer comparison. 
if (CC == ISD::SETOEQ) CC = ISD::SETEQ; @@ -2811,8 +2964,39 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); } +static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + + EVT OperandVT = Op.getOperand(0).getValueType(); + assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!"); + if (VT != MVT::v4f32) + return DAG.UnrollVectorOp(Op.getNode()); + + unsigned CastOpc; + unsigned Opc; + switch (Op.getOpcode()) { + default: + assert(0 && "Invalid opcode!"); + case ISD::SINT_TO_FP: + CastOpc = ISD::SIGN_EXTEND; + Opc = ISD::SINT_TO_FP; + break; + case ISD::UINT_TO_FP: + CastOpc = ISD::ZERO_EXTEND; + Opc = ISD::UINT_TO_FP; + break; + } + + Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); + return DAG.getNode(Opc, dl, VT, Op); +} + static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); + if (VT.isVector()) + return LowerVectorINT_TO_FP(Op, DAG); + DebugLoc dl = Op.getDebugLoc(); unsigned Opc; @@ -2860,7 +3044,10 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), DAG.getConstant(32, MVT::i32)); - } + } else if (VT == MVT::f32) + Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, + DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), + DAG.getConstant(32, MVT::i32)); Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); @@ -2869,11 +3056,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); - + SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), DAG.getNode(ISD::AND, dl, OpVT, 
Tmp0, MaskNot)); - if (SrcVT == MVT::f32) { + if (VT == MVT::f32) { Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, DAG.getConstant(0, MVT::i32)); @@ -3508,6 +3695,13 @@ static bool isVREVMask(const SmallVectorImpl &M, EVT VT, return true; } +static bool isVTBLMask(const SmallVectorImpl &M, EVT VT) { + // We can handle <8 x i8> vector shuffles. If the index in the mask is out of + // range, then 0 is placed into the resulting vector. So pretty much any mask + // of 8 elements can work here. + return VT == MVT::v8i8 && M.size() == 8; +} + static bool isVTRNMask(const SmallVectorImpl &M, EVT VT, unsigned &WhichResult) { unsigned EltSz = VT.getVectorElementType().getSizeInBits(); @@ -3947,6 +4141,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, isVREVMask(M, VT, 32) || isVREVMask(M, VT, 16) || isVEXTMask(M, VT, ReverseVEXT, Imm) || + isVTBLMask(M, VT) || isVTRNMask(M, VT, WhichResult) || isVUZPMask(M, VT, WhichResult) || isVZIPMask(M, VT, WhichResult) || @@ -4024,6 +4219,29 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, } } +static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, + SmallVectorImpl &ShuffleMask, + SelectionDAG &DAG) { + // Check to see if we can use the VTBL instruction. 
+ SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + DebugLoc DL = Op.getDebugLoc(); + + SmallVector VTBLMask; + for (SmallVectorImpl::iterator + I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) + VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); + + if (V2.getNode()->getOpcode() == ISD::UNDEF) + return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, + &VTBLMask[0], 8)); + + return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, + DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, + &VTBLMask[0], 8)); +} + static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); @@ -4141,6 +4359,12 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + if (VT == MVT::v8i8) { + SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); + if (NewOp.getNode()) + return NewOp; + } + return SDValue(); } @@ -4290,6 +4514,28 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } +static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { + unsigned Opcode = N->getOpcode(); + if (Opcode == ISD::ADD || Opcode == ISD::SUB) { + SDNode *N0 = N->getOperand(0).getNode(); + SDNode *N1 = N->getOperand(1).getNode(); + return N0->hasOneUse() && N1->hasOneUse() && + isSignExtended(N0, DAG) && isSignExtended(N1, DAG); + } + return false; +} + +static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { + unsigned Opcode = N->getOpcode(); + if (Opcode == ISD::ADD || Opcode == ISD::SUB) { + SDNode *N0 = N->getOperand(0).getNode(); + SDNode *N1 = N->getOperand(1).getNode(); + return N0->hasOneUse() && N1->hasOneUse() && + isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); + } + return false; +} + static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { // Multiplications are only custom-lowered for 128-bit vectors so that // 
VMULL can be detected. Otherwise v2i64 multiplications are not legal. @@ -4298,29 +4544,73 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { SDNode *N0 = Op.getOperand(0).getNode(); SDNode *N1 = Op.getOperand(1).getNode(); unsigned NewOpc = 0; - if (isSignExtended(N0, DAG) && isSignExtended(N1, DAG)) + bool isMLA = false; + bool isN0SExt = isSignExtended(N0, DAG); + bool isN1SExt = isSignExtended(N1, DAG); + if (isN0SExt && isN1SExt) NewOpc = ARMISD::VMULLs; - else if (isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG)) - NewOpc = ARMISD::VMULLu; - else if (VT == MVT::v2i64) - // Fall through to expand this. It is not legal. - return SDValue(); - else - // Other vector multiplications are legal. - return Op; + else { + bool isN0ZExt = isZeroExtended(N0, DAG); + bool isN1ZExt = isZeroExtended(N1, DAG); + if (isN0ZExt && isN1ZExt) + NewOpc = ARMISD::VMULLu; + else if (isN1SExt || isN1ZExt) { + // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these + // into (s/zext A * s/zext C) + (s/zext B * s/zext C) + if (isN1SExt && isAddSubSExt(N0, DAG)) { + NewOpc = ARMISD::VMULLs; + isMLA = true; + } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { + NewOpc = ARMISD::VMULLu; + isMLA = true; + } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { + std::swap(N0, N1); + NewOpc = ARMISD::VMULLu; + isMLA = true; + } + } + + if (!NewOpc) { + if (VT == MVT::v2i64) + // Fall through to expand this. It is not legal. + return SDValue(); + else + // Other vector multiplications are legal. + return Op; + } + } // Legalize to a VMULL instruction. 
DebugLoc DL = Op.getDebugLoc(); - SDValue Op0 = SkipExtension(N0, DAG); + SDValue Op0; SDValue Op1 = SkipExtension(N1, DAG); + if (!isMLA) { + Op0 = SkipExtension(N0, DAG); + assert(Op0.getValueType().is64BitVector() && + Op1.getValueType().is64BitVector() && + "unexpected types for extended operands to VMULL"); + return DAG.getNode(NewOpc, DL, VT, Op0, Op1); + } - assert(Op0.getValueType().is64BitVector() && - Op1.getValueType().is64BitVector() && - "unexpected types for extended operands to VMULL"); - return DAG.getNode(NewOpc, DL, VT, Op0, Op1); + // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during + // isel lowering to take advantage of no-stall back to back vmul + vmla. + // vmull q0, d4, d6 + // vmlal q0, d5, d6 + // is faster than + // vaddl q0, d4, d5 + // vmovl q1, d6 + // vmul q0, q0, q1 + SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); + SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); + EVT Op1VT = Op1.getValueType(); + return DAG.getNode(N0->getOpcode(), DL, VT, + DAG.getNode(NewOpc, DL, VT, + DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), + DAG.getNode(NewOpc, DL, VT, + DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); } -static SDValue +static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); @@ -4331,7 +4621,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); // Get reciprocal estimate. // float4 recip = vrecpeq_f32(yf); - Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); // Because char has a smaller range than uchar, we can actually get away // without any newton steps. 
This requires that we use a weird bias @@ -4349,7 +4639,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { return X; } -static SDValue +static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { SDValue N2; // Convert to float. @@ -4359,13 +4649,13 @@ LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); - + // Use reciprocal estimate and one refinement step. // float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); - N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); - N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); @@ -4395,15 +4685,15 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; - + if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); - + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, @@ -4414,7 +4704,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); - + N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); return N0; } @@ 
-4430,32 +4720,32 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; - + if (VT == MVT::v8i8) { N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); - + N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(4)); N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, - DAG.getIntPtrConstant(4)); + DAG.getIntPtrConstant(4)); N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, DAG.getIntPtrConstant(0)); N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, DAG.getIntPtrConstant(0)); - + N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 - + N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); N0 = LowerCONCAT_VECTORS(N0, DAG); - - N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, + + N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), N0); return N0; } - + // v4i16 sdiv ... Convert to float. 
// float4 yf = vcvt_f32_s32(vmovl_u16(y)); // float4 xf = vcvt_f32_s32(vmovl_u16(x)); @@ -4468,13 +4758,13 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { // float4 recip = vrecpeq_f32(yf); // recip *= vrecpsq_f32(yf, recip); // recip *= vrecpsq_f32(yf, recip); - N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); - N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); - N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, + N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), N1, N2); N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); @@ -4503,7 +4793,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::GlobalAddress: return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); - case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); @@ -4524,7 +4814,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, Subtarget); - case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); + case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); @@ -4754,6 +5044,109 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + bool signExtend, + ARMCC::CondCodes Cond) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction *MF = BB->getParent(); + MachineFunction::iterator It = BB; + ++It; + + unsigned dest = MI->getOperand(0).getReg(); + unsigned ptr = MI->getOperand(1).getReg(); + unsigned incr = MI->getOperand(2).getReg(); + unsigned oldval = dest; + DebugLoc dl = MI->getDebugLoc(); + + bool isThumb2 = Subtarget->isThumb2(); + unsigned ldrOpc, strOpc, extendOpc; + switch (Size) { + default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + case 1: + ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; + strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; + extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr; + break; + case 2: + ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; + strOpc = isThumb2 ? 
ARM::t2STREXH : ARM::STREXH; + extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr; + break; + case 4: + ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; + strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; + extendOpc = 0; + break; + } + + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); + MF->insert(It, loopMBB); + MF->insert(It, exitMBB); + + // Transfer the remainder of BB and its successor edges to exitMBB. + exitMBB->splice(exitMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + exitMBB->transferSuccessorsAndUpdatePHIs(BB); + + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + + // thisMBB: + // ... + // fallthrough --> loopMBB + BB->addSuccessor(loopMBB); + + // loopMBB: + // ldrex dest, ptr + // (sign extend dest, if required) + // cmp dest, incr + // cmov.cond scratch2, dest, incr + // strex scratch, scratch2, ptr + // cmp scratch, #0 + // bne- loopMBB + // fallthrough --> exitMBB + BB = loopMBB; + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); + + // Sign extend the value, if necessary. + if (signExtend && extendOpc) { + oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest)); + } + + // Build compare and cmov instructions. + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) + .addReg(oldval).addReg(incr)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) + .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); + + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) + .addReg(ptr)); + AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2CMPri : ARM::CMPri)) + .addReg(scratch).addImm(0)); + BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + + BB->addSuccessor(loopMBB); + BB->addSuccessor(exitMBB); + + // exitMBB: + // ... + BB = exitMBB; + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + static MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), @@ -4763,6 +5156,72 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } +// FIXME: This opcode table should obviously be expressed in the target +// description. We probably just need a "machine opcode" value in the pseudo +// instruction. But the ideal solution maybe to simply remove the "S" version +// of the opcode altogether. +struct AddSubFlagsOpcodePair { + unsigned PseudoOpc; + unsigned MachineOpc; +}; + +static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { + {ARM::ADCSri, ARM::ADCri}, + {ARM::ADCSrr, ARM::ADCrr}, + {ARM::ADCSrs, ARM::ADCrs}, + {ARM::SBCSri, ARM::SBCri}, + {ARM::SBCSrr, ARM::SBCrr}, + {ARM::SBCSrs, ARM::SBCrs}, + {ARM::RSBSri, ARM::RSBri}, + {ARM::RSBSrr, ARM::RSBrr}, + {ARM::RSBSrs, ARM::RSBrs}, + {ARM::RSCSri, ARM::RSCri}, + {ARM::RSCSrs, ARM::RSCrs}, + {ARM::t2ADCSri, ARM::t2ADCri}, + {ARM::t2ADCSrr, ARM::t2ADCrr}, + {ARM::t2ADCSrs, ARM::t2ADCrs}, + {ARM::t2SBCSri, ARM::t2SBCri}, + {ARM::t2SBCSrr, ARM::t2SBCrr}, + {ARM::t2SBCSrs, ARM::t2SBCrs}, + {ARM::t2RSBSri, ARM::t2RSBri}, + {ARM::t2RSBSrs, ARM::t2RSBrs}, +}; + +// Convert and Add or Subtract with Carry and Flags to a generic opcode with +// CPSR operand. e.g. ADCS (...) -> ADC (... CPSR). +// +// FIXME: Somewhere we should assert that CPSR is in the correct +// position to be recognized by the target descrition as the 'S' bit. 
+bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI, + MachineBasicBlock *BB) const { + unsigned OldOpc = MI->getOpcode(); + unsigned NewOpc = 0; + + // This is only called for instructions that need remapping, so iterating over + // the tiny opcode table is not costly. + static const int NPairs = + sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); + for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0], + *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) { + if (OldOpc == Pair->PseudoOpc) { + NewOpc = Pair->MachineOpc; + break; + } + } + if (!NewOpc) + return false; + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc dl = MI->getDebugLoc(); + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); + for (unsigned i = 0; i < MI->getNumOperands(); ++i) + MIB.addOperand(MI->getOperand(i)); + AddDefaultPred(MIB); + MIB.addReg(ARM::CPSR, RegState::Define); // S bit + MI->eraseFromParent(); + return true; +} + MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { @@ -4770,10 +5229,13 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); switch (MI->getOpcode()) { - default: + default: { + if (RemapAddSubWithFlags(MI, BB)) + return BB; + MI->dump(); llvm_unreachable("Unexpected instr type to insert"); - + } case ARM::ATOMIC_LOAD_ADD_I8: return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); case ARM::ATOMIC_LOAD_ADD_I16: @@ -4816,6 +5278,34 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_LOAD_SUB_I32: return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); + case ARM::ATOMIC_LOAD_MIN_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); + case ARM::ATOMIC_LOAD_MIN_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); + case ARM::ATOMIC_LOAD_MIN_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); + + case ARM::ATOMIC_LOAD_MAX_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); + case ARM::ATOMIC_LOAD_MAX_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); + case ARM::ATOMIC_LOAD_MAX_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); + + case ARM::ATOMIC_LOAD_UMIN_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); + case ARM::ATOMIC_LOAD_UMIN_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); + case ARM::ATOMIC_LOAD_UMIN_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); + + case ARM::ATOMIC_LOAD_UMAX_I8: + return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); + case ARM::ATOMIC_LOAD_UMAX_I16: + return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); + case ARM::ATOMIC_LOAD_UMAX_I32: + return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); + case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); @@ -5034,6 +5524,42 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } +/// PerformVMULCombine +/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the +/// special multiplier accumulator forwarding. 
+/// vmul d3, d0, d2 +/// vmla d3, d1, d2 +/// is faster than +/// vadd d3, d0, d1 +/// vmul d3, d3, d2 +static SDValue PerformVMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const ARMSubtarget *Subtarget) { + if (!Subtarget->hasVMLxForwarding()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + unsigned Opcode = N0.getOpcode(); + if (Opcode != ISD::ADD && Opcode != ISD::SUB && + Opcode != ISD::FADD && Opcode != ISD::FSUB) { + Opcode = N0.getOpcode(); + if (Opcode != ISD::ADD && Opcode != ISD::SUB && + Opcode != ISD::FADD && Opcode != ISD::FSUB) + return SDValue(); + std::swap(N0, N1); + } + + EVT VT = N->getValueType(0); + DebugLoc DL = N->getDebugLoc(); + SDValue N00 = N0->getOperand(0); + SDValue N01 = N0->getOperand(1); + return DAG.getNode(Opcode, DL, VT, + DAG.getNode(ISD::MUL, DL, VT, N00, N1), + DAG.getNode(ISD::MUL, DL, VT, N01, N1)); +} + static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -5046,6 +5572,8 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); EVT VT = N->getValueType(0); + if (VT.is64BitVector() || VT.is128BitVector()) + return PerformVMULCombine(N, DCI, Subtarget); if (VT != MVT::i32) return SDValue(); @@ -5088,12 +5616,16 @@ static SDValue PerformMULCombine(SDNode *N, static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + APInt SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -5127,6 +5659,9 @@ static SDValue PerformORCombine(SDNode *N, EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; + if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) + return SDValue(); + APInt 
SplatBits, SplatUndef; unsigned SplatBitSize; bool HasAnyUndefs; @@ -5147,6 +5682,37 @@ static SDValue PerformORCombine(SDNode *N, } } + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::AND) + return SDValue(); + SDValue N1 = N->getOperand(1); + + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. + if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && + DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); + APInt SplatBits0; + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); + APInt SplatBits1; + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection simpler. + EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } + } + // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when // reasonable. 
@@ -5154,19 +5720,16 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); DebugLoc DL = N->getDebugLoc(); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) - // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // && mask == ~mask2 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) - // && CountPopulation_32(mask) == CountPopulation_32(~mask2) + // && ~mask == mask2 // (i.e., copy a bitfield value into another bitfield of the same width) - if (N0.getOpcode() != ISD::AND) - return SDValue(); if (VT != MVT::i32) return SDValue(); @@ -5209,26 +5772,26 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); unsigned Mask2 = N11C->getZExtValue(); + // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern + // as is to match. if (ARM::isBitFieldInvertedMask(Mask) && - ARM::isBitFieldInvertedMask(~Mask2) && - (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { + (Mask == ~Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a - unsigned lsb = CountTrailingZeros_32(Mask2); + unsigned amt = CountTrailingZeros_32(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), - DAG.getConstant(lsb, MVT::i32)); + DAG.getConstant(amt, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, DAG.getConstant(Mask, MVT::i32)); // Do not add new nodes to DAG combiner worklist. 
DCI.CombineTo(N, Res, false); return SDValue(); } else if (ARM::isBitFieldInvertedMask(~Mask) && - ARM::isBitFieldInvertedMask(Mask2) && - (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { + (~Mask == Mask2)) { // The pack halfword instruction works better for masks that fit it, // so use that when it's available. if (Subtarget->hasT2ExtractPack() && @@ -5239,7 +5802,7 @@ static SDValue PerformORCombine(SDNode *N, Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, - DAG.getConstant(Mask2, MVT::i32)); + DAG.getConstant(Mask2, MVT::i32)); // Do not add new nodes to DAG combiner worklist. DCI.CombineTo(N, Res, false); return SDValue(); @@ -5294,6 +5857,37 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue InDouble = N->getOperand(0); if (InDouble.getOpcode() == ARMISD::VMOVDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); + + // vmovrrd(load f64) -> (load i32), (load i32) + SDNode *InNode = InDouble.getNode(); + if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && + InNode->getValueType(0) == MVT::f64 && + InNode->getOperand(1).getOpcode() == ISD::FrameIndex && + !cast(InNode)->isVolatile()) { + // TODO: Should this be done for non-FrameIndex operands? 
+ LoadSDNode *LD = cast(InNode); + + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = LD->getDebugLoc(); + SDValue BasePtr = LD->getBasePtr(); + SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, + LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, + LD->getPointerInfo(), LD->isVolatile(), + LD->isNonTemporal(), + std::min(4U, LD->getAlignment() / 2)); + + DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); + DCI.RemoveFromWorklist(LD); + DAG.DeleteNode(LD); + return Result; + } + return SDValue(); } @@ -5323,8 +5917,28 @@ static SDValue PerformSTORECombine(SDNode *N, // Otherwise, the i64 value will be legalized to a pair of i32 values. StoreSDNode *St = cast(N); SDValue StVal = St->getValue(); - if (!ISD::isNormalStore(St) || St->isVolatile() || - StVal.getValueType() != MVT::i64 || + if (!ISD::isNormalStore(St) || St->isVolatile()) + return SDValue(); + + if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && + StVal.getNode()->hasOneUse() && !St->isVolatile()) { + SelectionDAG &DAG = DCI.DAG; + DebugLoc DL = St->getDebugLoc(); + SDValue BasePtr = St->getBasePtr(); + SDValue NewST1 = DAG.getStore(St->getChain(), DL, + StVal.getNode()->getOperand(0), BasePtr, + St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), St->getAlignment()); + + SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, + DAG.getConstant(4, MVT::i32)); + return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), + OffsetPtr, St->getPointerInfo(), St->isVolatile(), + St->isNonTemporal(), + std::min(4U, St->getAlignment() / 2)); + } + + if (StVal.getValueType() != MVT::i64 || StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) return SDValue(); @@ -5553,7 +6167,7 @@ 
static SDValue CombineBaseUpdate(SDNode *N, EVT VecTy; if (isLoad) VecTy = N->getValueType(0); - else + else VecTy = N->getOperand(AddrOpIdx+1).getValueType(); unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; if (isLaneOp) @@ -5603,7 +6217,7 @@ static SDValue CombineBaseUpdate(SDNode *N, DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); break; - } + } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index dc400c485ec6..a2e626062ac6 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -57,7 +57,6 @@ namespace llvm { CMPFPw0, // ARM VFP compare against zero instruction, sets FPSCR. FMSTAT, // ARM fmstat instruction. CMOV, // ARM conditional move instructions. - CNEG, // ARM conditional negate instructions. BCC_i64, @@ -89,7 +88,7 @@ namespace llvm { MEMBARRIER_MCR, // Memory barrier (MCR) PRELOAD, // Preload - + VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal. 
@@ -154,6 +153,8 @@ namespace llvm { VZIP, // zip (interleave) VUZP, // unzip (deinterleave) VTRN, // transpose + VTBL1, // 1-register shuffle with mask + VTBL2, // 2-register shuffle with mask // Vector multiply long: VMULLs, // ...signed @@ -172,12 +173,15 @@ namespace llvm { // Bit-field insert BFI, - + // Vector OR with immediate VORRIMM, // Vector AND with NOT of immediate VBICIMM, + // Vector bitwise select + VBSL, + // Vector load N-element structure to all lanes: VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, VLD3DUP, @@ -330,9 +334,6 @@ namespace llvm { Sched::Preference getSchedulingPreference(SDNode *N) const; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; - bool isShuffleMaskLegal(const SmallVectorImpl &M, EVT VT) const; bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; @@ -407,7 +408,7 @@ namespace llvm { SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; @@ -425,6 +426,13 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; + void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, + DebugLoc dl, SDValue &Chain, unsigned ArgOffset) + const; + + void computeRegArea(CCState &CCInfo, MachineFunction &MF, + unsigned &VARegSize, unsigned &VARegSaveSize) const; + virtual SDValue LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg, @@ -435,6 +443,9 @@ namespace llvm { DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; + /// HandleByVal - Target-specific cleanup for ByVal support. 
+ virtual void HandleByVal(CCState *, unsigned &) const; + /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. @@ -456,10 +467,13 @@ namespace llvm { virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; + SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, DebugLoc dl) const; + SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; @@ -470,16 +484,22 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode) const; + MachineBasicBlock * EmitAtomicBinaryMinMax(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, + bool signExtend, + ARMCC::CondCodes Cond) const; + bool RemapAddSubWithFlags(MachineInstr *MI, MachineBasicBlock *BB) const; }; - + enum NEONModImmType { VMOVModImm, VMVNModImm, OtherModImm }; - - + + namespace ARM { FastISel *createFastISel(FunctionLoweringInfo &funcInfo); } diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 359ac45cee1d..f5fb98ece4af 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -206,19 +206,30 @@ def setend_op : Operand { let PrintMethod = "printSetendOperand"; } -def cps_opt : Operand { - let PrintMethod = "printCPSOptionOperand"; -} - def msr_mask : Operand { let PrintMethod = "printMSRMaskOperand"; let ParserMatchClass = MSRMaskOperand; } -// A8.6.117, A8.6.118. Different instructions are generated for #0 and #-0. -// The neg_zero operand translates -0 to -1, -1 to -2, ..., etc. 
-def neg_zero : Operand { - let PrintMethod = "printNegZeroOperand"; +// Shift Right Immediate - A shift right immediate is encoded differently from +// other shift immediates. The imm6 field is encoded like so: +// +// Offset Encoding +// 8 imm6<5:3> = '001', 8 - is encoded in imm6<2:0> +// 16 imm6<5:4> = '01', 16 - is encoded in imm6<3:0> +// 32 imm6<5> = '1', 32 - is encoded in imm6<4:0> +// 64 64 - is encoded in imm6<5:0> +def shr_imm8 : Operand { + let EncoderMethod = "getShiftRight8Imm"; +} +def shr_imm16 : Operand { + let EncoderMethod = "getShiftRight16Imm"; +} +def shr_imm32 : Operand { + let EncoderMethod = "getShiftRight32Imm"; +} +def shr_imm64 : Operand { + let EncoderMethod = "getShiftRight64Imm"; } //===----------------------------------------------------------------------===// @@ -279,6 +290,7 @@ class PseudoInst pattern> let OutOperandList = oops; let InOperandList = iops; let Pattern = pattern; + let isCodeGenOnly = 1; } // PseudoInst that's ARM-mode only. @@ -422,11 +434,11 @@ class AIstrex opcod, dag oops, dag iops, InstrItinClass itin, opc, asm, "", pattern> { bits<4> Rd; bits<4> Rt; - bits<4> Rn; + bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 0; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rd; let Inst{11-4} = 0b11111001; let Inst{3-0} = Rt; @@ -513,6 +525,24 @@ class AI2stridx pattern> + : AI2ldstidx<0, isByte, isPre, oops, iops, im, f, itin, opc, asm, cstr, + pattern> { + // AM2 store w/ two operands: (GPR, am2offset) + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; +} // addrmode3 instructions class AI3ld op, bit op20, dag oops, dag iops, Format f, @@ -547,6 +577,34 @@ class AI3ldstidx op, bit op20, bit isLd, bit isPre, dag oops, dag iops, let Inst{15-12} = Rt; // Rt let Inst{7-4} = op; } + +// FIXME: Merge with the 
above class when addrmode2 gets used for LDR, LDRB +// but for now use this class for LDRSBT, LDRHT, LDSHT. +class AI3ldstidxT op, bit op20, bit isLd, bit isPre, dag oops, dag iops, + IndexMode im, Format f, InstrItinClass itin, string opc, + string asm, string cstr, list pattern> + : I { + // {13} 1 == imm8, 0 == Rm + // {12-9} Rn + // {8} isAdd + // {7-4} imm7_4/zero + // {3-0} imm3_0/Rm + bits<14> addr; + bits<4> Rt; + let Inst{27-25} = 0b000; + let Inst{24} = isPre; // P bit + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{20} = op20; // L bit + let Inst{19-16} = addr{12-9}; // Rn + let Inst{15-12} = Rt; // Rt + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{7-4} = op; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode3"; +} + class AI3stridx op, bit isByte, bit isPre, dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin, string opc, string asm, string cstr, list pattern> @@ -619,12 +677,25 @@ class AI3sthpo pattern> : I { + // {13} 1 == imm8, 0 == Rm + // {12-9} Rn + // {8} isAdd + // {7-4} imm7_4/zero + // {3-0} imm3_0/Rm + bits<14> addr; + bits<4> Rt; + let Inst{3-0} = addr{3-0}; // imm3_0/Rm let Inst{4} = 1; let Inst{5} = 1; // H bit let Inst{6} = 0; // S bit let Inst{7} = 1; + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{15-12} = Rt; // Rt + let Inst{19-16} = addr{12-9}; // Rn let Inst{20} = 0; // L bit let Inst{21} = 0; // W bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{23} = addr{8}; // U bit let Inst{24} = 0; // P bit let Inst{27-25} = 0b000; } @@ -1670,7 +1741,8 @@ class N2VImm op11_8, bit op7, bit op6, bit op4, } // NEON 3 vector register format. 
-class N3V op21_20, bits<4> op11_8, bit op6, bit op4, + +class N3VCommon op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, list pattern> : NDataI { @@ -1680,6 +1752,13 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{11-8} = op11_8; let Inst{6} = op6; let Inst{4} = op4; +} + +class N3V op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : N3VCommon { // Instruction operands. bits<5> Vd; @@ -1694,6 +1773,47 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{5} = Vm{4}; } +class N3VLane32 op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : N3VCommon { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bit lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{3-0} = Vm{3-0}; + let Inst{5} = lane; +} + +class N3VLane16 op21_20, bits<4> op11_8, bit op6, bit op4, + dag oops, dag iops, Format f, InstrItinClass itin, + string opc, string dt, string asm, string cstr, list pattern> + : N3VCommon { + + // Instruction operands. + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + bits<2> lane; + + let Inst{15-12} = Vd{3-0}; + let Inst{22} = Vd{4}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{2-0} = Vm{2-0}; + let Inst{5} = lane{1}; + let Inst{3} = lane{0}; +} + // Same as N3V except it doesn't have a data type suffix. 
class N3VX op21_20, bits<4> op11_8, bit op6, bit op4, @@ -1730,6 +1850,8 @@ class NVLaneOp opcod1, bits<4> opcod2, bits<2> opcod3, let Inst{11-8} = opcod2; let Inst{6-5} = opcod3; let Inst{4} = 1; + // A8.6.303, A8.6.328, A8.6.329 + let Inst{3-0} = 0b0000; let OutOperandList = oops; let InOperandList = !con(iops, (ins pred:$p)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 6e3fe2e039f5..209c1a3fd96a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -58,7 +58,7 @@ def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>; def SDT_ARMEH_SJLJ_Longjmp: SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisInt<1>]>; -def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; +def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 0, []>; def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>; @@ -93,8 +93,6 @@ def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; -def ARMcneg : SDNode<"ARMISD::CNEG", SDT_ARMCMov, - [SDNPInGlue]>; def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain, SDNPInGlue, SDNPOutGlue]>; @@ -205,13 +203,13 @@ def so_imm_not_XFORM : SDNodeXForm; /// imm1_15 predicate - True if the 32-bit immediate is in the range [1,15]. -def imm1_15 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 16; +def imm1_15 : ImmLeaf= 1 && (int32_t)Imm < 16; }]>; /// imm16_31 predicate - True if the 32-bit immediate is in the range [16,31]. -def imm16_31 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32; +def imm16_31 : ImmLeaf= 16 && (int32_t)Imm < 32; }]>; def so_imm_neg : @@ -241,8 +239,8 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ /// imm0_65535 predicate - True if the 32-bit immediate is in the range /// [0.65535]. 
-def imm0_65535 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 65536; +def imm0_65535 : ImmLeaf= 0 && Imm < 65536; }]>; class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; @@ -377,17 +375,23 @@ def neon_vcvt_imm32 : Operand { } // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. -def rot_imm : Operand, PatLeaf<(i32 imm), [{ - int32_t v = (int32_t)N->getZExtValue(); +def rot_imm : Operand, ImmLeaf { let EncoderMethod = "getRotImmOpValue"; } +def ShifterAsmOperand : AsmOperandClass { + let Name = "Shifter"; + let SuperClasses = []; +} + // shift_imm: An integer that encodes a shift amount and the type of shift // (currently either asr or lsl) using the same encoding used for the // immediates in so_reg operands. def shift_imm : Operand { let PrintMethod = "printShiftImmOperand"; + let ParserMatchClass = ShifterAsmOperand; } // shifter_operand operands: so_reg and so_imm. @@ -396,19 +400,21 @@ def so_reg : Operand, // reg reg imm [shl,srl,sra,rotr]> { let EncoderMethod = "getSORegOpValue"; let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, i32imm); + let MIOperandInfo = (ops GPR, GPR, shift_imm); } def shift_so_reg : Operand, // reg reg imm ComplexPattern { let EncoderMethod = "getSORegOpValue"; let PrintMethod = "printSORegOperand"; - let MIOperandInfo = (ops GPR, GPR, i32imm); + let MIOperandInfo = (ops GPR, GPR, shift_imm); } // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. -def so_imm : Operand, PatLeaf<(imm), [{ return Pred_so_imm(N); }]> { +def so_imm : Operand, ImmLeaf { let EncoderMethod = "getSOImmOpValue"; let PrintMethod = "printSOImmOperand"; } @@ -429,13 +435,13 @@ def arm_i32imm : PatLeaf<(imm), [{ }]>; /// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31]. 
-def imm0_31 : Operand, PatLeaf<(imm), [{ - return (int32_t)N->getZExtValue() < 32; +def imm0_31 : Operand, ImmLeaf= 0 && Imm < 32; }]>; /// imm0_31_m1 - Matches and prints like imm0_31, but encodes as 'value - 1'. -def imm0_31_m1 : Operand, PatLeaf<(imm), [{ - return (int32_t)N->getZExtValue() < 32; +def imm0_31_m1 : Operand, ImmLeaf= 0 && Imm < 32; }]> { let EncoderMethod = "getImmMinusOneOpValue"; } @@ -458,19 +464,30 @@ def bf_inv_mask_imm : Operand, } /// lsb_pos_imm - position of the lsb bit, used by BFI4p and t2BFI4p -def lsb_pos_imm : Operand, PatLeaf<(imm), [{ - return isInt<5>(N->getSExtValue()); +def lsb_pos_imm : Operand, ImmLeaf(Imm); }]>; /// width_imm - number of bits to be copied, used by BFI4p and t2BFI4p -def width_imm : Operand, PatLeaf<(imm), [{ - return N->getSExtValue() > 0 && N->getSExtValue() <= 32; +def width_imm : Operand, ImmLeaf 0 && Imm <= 32; }] > { let EncoderMethod = "getMsbOpValue"; } // Define ARM specific addressing modes. +def MemMode2AsmOperand : AsmOperandClass { + let Name = "MemMode2"; + let SuperClasses = []; + let ParserMethod = "tryParseMemMode2Operand"; +} + +def MemMode3AsmOperand : AsmOperandClass { + let Name = "MemMode3"; + let SuperClasses = []; + let ParserMethod = "tryParseMemMode3Operand"; +} // addrmode_imm12 := reg +/- imm12 // @@ -501,6 +518,7 @@ def addrmode2 : Operand, ComplexPattern { let EncoderMethod = "getAddrMode2OpValue"; let PrintMethod = "printAddrMode2Operand"; + let ParserMatchClass = MemMode2AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } @@ -519,6 +537,7 @@ def addrmode3 : Operand, ComplexPattern { let EncoderMethod = "getAddrMode3OpValue"; let PrintMethod = "printAddrMode3Operand"; + let ParserMatchClass = MemMode3AsmOperand; let MIOperandInfo = (ops GPR:$base, GPR:$offsreg, i32imm:$offsimm); } @@ -586,6 +605,21 @@ def addrmodepc : Operand, let MIOperandInfo = (ops GPR, i32imm); } +def MemMode7AsmOperand : AsmOperandClass { + let Name = "MemMode7"; + let 
SuperClasses = []; +} + +// addrmode7 := reg +// Used by load/store exclusive instructions. Useful to enable right assembly +// parsing and printing. Not used for any codegen matching. +// +def addrmode7 : Operand { + let PrintMethod = "printAddrMode7Operand"; + let MIOperandInfo = (ops GPR); + let ParserMatchClass = MemMode7AsmOperand; +} + def nohash_imm : Operand { let PrintMethod = "printNoHashImmediate"; } @@ -902,52 +936,23 @@ multiclass AI1_adde_sube_irs opcod, string opc, PatFrag opnode, let Inst{19-16} = Rn; } } -// Carry setting variants -let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass AI1_adde_sube_s_irs opcod, string opc, PatFrag opnode, - bit Commutable = 0> { - def Sri : AXI1, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; - let Inst{20} = 1; - let Inst{25} = 1; - } - def Srr : AXI1, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<4> Rm; - let Inst{11-4} = 0b00000000; - let isCommutable = Commutable; - let Inst{3-0} = Rm; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{20} = 1; - let Inst{25} = 0; - } - def Srs : AXI1, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{20} = 1; - let Inst{25} = 0; - } } + +// Carry setting variants +// NOTE: CPSR def omitted because it will be handled by the custom inserter. 
+let usesCustomInserter = 1 in { +multiclass AI1_adde_sube_s_irs { + def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>; + def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + Size4Bytes, IIC_iALUr, + [(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> { + let isCommutable = Commutable; + } + def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>; } } @@ -972,6 +977,7 @@ multiclass AI_ldr1 { bits<4> Rt; bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 let Inst{23} = shift{12}; // U (add = ('U' == 1)) let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = Rt; @@ -1001,6 +1007,7 @@ multiclass AI_str1 { bits<4> Rt; bits<17> shift; + let shift{4} = 0; // Inst{4} = 0 let Inst{23} = shift{12}; // U (add = ('U' == 1)) let Inst{19-16} = shift{16-13}; // Rn let Inst{15-12} = Rt; @@ -1249,7 +1256,7 @@ let neverHasSideEffects = 1, isReMaterializable = 1 in // The 'adr' mnemonic encodes differently if the label is before or after // the instruction. The {24-21} opcode bits are set by the fixup, as we don't // know until then which form of the instruction will be used. -def ADR : AI1<0, (outs GPR:$Rd), (ins adrlabel:$label), +def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), MiscFrm, IIC_iALUi, "adr", "\t$Rd, #$label", []> { bits<4> Rd; bits<12> label; @@ -1311,6 +1318,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // before calls from potentially appearing dead. let isCall = 1, // On non-Darwin platforms R9 is callee-saved. + // FIXME: Do we really need a non-predicated version? If so, it should + // at least be a pseudo instruction expanding to the predicated version + // at MC lowering time. 
Defs = [R0, R1, R2, R3, R12, LR, D0, D1, D2, D3, D4, D5, D6, D7, D16, D17, D18, D19, D20, D21, D22, D23, @@ -1340,7 +1350,16 @@ let isCall = 1, Requires<[IsARM, HasV5T, IsNotDarwin]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; - let Inst{3-0} = func; + let Inst{3-0} = func; + } + + def BLX_pred : AI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, + IIC_Br, "blx", "\t$func", + [(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsNotDarwin]> { + bits<4> func; + let Inst{27-4} = 0b000100101111111111110011; + let Inst{3-0} = func; } // ARMv4T @@ -1364,30 +1383,25 @@ let isCall = 1, D16, D17, D18, D19, D20, D21, D22, D23, D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR], Uses = [R7, SP] in { - def BLr9 : ABXI<0b1011, (outs), (ins bltarget:$func, variable_ops), - IIC_Br, "bl\t$func", - [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]> { - let Inst{31-28} = 0b1110; - bits<24> func; - let Inst{23-0} = func; - } + def BLr9 : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>; - def BLr9_pred : ABI<0b1011, (outs), (ins bltarget:$func, variable_ops), - IIC_Br, "bl", "\t$func", + def BLr9_pred : ARMPseudoInst<(outs), + (ins bltarget:$func, pred:$p, variable_ops), + Size4Bytes, IIC_Br, [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM, IsDarwin]> { - bits<24> func; - let Inst{23-0} = func; - } + Requires<[IsARM, IsDarwin]>; // ARMv5T and above - def BLXr9 : AXI<(outs), (ins GPR:$func, variable_ops), BrMiscFrm, - IIC_Br, "blx\t$func", - [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]> { - bits<4> func; - let Inst{31-4} = 0b1110000100101111111111110011; - let Inst{3-0} = func; - } + def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops), + Size4Bytes, IIC_Br, + [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>; + + def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p, variable_ops), + Size4Bytes, IIC_Br, + 
[(ARMcall_pred GPR:$func)]>, + Requires<[IsARM, HasV5T, IsDarwin]>; // ARMv4T // Note: Restrict $func to the tGPR regclass to prevent it being in LR. @@ -1403,11 +1417,7 @@ let isCall = 1, // Tail calls. -// FIXME: These should probably be xformed into the non-TC versions of the -// instructions as part of MC lowering. -// FIXME: These seem to be used for both Thumb and ARM instruction selection. -// Thumb should have its own version since the instruction is actually -// different, even though the mnemonic is the same. +// FIXME: The Thumb versions of these should live in ARMInstrThumb.td let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { // Darwin versions. let Defs = [R0, R1, R2, R3, R9, R12, @@ -1421,21 +1431,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), IIC_Br, []>, Requires<[IsDarwin]>; - def TAILJMPd : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b\t$dst @ TAILCALL", + def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsARM, IsDarwin]>; - def TAILJMPdt: ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b.w\t$dst @ TAILCALL", + def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsThumb, IsDarwin]>; - def TAILJMPr : AXI<(outs), (ins tcGPR:$dst, variable_ops), - BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", - []>, Requires<[IsDarwin]> { - bits<4> dst; - let Inst{31-4} = 0b1110000100101111111111110001; - let Inst{3-0} = dst; - } + def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsARM, IsDarwin]>; + + def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsThumb, IsDarwin]>; } // Non-Darwin versions (the difference is R9). 
@@ -1450,34 +1460,31 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in { def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops), IIC_Br, []>, Requires<[IsNotDarwin]>; - def TAILJMPdND : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b\t$dst @ TAILCALL", + def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsARM, IsNotDarwin]>; - def TAILJMPdNDt : ABXI<0b1010, (outs), (ins brtarget:$dst, variable_ops), - IIC_Br, "b.w\t$dst @ TAILCALL", + def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops), + Size4Bytes, IIC_Br, []>, Requires<[IsThumb, IsNotDarwin]>; - def TAILJMPrND : AXI<(outs), (ins tcGPR:$dst, variable_ops), - BrMiscFrm, IIC_Br, "bx\t$dst @ TAILCALL", - []>, Requires<[IsNotDarwin]> { - bits<4> dst; - let Inst{31-4} = 0b1110000100101111111111110001; - let Inst{3-0} = dst; - } + def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsARM, IsNotDarwin]>; + def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops), + Size4Bytes, IIC_Br, + []>, Requires<[IsThumb, IsNotDarwin]>; } } let isBranch = 1, isTerminator = 1 in { - // B is "predicable" since it can be xformed into a Bcc. + // B is "predicable" since it's just a Bcc with an 'always' condition. let isBarrier = 1 in { let isPredicable = 1 in - def B : ABXI<0b1010, (outs), (ins brtarget:$target), IIC_Br, - "b\t$target", [(br bb:$target)]> { - bits<24> target; - let Inst{31-28} = 0b1110; - let Inst{23-0} = target; - } + // FIXME: We shouldn't need this pseudo at all. Just using Bcc directly + // should be sufficient. 
+ def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br, + [(br bb:$target)]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), @@ -1509,6 +1516,16 @@ let isBranch = 1, isTerminator = 1 in { } } +// BLX (immediate) -- for disassembly only +def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary, + "blx\t$target", [/* pattern left blank */]>, + Requires<[IsARM, HasV5T]> { + let Inst{31-25} = 0b1111101; + bits<25> target; + let Inst{23-0} = target{24-1}; + let Inst{24} = target{0}; +} + // Branch and Exchange Jazelle -- for disassembly only def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", [/* For disassembly only; pattern left blank */]> { @@ -1533,6 +1550,7 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", let Inst{23-0} = svc; } } +def : MnemonicAlias<"swi", "svc">; // Store Return State is a system instruction -- for disassembly only let isCodeGenOnly = 1 in { // FIXME: This should not use submode! 
@@ -1541,6 +1559,8 @@ def SRSW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b110; // W = 1 + let Inst{19-8} = 0xd05; + let Inst{7-5} = 0b000; } def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), @@ -1548,6 +1568,8 @@ def SRS : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, i32imm:$mode), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b100; // W = 0 + let Inst{19-8} = 0xd05; + let Inst{7-5} = 0b000; } // Return From Exception is a system instruction -- for disassembly only @@ -1556,6 +1578,7 @@ def RFEW : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b011; // W = 1 + let Inst{15-0} = 0x0a00; } def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), @@ -1563,6 +1586,7 @@ def RFE : ABXI<{1,0,0,?}, (outs), (ins ldstm_mode:$amode, GPR:$base), [/* For disassembly only; pattern left blank */]> { let Inst{31-28} = 0b1111; let Inst{22-20} = 0b001; // W = 0 + let Inst{15-0} = 0x0a00; } } // isCodeGenOnly = 1 @@ -1610,15 +1634,11 @@ def LDRSB : AI3ld<0b1101, 1, (outs GPR:$Rt), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", [(set GPR:$Rt, (sextloadi8 addrmode3:$addr))]>; -let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, - isCodeGenOnly = 1 in { // $dst2 doesn't exist in asmstring? -// FIXME: $dst2 isn't in the asm string as it's implied by $Rd (dst2 = Rd+1) -// how to represent that such that tblgen is happy and we don't -// mark this codegen only? 
+let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoad_d_r, "ldrd", "\t$Rd, $addr", + IIC_iLoad_d_r, "ldrd", "\t$Rd, $dst2, $addr", []>, Requires<[IsARM, HasV5TE]>; } @@ -1636,6 +1656,7 @@ multiclass AI2_ldridx { let Inst{23} = addr{12}; let Inst{19-16} = addr{17-14}; let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; } def _POST : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins GPR:$Rn, am2offset:$offset), @@ -1688,40 +1709,80 @@ let mayLoad = 1, neverHasSideEffects = 1 in { defm LDRH : AI3_ldridx<0b1011, 1, "ldrh", IIC_iLoad_bh_ru>; defm LDRSH : AI3_ldridx<0b1111, 1, "ldrsh", IIC_iLoad_bh_ru>; defm LDRSB : AI3_ldridx<0b1101, 1, "ldrsb", IIC_iLoad_bh_ru>; -let hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -defm LDRD : AI3_ldridx<0b1101, 0, "ldrd", IIC_iLoad_d_ru>; +let hasExtraDefRegAllocReq = 1 in { +def LDRD_PRE : AI3ldstidx<0b1101, 0, 1, 1, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins addrmode3:$addr), IndexModePre, + LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, $addr!", + "$addr.base = $Rn_wb", []> { + bits<14> addr; + let Inst{23} = addr{8}; // U bit + let Inst{22} = addr{13}; // 1 == imm8, 0 == Rm + let Inst{19-16} = addr{12-9}; // Rn + let Inst{11-8} = addr{7-4}; // imm7_4/zero + let Inst{3-0} = addr{3-0}; // imm3_0/Rm +} +def LDRD_POST: AI3ldstidx<0b1101, 0, 1, 0, (outs GPR:$Rt, GPR:$Rt2, GPR:$Rn_wb), + (ins GPR:$Rn, am3offset:$offset), IndexModePost, + LdMiscFrm, IIC_iLoad_d_ru, + "ldrd", "\t$Rt, $Rt2, [$Rn], $offset", + "$Rn = $Rn_wb", []> { + bits<10> offset; + bits<4> Rn; + let Inst{23} = offset{8}; // U bit + let Inst{22} = offset{9}; // 1 == imm8, 0 == Rm + let Inst{19-16} = Rn; + let Inst{11-8} = offset{7-4}; // imm7_4/zero + let Inst{3-0} = offset{3-0}; // imm3_0/Rm +} +} // hasExtraDefRegAllocReq = 1 } // mayLoad = 1, neverHasSideEffects = 1 // LDRT, LDRBT, 
LDRSBT, LDRHT, LDRSHT are for disassembly only. let mayLoad = 1, neverHasSideEffects = 1 in { -def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), IndexModeNone, - LdFrm, IIC_iLoad_ru, - "ldrt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRT : AI2ldstidx<1, 0, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_ru, + "ldrt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; +} +def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode2:$addr), IndexModePost, LdFrm, IIC_iLoad_bh_ru, + "ldrbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { + // {17-14} Rn + // {13} 1 == Rm, 0 == imm12 + // {12} isAdd + // {11-0} imm12/Rm + bits<18> addr; + let Inst{25} = addr{13}; + let Inst{23} = addr{12}; + let Inst{21} = 1; // overwrite + let Inst{19-16} = addr{17-14}; + let Inst{11-0} = addr{11-0}; + let AsmMatchConverter = "CvtLdWriteBackRegAddrMode2"; +} +def LDRSBT : AI3ldstidxT<0b1101, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsbt", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } -def LDRBT : AI2ldstidx<1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am2offset:$offset), IndexModeNone, - LdFrm, IIC_iLoad_bh_ru, - "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRHT : AI3ldstidxT<0b1011, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } -def LDRSBT : AI3ldstidx<0b1101, 1, 1, 0, 
(outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> { - let Inst{21} = 1; // overwrite -} -def LDRHT : AI3ldstidx<0b1011, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { - let Inst{21} = 1; // overwrite -} -def LDRSHT : AI3ldstidx<0b1111, 1, 1, 0, (outs GPR:$dst, GPR:$base_wb), - (ins GPR:$base, am3offset:$offset), IndexModePost, - LdMiscFrm, IIC_iLoad_bh_ru, - "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> { +def LDRSHT : AI3ldstidxT<0b1111, 1, 1, 0, (outs GPR:$Rt, GPR:$base_wb), + (ins addrmode3:$addr), IndexModePost, LdMiscFrm, IIC_iLoad_bh_ru, + "ldrsht", "\t$Rt, $addr", "$addr.base = $base_wb", []> { let Inst{21} = 1; // overwrite } } @@ -1734,55 +1795,61 @@ def STRH : AI3str<0b1011, (outs), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, [(truncstorei16 GPR:$Rt, addrmode3:$addr)]>; // Store doubleword -let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1, - isCodeGenOnly = 1 in // $src2 doesn't exist in asm string -def STRD : AI3str<0b1111, (outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in +def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStore_d_r, - "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$Rt, $src2, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stridx<0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePre, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "str", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STR_POST : AI2stridx<0, 0, (outs 
GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePost, StFrm, IIC_iStore_ru, - "str", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "str", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_store GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRB_PRE : AI2stridx<1, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePre, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "strb", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_truncsti8 GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRB_POST: AI2stridx<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), IndexModePost, StFrm, IIC_iStore_bh_ru, - "strb", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "strb", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti8 GPR:$Rt, GPR:$Rn, am2offset:$offset))]>; def STRH_PRE : AI3stridx<0b1011, 0, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), IndexModePre, StMiscFrm, IIC_iStore_ru, - "strh", "\t$Rt, [$Rn, $offset]!", "$Rn = $Rn_wb", + "strh", "\t$Rt, [$Rn, $offset]!", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (pre_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; def STRH_POST: AI3stridx<0b1011, 0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, GPR:$Rn, am3offset:$offset), IndexModePost, StMiscFrm, IIC_iStore_bh_ru, - "strh", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", + "strh", "\t$Rt, [$Rn], $offset", + "$Rn = $Rn_wb,@earlyclobber $Rn_wb", [(set GPR:$Rn_wb, (post_truncsti16 GPR:$Rt, GPR:$Rn, am3offset:$offset))]>; // For disassembly only +let mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 in { def STRD_PRE : AI3stdpr<(outs GPR:$base_wb), (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset), StMiscFrm, IIC_iStore_d_ru, @@ -1795,31 +1862,32 @@ def STRD_POST: AI3stdpo<(outs GPR:$base_wb), StMiscFrm, IIC_iStore_d_ru, "strd", "\t$src1, $src2, 
[$base], $offset", "$base = $base_wb", []>; +} // mayStore = 1, neverHasSideEffects = 1, hasExtraSrcRegAllocReq = 1 // STRT, STRBT, and STRHT are for disassembly only. -def STRT : AI2stridx<0, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn,am2offset:$offset), - IndexModeNone, StFrm, IIC_iStore_ru, - "strt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", - [/* For disassembly only; pattern left blank */]> { - let Inst{21} = 1; // overwrite -} - -def STRBT : AI2stridx<1, 0, (outs GPR:$Rn_wb), - (ins GPR:$Rt, GPR:$Rn, am2offset:$offset), - IndexModeNone, StFrm, IIC_iStore_bh_ru, - "strbt", "\t$Rt, [$Rn], $offset", "$Rn = $Rn_wb", +def STRT : AI2stridxT<0, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), + IndexModePost, StFrm, IIC_iStore_ru, + "strt", "\t$Rt, $addr", "$addr.base = $Rn_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; } -def STRHT: AI3sthpo<(outs GPR:$base_wb), - (ins GPR:$src, GPR:$base,am3offset:$offset), +def STRBT : AI2stridxT<1, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode2:$addr), + IndexModePost, StFrm, IIC_iStore_bh_ru, + "strbt", "\t$Rt, $addr", "$addr.base = $Rn_wb", + [/* For disassembly only; pattern left blank */]> { + let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode2"; +} + +def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr), StMiscFrm, IIC_iStore_bh_ru, - "strht", "\t$src, [$base], $offset", "$base = $base_wb", + "strht", "\t$Rt, $addr", "$addr.base = $base_wb", [/* For disassembly only; pattern left blank */]> { let Inst{21} = 1; // overwrite + let AsmMatchConverter = "CvtStWriteBackRegAddrMode3"; } //===----------------------------------------------------------------------===// @@ -1892,7 +1960,7 @@ multiclass arm_ldst_mult; // FIXME: Should pc be an implicit operand like PICADD, etc? 
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in -// FIXME: Should be a pseudo-instruction. -def LDMIA_RET : AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, - reglist:$regs, variable_ops), - IndexModeUpd, LdStMulFrm, IIC_iLoad_mBr, - "ldmia${p}\t$Rn!, $regs", - "$Rn = $wb", []> { - let Inst{24-23} = 0b01; // Increment After - let Inst{21} = 1; // Writeback - let Inst{20} = 1; // Load -} +def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, + reglist:$regs, variable_ops), + Size4Bytes, IIC_iLoad_mBr, []>, + RegConstraint<"$Rn = $wb">; //===----------------------------------------------------------------------===// // Move Instructions. @@ -1933,6 +1995,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, bits<4> Rd; bits<4> Rm; + let Inst{19-16} = 0b0000; let Inst{11-4} = 0b00000000; let Inst{25} = 0; let Inst{3-0} = Rm; @@ -1959,6 +2022,7 @@ def MOVs : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg:$src), bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; + let Inst{19-16} = 0b0000; let Inst{11-0} = src; let Inst{25} = 0; } @@ -2145,10 +2209,12 @@ defm SBC : AI1_adde_sube_irs<0b0110, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; // ADC and SUBC with 's' bit set. -defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; +let usesCustomInserter = 1 in { +defm ADCS : AI1_adde_sube_s_irs< + BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; +defm SBCS : AI1_adde_sube_s_irs< + BinOpFrag<(sube_live_carry node:$LHS, node:$RHS) >>; +} def RSBri : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iALUi, "rsb", "\t$Rd, $Rn, $imm", @@ -2190,31 +2256,17 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), } // RSB with 's' bit set. 
-let isCodeGenOnly = 1, Defs = [CPSR] in { -def RSBSri : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), DPFrm, - IIC_iALUi, "rsbs", "\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -def RSBSrs : AI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rsbs", "\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>; +def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), + Size4Bytes, IIC_iALUr, + [/* For disassembly only; pattern left blank */]>; +def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>; } let Uses = [CPSR] in { @@ -2258,34 +2310,14 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), } } -// FIXME: Allow these to be predicated. 
-let isCodeGenOnly = 1, Defs = [CPSR], Uses = [CPSR] in { -def RSCSri : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), - DPFrm, IIC_iALUi, "rscs\t$Rd, $Rn, $imm", - [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 1; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; - let Inst{11-0} = imm; -} -def RSCSrs : AXI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), - DPSoRegFrm, IIC_iALUsr, "rscs\t$Rd, $Rn, $shift", - [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>, - Requires<[IsARM]> { - bits<4> Rd; - bits<4> Rn; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 1; - let Inst{11-0} = shift; - let Inst{15-12} = Rd; - let Inst{19-16} = Rn; -} +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1, Uses = [CPSR] in { +def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>; +def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift), + Size4Bytes, IIC_iALUsr, + [(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>; } // (sub X, imm) gets canonicalized to (add X, -imm). Match this form. @@ -2300,8 +2332,10 @@ def : ARMPat<(addc GPR:$src, so_imm_neg:$imm), // The with-carry-in form matches bitwise not instead of the negation. // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
-def : ARMPat<(adde GPR:$src, so_imm_not:$imm), +def : ARMPat<(adde_dead_carry GPR:$src, so_imm_not:$imm), (SBCri GPR:$src, so_imm_not:$imm)>; +def : ARMPat<(adde_live_carry GPR:$src, so_imm_not:$imm), + (SBCSri GPR:$src, so_imm_not:$imm)>; // Note: These are implemented in C++ code, because they have to generate // ADD/SUBrs instructions, which use a complex pattern that a xform function @@ -2617,14 +2651,16 @@ def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]> { + let Inst{15-12} = 0b0000; +} } let Constraints = "@earlyclobber $Rd" in def MLAv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s), - Size4Bytes, IIC_iMAC32, - [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, + Size4Bytes, IIC_iMAC32, + [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, Requires<[IsARM, NoV6]> { bits<4> Ra; let Inst{15-12} = Ra; @@ -2657,7 +2693,7 @@ let neverHasSideEffects = 1 in { let isCommutable = 1 in { let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMUL64, []>, Requires<[IsARM, NoV6]>; @@ -2681,15 +2717,15 @@ def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), // Multiply + accumulate let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, 
Requires<[IsARM, NoV6]>; def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi), - (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), + (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), Size4Bytes, IIC_iMAC64, []>, Requires<[IsARM, NoV6]>; @@ -2970,17 +3006,25 @@ def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm), IIC_iUNAr, "revsh", "\t$Rd, $Rm", [(set GPR:$Rd, (sext_inreg - (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), + (or (srl GPR:$Rm, (i32 8)), (shl GPR:$Rm, (i32 8))), i16))]>, Requires<[IsARM, HasV6]>; +def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)), + (shl GPR:$Rm, (i32 8))), i16), + (REVSH GPR:$Rm)>; + +// Need the AddedComplexity or else MOVs + REV would be chosen. +let AddedComplexity = 5 in +def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>; + def lsl_shift_imm : SDNodeXFormgetZExtValue()); return CurDAG->getTargetConstant(Sh, MVT::i32); }]>; -def lsl_amt : PatLeaf<(i32 imm), [{ - return (N->getZExtValue() < 32); +def lsl_amt : ImmLeaf 0 && Imm < 32; }], lsl_shift_imm>; def PKHBT : APKHI<0b01101000, 0, (outs GPR:$Rd), @@ -3002,8 +3046,8 @@ def asr_shift_imm : SDNodeXFormgetTargetConstant(Sh, MVT::i32); }]>; -def asr_amt : PatLeaf<(i32 imm), [{ - return (N->getZExtValue() <= 32); +def asr_amt : ImmLeaf 0 && Imm <= 32; }], asr_shift_imm>; // Note: Shifts of 1-15 bits will be transformed to srl instead of sra and @@ -3119,88 +3163,43 @@ def BCCZi64 : PseudoInst<(outs), // Conditional moves // FIXME: should be able to write a pattern for ARMcmov, but can't use // a two-value operand where a dag node expects two operands. :( -// FIXME: These should all be pseudo-instructions that get expanded to -// the normal MOV instructions. That would fix the dependency on -// special casing them in tblgen. 
let neverHasSideEffects = 1 in { -def MOVCCr : AI1<0b1101, (outs GPR:$Rd), (ins GPR:$false, GPR:$Rm), DPFrm, - IIC_iCMOVr, "mov", "\t$Rd, $Rm", - [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<4> Rm; - let Inst{25} = 0; - let Inst{20} = 0; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rm; -} - -def MOVCCs : AI1<0b1101, (outs GPR:$Rd), - (ins GPR:$false, so_reg:$shift), DPSoRegFrm, IIC_iCMOVsr, - "mov", "\t$Rd, $shift", - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> shift; - let Inst{25} = 0; - let Inst{20} = 0; - let Inst{19-16} = 0; - let Inst{15-12} = Rd; - let Inst{11-0} = shift; -} +def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), + Size4Bytes, IIC_iCMOVr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; +def MOVCCs : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_reg:$shift, pred:$p), + Size4Bytes, IIC_iCMOVsr, + [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>, + RegConstraint<"$false = $Rd">; let isMoveImm = 1 in -def MOVCCi16 : AI1<0b1000, (outs GPR:$Rd), (ins GPR:$false, i32imm_hilo16:$imm), - DPFrm, IIC_iMOVi, - "movw", "\t$Rd, $imm", - []>, - RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, - UnaryDP { - bits<4> Rd; - bits<16> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = imm{15-12}; - let Inst{15-12} = Rd; - let Inst{11-0} = imm{11-0}; -} +def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm_hilo16:$imm, pred:$p), + Size4Bytes, IIC_iMOVi, + []>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; let isMoveImm = 1 in -def MOVCCi : AI1<0b1101, (outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, - "mov", "\t$Rd, $imm", +def MOVCCi : ARMPseudoInst<(outs 
GPR:$Rd), + (ins GPR:$false, so_imm:$imm, pred:$p), + Size4Bytes, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; -} + RegConstraint<"$false = $Rd">; // Two instruction predicate mov immediate. let isMoveImm = 1 in -def MOVCCi32imm : PseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm:$src, pred:$p), - IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; +def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm:$src, pred:$p), + Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; let isMoveImm = 1 in -def MVNCCi : AI1<0b1111, (outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm), DPFrm, IIC_iCMOVi, - "mvn", "\t$Rd, $imm", +def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, so_imm:$imm, pred:$p), + Size4Bytes, IIC_iCMOVi, [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">, UnaryDP { - bits<4> Rd; - bits<12> imm; - let Inst{25} = 1; - let Inst{20} = 0; - let Inst{19-16} = 0b0000; - let Inst{15-12} = Rd; - let Inst{11-0} = imm; -} + RegConstraint<"$false = $Rd">; } // neverHasSideEffects //===----------------------------------------------------------------------===// @@ -3221,13 +3220,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{31-4} = 0xf57ff05; let Inst{3-0} = opt; } - -def DMB_MCR : AInoP<(outs), (ins GPR:$zero), MiscFrm, NoItinerary, - "mcr", "\tp15, 0, $zero, c7, c10, 5", - [(ARMMemBarrierMCR GPR:$zero)]>, - Requires<[IsARM, HasV6]> { - // FIXME: add encoding -} } def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, @@ -3266,6 +3258,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_8 
GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I8 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; @@ -3284,6 +3288,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I16 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I16 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; @@ -3302,6 +3318,18 @@ let usesCustomInserter = 1 in { def ATOMIC_LOAD_NAND_I32 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, [(set GPR:$dst, 
(atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + def ATOMIC_LOAD_MIN_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_MAX_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMIN_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + def ATOMIC_LOAD_UMAX_I32 : PseudoInst< + (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, + [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; def ATOMIC_SWAP_I8 : PseudoInst< (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, @@ -3326,39 +3354,26 @@ let usesCustomInserter = 1 in { } let mayLoad = 1 in { -def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrexb", "\t$Rt, [$Rn]", - []>; -def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrexh", "\t$Rt, [$Rn]", - []>; -def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins GPR:$Rn), NoItinerary, - "ldrex", "\t$Rt, [$Rn]", - []>; -def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins GPR:$Rn), - NoItinerary, - "ldrexd", "\t$Rt, $Rt2, [$Rn]", - []>; +def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrexb", "\t$Rt, $addr", []>; +def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrexh", "\t$Rt, $addr", []>; +def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addrmode7:$addr), NoItinerary, + "ldrex", "\t$Rt, $addr", []>; +def LDREXD : AIldrex<0b01, (outs GPR:$Rt, GPR:$Rt2), (ins addrmode7:$addr), + NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", []>; } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$src, GPR:$Rn), - NoItinerary, - "strexb", "\t$Rd, $src, [$Rn]", - []>; -def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins 
GPR:$Rt, GPR:$Rn), - NoItinerary, - "strexh", "\t$Rd, $Rt, [$Rn]", - []>; -def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, GPR:$Rn), - NoItinerary, - "strex", "\t$Rd, $Rt, [$Rn]", - []>; +def STREXB : AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; +def STREXH : AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; +def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addrmode7:$addr), + NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; def STREXD : AIstrex<0b01, (outs GPR:$Rd), - (ins GPR:$Rt, GPR:$Rt2, GPR:$Rn), - NoItinerary, - "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", - []>; + (ins GPR:$Rt, GPR:$Rt2, addrmode7:$addr), + NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", []>; } // Clear-Exclusive is for disassembly only. @@ -3376,12 +3391,330 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb", [/* For disassembly only; pattern left blank */]>; } +//===----------------------------------------------------------------------===// +// Coprocessor Instructions. 
+// + +def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; +} + +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), + NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + bits<4> opc1; + bits<4> CRn; + bits<4> CRd; + bits<4> cop; + bits<3> opc2; + bits<4> CRm; + + let Inst{3-0} = CRm; + let Inst{4} = 0; + let Inst{7-5} = opc2; + let Inst{11-8} = cop; + let Inst{15-12} = CRd; + let Inst{19-16} = CRn; + let Inst{23-20} = opc1; +} + +class ACI + : InoP { + let Inst{27-25} = 0b110; +} + +multiclass LdStCop op31_28, bit load, dag ops, string opc, string cond>{ + + def _OFFSET : ACI<(outs), + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _PRE : ACI<(outs), + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr!", IndexModePre> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _POST : ACI<(outs), + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, $addr", IndexModePost> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} 
= 1; // W = 1 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def _OPTION : ACI<(outs), + !con((ins nohash_imm:$cop,nohash_imm:$CRd,GPR:$base, nohash_imm:$option), + ops), + !strconcat(opc, cond), "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 0; // D = 0 + let Inst{20} = load; + } + + def L_OFFSET : ACI<(outs), + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr"> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_PRE : ACI<(outs), + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr!", + IndexModePre> { + let Inst{31-28} = op31_28; + let Inst{24} = 1; // P = 1 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_POST : ACI<(outs), + !con((ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), ops), + !strconcat(!strconcat(opc, "l"), cond), "\tp$cop, cr$CRd, $addr", + IndexModePost> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{21} = 1; // W = 1 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } + + def L_OPTION : ACI<(outs), + !con((ins nohash_imm:$cop, nohash_imm:$CRd,GPR:$base,nohash_imm:$option), + ops), + !strconcat(!strconcat(opc, "l"), cond), + "\tp$cop, cr$CRd, [$base], \\{$option\\}"> { + let Inst{31-28} = op31_28; + let Inst{24} = 0; // P = 0 + let Inst{23} = 1; // U = 1 + let Inst{21} = 0; // W = 0 + let Inst{22} = 1; // D = 1 + let Inst{20} = load; + } +} + +defm LDC : LdStCop<{?,?,?,?}, 1, (ins pred:$p), "ldc", "${p}">; +defm LDC2 : LdStCop<0b1111, 1, (ins), "ldc2", "">; +defm STC : LdStCop<{?,?,?,?}, 0, (ins pred:$p), "stc", "${p}">; +defm STC2 : LdStCop<0b1111, 0, (ins), 
"stc2", "">; + +//===----------------------------------------------------------------------===// +// Move between coprocessor and ARM core register -- for disassembly only +// + +class MovRCopro + : ABI<0b1110, oops, iops, NoItinerary, opc, + "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2", + [/* For disassembly only; pattern left blank */]> { + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; +} + +def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; +def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRn, c_imm:$CRm, i32imm:$opc2)>; + +class MovRCopro2 + : ABXI<0b1110, oops, iops, NoItinerary, + !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{20} = direction; + let Inst{4} = 1; + + bits<4> Rt; + bits<4> cop; + bits<3> opc1; + bits<3> opc2; + bits<4> CRm; + bits<4> CRn; + + let Inst{15-12} = Rt; + let Inst{11-8} = cop; + let Inst{23-21} = opc1; + let Inst{7-5} = opc2; + let Inst{3-0} = CRm; + let Inst{19-16} = CRn; +} + +def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; +def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, + c_imm:$CRn, c_imm:$CRm, + i32imm:$opc2)>; + +class MovRRCopro + : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", + [/* For disassembly only; 
pattern left blank */]> { + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; +} + +def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>; +def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; + +class MovRRCopro2 + : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, + GPR:$Rt, GPR:$Rt2, c_imm:$CRm), + NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), + [/* For disassembly only; pattern left blank */]> { + let Inst{31-28} = 0b1111; + let Inst{23-21} = 0b010; + let Inst{20} = direction; + + bits<4> Rt; + bits<4> Rt2; + bits<4> cop; + bits<4> opc1; + bits<4> CRm; + + let Inst{15-12} = Rt; + let Inst{19-16} = Rt2; + let Inst{11-8} = cop; + let Inst{7-4} = opc1; + let Inst{3-0} = CRm; +} + +def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */>; +def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; + +//===----------------------------------------------------------------------===// +// Move between special register and ARM core register -- for disassembly only +// + +// Move to ARM core register from Special Register +def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + let Inst{23-16} = 0b00001111; + let Inst{15-12} = Rd; + let Inst{7-4} = 0b0000; +} + +def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", + [/* For disassembly only; pattern left blank */]> { + bits<4> Rd; + let Inst{23-16} = 0b01001111; + let Inst{15-12} = Rd; + let Inst{7-4} = 0b0000; +} + +// Move from ARM core register to Special Register +// +// No need to have both system and application versions, the encodings are the +// same and the assembly parser has no way 
to distinguish between them. The mask +// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains +// the mask with the fields to be accessed in the special register. +def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, + "msr", "\t$mask, $Rn", + [/* For disassembly only; pattern left blank */]> { + bits<5> mask; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rn; +} + +def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, + "msr", "\t$mask, $a", + [/* For disassembly only; pattern left blank */]> { + bits<5> mask; + bits<12> a; + + let Inst{23} = 0; + let Inst{22} = mask{4}; // R bit + let Inst{21-20} = 0b10; + let Inst{19-16} = mask{3-0}; + let Inst{15-12} = 0b1111; + let Inst{11-0} = a; +} + //===----------------------------------------------------------------------===// // TLS Instructions // // __aeabi_read_tp preserves the registers r1-r3. -// This is a pseudo inst so that we can get the encoding right, +// This is a pseudo inst so that we can get the encoding right, // complete with fixup for the aeabi_read_tp function. let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in { @@ -3397,7 +3730,7 @@ let isCall = 1, // here, and we're using the stack frame for the containing function to // save/restore registers, we can't keep anything live in regs across // the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon -// when we get here from a longjmp(). We force everthing out of registers +// when we get here from a longjmp(). We force everything out of registers // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. 
@@ -3440,8 +3773,8 @@ def Int_eh_sjlj_longjmp : PseudoInst<(outs), (ins GPR:$src, GPR:$scratch), // that need the instruction size). let isBarrier = 1, hasSideEffects = 1 in def Int_eh_sjlj_dispatchsetup : - PseudoInst<(outs), (ins GPR:$src), NoItinerary, - [(ARMeh_sjlj_dispatchsetup GPR:$src)]>, + PseudoInst<(outs), (ins), NoItinerary, + [(ARMeh_sjlj_dispatchsetup)]>, Requires<[IsDarwin]>; //===----------------------------------------------------------------------===// @@ -3583,6 +3916,12 @@ def : ARMV5TEPat<(add GPR:$acc, (sra (mul GPR:$a, sext_16_node:$b), (i32 16))), (SMLAWB GPR:$a, GPR:$b, GPR:$acc)>; + +// Pre-v7 uses MCR for synchronization barriers. +def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, + Requires<[IsARM, HasV6]>; + + //===----------------------------------------------------------------------===// // Thumb Support // @@ -3607,305 +3946,3 @@ include "ARMInstrVFP.td" include "ARMInstrNEON.td" -//===----------------------------------------------------------------------===// -// Coprocessor Instructions. For disassembly only. 
-// - -def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { - bits<4> opc1; - bits<4> CRn; - bits<4> CRd; - bits<4> cop; - bits<3> opc2; - bits<4> CRm; - - let Inst{3-0} = CRm; - let Inst{4} = 0; - let Inst{7-5} = opc2; - let Inst{11-8} = cop; - let Inst{15-12} = CRd; - let Inst{19-16} = CRn; - let Inst{23-20} = opc1; -} - -def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - bits<4> opc1; - bits<4> CRn; - bits<4> CRd; - bits<4> cop; - bits<3> opc2; - bits<4> CRm; - - let Inst{3-0} = CRm; - let Inst{4} = 0; - let Inst{7-5} = opc2; - let Inst{11-8} = cop; - let Inst{15-12} = CRd; - let Inst{19-16} = CRn; - let Inst{23-20} = opc1; -} - -class ACI - : I { - let Inst{27-25} = 0b110; -} - -multiclass LdStCop op31_28, bit load, string opc> { - - def _OFFSET : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "\tp$cop, cr$CRd, $addr"> { - let Inst{31-28} = op31_28; - let Inst{24} = 1; // P = 1 - let Inst{21} = 0; // W = 0 - let Inst{22} = 0; // D = 0 - let Inst{20} = load; - } - - def _PRE : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - opc, "\tp$cop, cr$CRd, $addr!"> { - let Inst{31-28} = op31_28; - let Inst{24} = 1; // P = 1 - let Inst{21} = 1; // W = 1 - let Inst{22} = 0; // D = 0 - let Inst{20} = load; - } - - def _POST : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - opc, "\tp$cop, cr$CRd, [$base], $offset"> { - let Inst{31-28} = op31_28; - let Inst{24} = 0; // P = 0 - let Inst{21} = 1; // W = 1 - let Inst{22} = 0; // D = 0 - let Inst{20} = load; - } - - def _OPTION : ACI<(outs), - (ins 
nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option), - opc, "\tp$cop, cr$CRd, [$base], $option"> { - let Inst{31-28} = op31_28; - let Inst{24} = 0; // P = 0 - let Inst{23} = 1; // U = 1 - let Inst{21} = 0; // W = 0 - let Inst{22} = 0; // D = 0 - let Inst{20} = load; - } - - def L_OFFSET : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr"> { - let Inst{31-28} = op31_28; - let Inst{24} = 1; // P = 1 - let Inst{21} = 0; // W = 0 - let Inst{22} = 1; // D = 1 - let Inst{20} = load; - } - - def L_PRE : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, $addr!"> { - let Inst{31-28} = op31_28; - let Inst{24} = 1; // P = 1 - let Inst{21} = 1; // W = 1 - let Inst{22} = 1; // D = 1 - let Inst{20} = load; - } - - def L_POST : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $offset"> { - let Inst{31-28} = op31_28; - let Inst{24} = 0; // P = 0 - let Inst{21} = 1; // W = 1 - let Inst{22} = 1; // D = 1 - let Inst{20} = load; - } - - def L_OPTION : ACI<(outs), - (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option), - !strconcat(opc, "l"), "\tp$cop, cr$CRd, [$base], $option"> { - let Inst{31-28} = op31_28; - let Inst{24} = 0; // P = 0 - let Inst{23} = 1; // U = 1 - let Inst{21} = 0; // W = 0 - let Inst{22} = 1; // D = 1 - let Inst{20} = load; - } -} - -defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">; -defm LDC2 : LdStCop<0b1111, 1, "ldc2">; -defm STC : LdStCop<{?,?,?,?}, 0, "stc">; -defm STC2 : LdStCop<0b1111, 0, "stc2">; - -//===----------------------------------------------------------------------===// -// Move between coprocessor and ARM core register -- for disassembly only -// - -class MovRCopro - : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, opc, "\t$cop, $opc1, $Rt, $CRn, 
$CRm, $opc2", - [/* For disassembly only; pattern left blank */]> { - let Inst{20} = direction; - let Inst{4} = 1; - - bits<4> Rt; - bits<4> cop; - bits<3> opc1; - bits<3> opc2; - bits<4> CRm; - bits<4> CRn; - - let Inst{15-12} = Rt; - let Inst{11-8} = cop; - let Inst{23-21} = opc1; - let Inst{7-5} = opc2; - let Inst{3-0} = CRm; - let Inst{19-16} = CRn; -} - -def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; -def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; - -class MovRCopro2 - : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{20} = direction; - let Inst{4} = 1; - - bits<4> Rt; - bits<4> cop; - bits<3> opc1; - bits<3> opc2; - bits<4> CRm; - bits<4> CRn; - - let Inst{15-12} = Rt; - let Inst{11-8} = cop; - let Inst{23-21} = opc1; - let Inst{7-5} = opc2; - let Inst{3-0} = CRm; - let Inst{19-16} = CRn; -} - -def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */>; -def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */>; - -class MovRRCopro - : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", - [/* For disassembly only; pattern left blank */]> { - let Inst{23-21} = 0b010; - let Inst{20} = direction; - - bits<4> Rt; - bits<4> Rt2; - bits<4> cop; - bits<4> opc1; - bits<4> CRm; - - let Inst{15-12} = Rt; - let Inst{19-16} = Rt2; - let Inst{11-8} = cop; - let Inst{7-4} = opc1; - let Inst{3-0} = CRm; -} - -def MCRR : MovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */>; -def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; - -class MovRRCopro2 - : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, GPR:$Rt2, c_imm:$CRm), - 
NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), - [/* For disassembly only; pattern left blank */]> { - let Inst{31-28} = 0b1111; - let Inst{23-21} = 0b010; - let Inst{20} = direction; - - bits<4> Rt; - bits<4> Rt2; - bits<4> cop; - bits<4> opc1; - bits<4> CRm; - - let Inst{15-12} = Rt; - let Inst{19-16} = Rt2; - let Inst{11-8} = cop; - let Inst{7-4} = opc1; - let Inst{3-0} = CRm; -} - -def MCRR2 : MovRRCopro2<"mcrr2", 0 /* from ARM core register to coprocessor */>; -def MRRC2 : MovRRCopro2<"mrrc2", 1 /* from coprocessor to ARM core register */>; - -//===----------------------------------------------------------------------===// -// Move between special register and ARM core register -- for disassembly only -// - -// Move to ARM core register from Special Register -def MRS : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, "mrs", "\t$Rd, cpsr", - [/* For disassembly only; pattern left blank */]> { - bits<4> Rd; - let Inst{23-16} = 0b00001111; - let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; -} - -def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary,"mrs","\t$Rd, spsr", - [/* For disassembly only; pattern left blank */]> { - bits<4> Rd; - let Inst{23-16} = 0b01001111; - let Inst{15-12} = Rd; - let Inst{7-4} = 0b0000; -} - -// Move from ARM core register to Special Register -// -// No need to have both system and application versions, the encodings are the -// same and the assembly parser has no way to distinguish between them. The mask -// operand contains the special register (R Bit) in bit 4 and bits 3-0 contains -// the mask with the fields to be accessed in the special register. 
-def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, - "msr", "\t$mask, $Rn", - [/* For disassembly only; pattern left blank */]> { - bits<5> mask; - bits<4> Rn; - - let Inst{23} = 0; - let Inst{22} = mask{4}; // R bit - let Inst{21-20} = 0b10; - let Inst{19-16} = mask{3-0}; - let Inst{15-12} = 0b1111; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rn; -} - -def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", - [/* For disassembly only; pattern left blank */]> { - bits<5> mask; - bits<12> a; - - let Inst{23} = 0; - let Inst{22} = mask{4}; // R bit - let Inst{21-20} = 0b10; - let Inst{19-16} = mask{3-0}; - let Inst{15-12} = 0b1111; - let Inst{11-0} = a; -} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index dc3d63e26ef5..e34d69a44d9f 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -80,6 +80,12 @@ def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>; def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>; +def NEONvbsl : SDNode<"ARMISD::VBSL", + SDTypeProfile<1, 3, [SDTCisVec<0>, + SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>>; + def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>; // VDUPLANE can produce a quad-register result from a double-register source, @@ -146,10 +152,6 @@ def VLDMQIA : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), IIC_fpLoad_m, "", [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; -def VLDMQDB - : PseudoVFPLdStM<(outs QPR:$dst), (ins GPR:$Rn), - IIC_fpLoad_m, "", - [(set QPR:$dst, (v2f64 (load GPR:$Rn)))]>; // Use VSTM to store a Q register as a D register pair. // This is a pseudo instruction that is expanded to VSTMD after reg alloc. 
@@ -157,10 +159,6 @@ def VSTMQIA : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), IIC_fpStore_m, "", [(store (v2f64 QPR:$src), GPR:$Rn)]>; -def VSTMQDB - : PseudoVFPLdStM<(outs), (ins QPR:$src, GPR:$Rn), - IIC_fpStore_m, "", - [(store (v2f64 QPR:$src), GPR:$Rn)]>; // Classes for VLD* pseudo-instructions with multi-register operands. // These are expanded to real instructions after register allocation. @@ -1801,7 +1799,7 @@ class N3VDX op21_20, bits<4> op11_8, bit op4, class N3VDSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1811,7 +1809,7 @@ class N3VDSL op21_20, bits<4> op11_8, } class N3VDSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$Vd, $Vn, $Vm[$lane]","", [(set (Ty DPR:$Vd), @@ -1841,7 +1839,7 @@ class N3VQX op21_20, bits<4> op11_8, bit op4, class N3VQSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1852,7 +1850,7 @@ class N3VQSL op21_20, bits<4> op11_8, } class N3VQSL16 op21_20, bits<4> op11_8, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$Vd, $Vn, 
$Vm[$lane]","", [(set (ResTy QPR:$Vd), @@ -1874,7 +1872,7 @@ class N3VDInt op21_20, bits<4> op11_8, bit op4, } class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1885,7 +1883,7 @@ class N3VDIntSL op21_20, bits<4> op11_8, InstrItinClass itin, } class N3VDIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (Ty DPR:$Vd), @@ -1915,7 +1913,7 @@ class N3VQInt op21_20, bits<4> op11_8, bit op4, class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1927,7 +1925,7 @@ class N3VQIntSL op21_20, bits<4> op11_8, InstrItinClass itin, class N3VQIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, OpcodeStr, Dt, "$Vd, $Vn, $Vm[$lane]", "", [(set (ResTy QPR:$Vd), @@ -1959,7 +1957,7 @@ class N3VDMulOp op21_20, bits<4> op11_8, bit op4, class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator MulOp, SDPatternOperator 
ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane32<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -1972,7 +1970,7 @@ class N3VDMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, class N3VDMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDNode MulOp, SDNode ShOp> - : N3V<0, 1, op21_20, op11_8, 1, 0, + : N3VLane16<0, 1, op21_20, op11_8, 1, 0, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -1994,7 +1992,7 @@ class N3VQMulOp op21_20, bits<4> op11_8, bit op4, class N3VQMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator MulOp, SDPatternOperator ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane32<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_VFP2:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2008,7 +2006,7 @@ class N3VQMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDNode MulOp, SDNode ShOp> - : N3V<1, 1, op21_20, op11_8, 1, 0, + : N3VLane16<1, 1, op21_20, op11_8, 1, 0, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, DPR_8:$Vm, nohash_imm:$lane), NVMulSLFrm, itin, @@ -2069,7 +2067,7 @@ class N3VLMulOp op21_20, bits<4> op11_8, bit op4, class N3VLMulOpSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3V op21_20, bits<4> op11_8, class N3VLMulOpSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode MulOp, SDNode OpNode> - : N3V op21_20, bits<4> op11_8, bit op4, class N3VLInt3SL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V op21_20, bits<4> op11_8, InstrItinClass itin, class 
N3VLInt3SL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V op21_20, bits<4> op11_8, bit op4, class N3VLSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3V op21_20, bits<4> op11_8, class N3VLSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> - : N3V op21_20, bits<4> op11_8, bit op4, class N3VLIntSL op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V op21_20, bits<4> op11_8, InstrItinClass itin, class N3VLIntSL16 op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, Intrinsic IntOp> - : N3V op24_23, bits<2> op21_20, bits<2> op19_18, // Shift by immediate, // both double- and quad-register. class N2VDSh op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode OpNode> + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm; class N2VQSh op11_8, bit op7, bit op4, - Format f, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType Ty, SDNode OpNode> + Format f, InstrItinClass itin, Operand ImmTy, + string OpcodeStr, string Dt, ValueType Ty, SDNode OpNode> : N2VImm; @@ -2315,9 +2313,9 @@ class N2VLSh op11_8, bit op7, bit op6, bit op4, // Narrow shift by immediate. class N2VNSh op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, SDNode OpNode> + ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode> : N2VImm; @@ -2325,16 +2323,18 @@ class N2VNSh op11_8, bit op7, bit op6, bit op4, // Shift right by immediate and accumulate, // both double- and quad-register. 
class N2VDShAdd op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N2VImm; class N2VQShAdd op11_8, bit op7, bit op4, - string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp> + Operand ImmTy, string OpcodeStr, string Dt, + ValueType Ty, SDNode ShOp> : N2VImm; @@ -2342,15 +2342,17 @@ class N2VQShAdd op11_8, bit op7, bit op4, // Shift by immediate and insert, // both double- and quad-register. class N2VDShIns op11_8, bit op7, bit op4, - Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> : N2VImm; class N2VQShIns op11_8, bit op7, bit op4, - Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp> + Operand ImmTy, Format f, string OpcodeStr, string Dt, + ValueType Ty,SDNode ShOp> : N2VImm; @@ -3010,40 +3012,77 @@ multiclass N2VPLInt2_QHS op24_23, bits<2> op21_20, bits<2> op17_16, // Neon 2-register vector shift by immediate, // with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: -multiclass N2VSh_QHSD op11_8, bit op4, - InstrItinClass itin, string OpcodeStr, string Dt, - SDNode OpNode, Format f> { +multiclass N2VShL_QHSD op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { // 64-bit vector types. - def v8i8 : N2VDSh { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDSh { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDSh { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDSh; // imm6 = xxxxxx // 128-bit vector types. 
- def v16i8 : N2VQSh { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQSh { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQSh { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQSh; + // imm6 = xxxxxx +} +multiclass N2VShR_QHSD op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + SDNode OpNode> { + // 64-bit vector types. + def v8i8 : N2VDSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDSh; + // imm6 = xxxxxx + + // 128-bit vector types. + def v16i8 : N2VQSh { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQSh { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQSh { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQSh; // imm6 = xxxxxx } @@ -3053,79 +3092,113 @@ multiclass N2VSh_QHSD op11_8, bit op4, multiclass N2VShAdd_QHSD op11_8, bit op4, string OpcodeStr, string Dt, SDNode ShOp> { // 64-bit vector types. - def v8i8 : N2VDShAdd { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDShAdd { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDShAdd { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDShAdd; // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShAdd { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQShAdd { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQShAdd { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQShAdd; // imm6 = xxxxxx } - // Neon Shift-Insert vector operations, // with f of either N2RegVShLFrm or N2RegVShRFrm // element sizes of 8, 16, 32 and 64 bits: -multiclass N2VShIns_QHSD op11_8, bit op4, - string OpcodeStr, SDNode ShOp, - Format f> { +multiclass N2VShInsL_QHSD op11_8, bit op4, + string OpcodeStr> { // 64-bit vector types. 
- def v8i8 : N2VDShIns { + def v8i8 : N2VDShIns { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v4i16 : N2VDShIns { + def v4i16 : N2VDShIns { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v2i32 : N2VDShIns { + def v2i32 : N2VDShIns { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v1i64 : N2VDShIns; + def v1i64 : N2VDShIns; // imm6 = xxxxxx // 128-bit vector types. - def v16i8 : N2VQShIns { + def v16i8 : N2VQShIns { let Inst{21-19} = 0b001; // imm6 = 001xxx } - def v8i16 : N2VQShIns { + def v8i16 : N2VQShIns { let Inst{21-20} = 0b01; // imm6 = 01xxxx } - def v4i32 : N2VQShIns { + def v4i32 : N2VQShIns { let Inst{21} = 0b1; // imm6 = 1xxxxx } - def v2i64 : N2VQShIns; + def v2i64 : N2VQShIns; + // imm6 = xxxxxx +} +multiclass N2VShInsR_QHSD op11_8, bit op4, + string OpcodeStr> { + // 64-bit vector types. + def v8i8 : N2VDShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v4i16 : N2VDShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v2i32 : N2VDShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v1i64 : N2VDShIns; + // imm6 = xxxxxx + + // 128-bit vector types. 
+ def v16i8 : N2VQShIns { + let Inst{21-19} = 0b001; // imm6 = 001xxx + } + def v8i16 : N2VQShIns { + let Inst{21-20} = 0b01; // imm6 = 01xxxx + } + def v4i32 : N2VQShIns { + let Inst{21} = 0b1; // imm6 = 1xxxxx + } + def v2i64 : N2VQShIns; // imm6 = xxxxxx } @@ -3153,15 +3226,18 @@ multiclass N2VNSh_HSD op11_8, bit op7, bit op6, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, SDNode OpNode> { def v8i8 : N2VNSh { + OpcodeStr, !strconcat(Dt, "16"), + v8i8, v8i16, shr_imm8, OpNode> { let Inst{21-19} = 0b001; // imm6 = 001xxx } def v4i16 : N2VNSh { + OpcodeStr, !strconcat(Dt, "32"), + v4i16, v4i32, shr_imm16, OpNode> { let Inst{21-20} = 0b01; // imm6 = 01xxxx } def v2i32 : N2VNSh { + OpcodeStr, !strconcat(Dt, "64"), + v2i32, v2i64, shr_imm32, OpNode> { let Inst{21} = 0b1; // imm6 = 1xxxxx } } @@ -3697,16 +3773,21 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VCNTiD, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set DPR:$Vd, - (v2i32 (or (and DPR:$Vn, DPR:$src1), - (and DPR:$Vm, (vnotd DPR:$src1)))))]>; + [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>; + +def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd), + (and DPR:$Vm, (vnotd DPR:$Vd)))), + (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>; + def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VCNTiQ, "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [(set QPR:$Vd, - (v4i32 (or (and QPR:$Vn, QPR:$src1), - (and QPR:$Vm, (vnotq QPR:$src1)))))]>; + [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>; + +def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd), + (and QPR:$Vm, (vnotq QPR:$Vd)))), + (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>; // VBIF : Vector Bitwise Insert if False // like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst", @@ -3917,14 +3998,13 @@ defm VSHLs : N3VInt_QHSDSh<0, 0, 0b0100, 0, N3RegVShFrm, defm VSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 0, N3RegVShFrm, IIC_VSHLiD, IIC_VSHLiD, 
IIC_VSHLiQ, IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu>; + // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl, - N2RegVShLFrm>; +defm VSHLi : N2VShL_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>; + // VSHR : Vector Shift Right (Immediate) -defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs, - N2RegVShRFrm>; -defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru, - N2RegVShRFrm>; +defm VSHRs : N2VShR_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",NEONvshrs>; +defm VSHRu : N2VShR_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u",NEONvshru>; // VSHLL : Vector Shift Left Long defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>; @@ -3957,10 +4037,8 @@ defm VRSHLu : N3VInt_QHSDSh<1, 0, 0b0101, 0, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu>; // VRSHR : Vector Rounding Shift Right -defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs, - N2RegVShRFrm>; -defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru, - N2RegVShRFrm>; +defm VRSHRs : N2VShR_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s",NEONvrshrs>; +defm VRSHRu : N2VShR_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u",NEONvrshru>; // VRSHRN : Vector Rounding Shift Right and Narrow defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i", @@ -3974,13 +4052,11 @@ defm VQSHLu : N3VInt_QHSDSh<1, 0, 0b0100, 1, N3RegVShFrm, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu>; // VQSHL : Vector Saturating Shift Left (Immediate) -defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls, - N2RegVShLFrm>; -defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu, - N2RegVShLFrm>; +defm VQSHLsi : N2VShL_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s",NEONvqshls>; +defm VQSHLui : 
N2VShL_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u",NEONvqshlu>; + // VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned) -defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu, - N2RegVShLFrm>; +defm VQSHLsu : N2VShL_QHSD<1,1,0b0110,1, IIC_VSHLi4D,"vqshlu","s",NEONvqshlsu>; // VQSHRN : Vector Saturating Shift Right and Narrow defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s", @@ -4018,9 +4094,10 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>; defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>; // VSLI : Vector Shift Left and Insert -defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>; +defm VSLI : N2VShInsL_QHSD<1, 1, 0b0101, 1, "vsli">; + // VSRI : Vector Shift Right and Insert -defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>; +defm VSRI : N2VShInsR_QHSD<1, 1, 0b0100, 1, "vsri">; // Vector Absolute and Saturating Absolute. @@ -4362,14 +4439,8 @@ def VDUP8q : VDUPQ<0b11101110, 0b00, "8", v16i8>; def VDUP16q : VDUPQ<0b11101010, 0b01, "16", v8i16>; def VDUP32q : VDUPQ<0b11101010, 0b00, "32", v4i32>; -def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$V), (ins GPR:$R), - IIC_VMOVIS, "vdup", "32", "$V, $R", - [(set DPR:$V, (v2f32 (NEONvdup - (f32 (bitconvert GPR:$R)))))]>; -def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$V), (ins GPR:$R), - IIC_VMOVIS, "vdup", "32", "$V, $R", - [(set QPR:$V, (v4f32 (NEONvdup - (f32 (bitconvert GPR:$R)))))]>; +def : Pat<(v2f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32d GPR:$R)>; +def : Pat<(v4f32 (NEONvdup (f32 (bitconvert GPR:$R)))), (VDUP32q GPR:$R)>; // VDUP : Vector Duplicate Lane (from scalar to all elements) @@ -4397,9 +4468,6 @@ def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16> { def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32> { let Inst{19} = lane{0}; } -def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32> { - let Inst{19} = lane{0}; -} def 
VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8> { let Inst{19-17} = lane{2-0}; } @@ -4409,9 +4477,12 @@ def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16> { def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32> { let Inst{19} = lane{0}; } -def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32> { - let Inst{19} = lane{0}; -} + +def : Pat<(v2f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32d DPR:$Vm, imm:$lane)>; + +def : Pat<(v4f32 (NEONvduplane (v2f32 DPR:$Vm), imm:$lane)), + (VDUPLN32q DPR:$Vm, imm:$lane)>; def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)), (v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src, @@ -4426,7 +4497,7 @@ def : Pat<(v4i32 (NEONvduplane (v4i32 QPR:$src), imm:$lane)), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; def : Pat<(v4f32 (NEONvduplane (v4f32 QPR:$src), imm:$lane)), - (v4f32 (VDUPLNfq (v2f32 (EXTRACT_SUBREG QPR:$src, + (v4f32 (VDUPLN32q (v2f32 (EXTRACT_SUBREG QPR:$src, (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; @@ -4517,12 +4588,12 @@ class VREV64Q op19_18, string OpcodeStr, string Dt, ValueType Ty> def VREV64d8 : VREV64D<0b00, "vrev64", "8", v8i8>; def VREV64d16 : VREV64D<0b01, "vrev64", "16", v4i16>; def VREV64d32 : VREV64D<0b10, "vrev64", "32", v2i32>; -def VREV64df : VREV64D<0b10, "vrev64", "32", v2f32>; +def : Pat<(v2f32 (NEONvrev64 (v2f32 DPR:$Vm))), (VREV64d32 DPR:$Vm)>; def VREV64q8 : VREV64Q<0b00, "vrev64", "8", v16i8>; def VREV64q16 : VREV64Q<0b01, "vrev64", "16", v8i16>; def VREV64q32 : VREV64Q<0b10, "vrev64", "32", v4i32>; -def VREV64qf : VREV64Q<0b10, "vrev64", "32", v4f32>; +def : Pat<(v4f32 (NEONvrev64 (v4f32 QPR:$Vm))), (VREV64q32 QPR:$Vm)>; // VREV32 : Vector Reverse elements within 32-bit words @@ -4628,8 +4699,8 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> { let Inst{9-8} = 0b00; } def VEXTqf : VEXTq<"vext", "32", v4f32> { - let Inst{11} = index{0}; - let Inst{10-8} = 0b000; + let Inst{11-10} = index{1-0}; + let Inst{9-8} = 
0b00; } // VTRN : Vector Transpose diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 826ef46bcdb5..8c542fe60bba 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -27,22 +27,22 @@ def imm_comp_XFORM : SDNodeXForm; /// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7]. -def imm0_7 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 8; +def imm0_7 : ImmLeaf= 0 && Imm < 8; }]>; def imm0_7_neg : PatLeaf<(i32 imm), [{ return (uint32_t)-N->getZExtValue() < 8; }], imm_neg_XFORM>; -def imm0_255 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 256; +def imm0_255 : ImmLeaf= 0 && Imm < 256; }]>; def imm0_255_comp : PatLeaf<(i32 imm), [{ return ~((uint32_t)N->getZExtValue()) < 256; }]>; -def imm8_255 : PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() >= 8 && (uint32_t)N->getZExtValue() < 256; +def imm8_255 : ImmLeaf= 8 && Imm < 256; }]>; def imm8_255_neg : PatLeaf<(i32 imm), [{ unsigned Val = -N->getZExtValue(); @@ -369,6 +369,15 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { let Inst{2-0} = 0b000; } + def tBX_Rm : TI<(outs), (ins pred:$p, GPR:$Rm), IIC_Br, "bx${p}\t$Rm", + [/* for disassembly only */]>, + T1Special<{1,1,0,?}> { + // A6.2.3 & A8.6.25 + bits<4> Rm; + let Inst{6-3} = Rm; + let Inst{2-0} = 0b000; + } + // Alternative return instruction used by vararg functions. def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm), IIC_Br, "bx\t$Rm", @@ -712,6 +721,19 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, let Inst{7-0} = addr; } +// FIXME: Remove this entry when the above ldr.n workaround is fixed. +// For disassembly use only. 
+def tLDRpciDIS : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, + "ldr", "\t$Rt, $addr", + [/* disassembly only */]>, + T1Encoding<{0,1,0,0,1,?}> { + // A6.2 & A8.6.59 + bits<3> Rt; + bits<8> addr; + let Inst{10-8} = Rt; + let Inst{7-0} = addr; +} + // A8.6.194 & A8.6.192 defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rrs4, t_addrmode_is4, AddrModeT1_4, @@ -1175,10 +1197,18 @@ def tREVSH : // A8.6.136 "revsh", "\t$Rd, $Rm", [(set tGPR:$Rd, (sext_inreg - (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), + (or (srl tGPR:$Rm, (i32 8)), (shl tGPR:$Rm, (i32 8))), i16))]>, Requires<[IsThumb, IsThumb1Only, HasV6]>; +def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)), + (shl tGPR:$Rm, (i32 8))), i16), + (tREVSH tGPR:$Rm)>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + +def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>, + Requires<[IsThumb, IsThumb1Only, HasV6]>; + // Rotate right register def tROR : // A8.6.139 T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), @@ -1322,10 +1352,8 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd), // Move between coprocessor and ARM core register -- for disassembly only // -class tMovRCopro - : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class tMovRCopro + : T1Cop { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -1346,8 +1374,12 @@ class tMovRCopro let Inst{19-16} = CRn; } -def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */>; -def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */>; +def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; +def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; 
class tMovRRCopro : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), @@ -1420,7 +1452,7 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br, // from some other function to get here, and we're using the stack frame for the // containing function to save/restore registers, we can't keep anything live in // regs across the eh_sjlj_setjmp(), else it will almost certainly have been -// tromped upon when we get here from a longjmp(). We force everthing out of +// tromped upon when we get here from a longjmp(). We force everything out of // registers except for our own input by listing the relevant registers in // Defs. By doing so, we also cause the prologue/epilogue code to actively // preserve all of the callee-saved resgisters, which is exactly what we want. diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0e01be59c7e8..600a12180fc5 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -44,7 +44,9 @@ def t2_so_imm_neg_XFORM : SDNodeXForm, PatLeaf<(imm), [{ return Pred_t2_so_imm(N); }]> { +def t2_so_imm : Operand, ImmLeaf { let EncoderMethod = "getT2SOImmOpValue"; } @@ -61,49 +63,15 @@ def t2_so_imm_neg : Operand, return ARM_AM::getT2SOImmVal(-((uint32_t)N->getZExtValue())) != -1; }], t2_so_imm_neg_XFORM>; -// Break t2_so_imm's up into two pieces. This handles immediates with up to 16 -// bits set in them. This uses t2_so_imm2part to match and t2_so_imm2part_[12] -// to get the first/second pieces. 
-def t2_so_imm2part : Operand, - PatLeaf<(imm), [{ - return ARM_AM::isT2SOImmTwoPartVal((unsigned)N->getZExtValue()); - }]> { -} - -def t2_so_imm2part_1 : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_imm2part_2 : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_neg_imm2part : Operand, PatLeaf<(imm), [{ - return ARM_AM::isT2SOImmTwoPartVal(-(int)N->getZExtValue()); - }]> { -} - -def t2_so_neg_imm2part_1 : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - -def t2_so_neg_imm2part_2 : SDNodeXFormgetZExtValue()); - return CurDAG->getTargetConstant(V, MVT::i32); -}]>; - /// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31]. -def imm1_31 : PatLeaf<(i32 imm), [{ - return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32; +def imm1_31 : ImmLeaf= 1 && (int32_t)Imm < 32; }]>; /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. def imm0_4095 : Operand, - PatLeaf<(i32 imm), [{ - return (uint32_t)N->getZExtValue() < 4096; + ImmLeaf= 0 && Imm < 4096; }]>; def imm0_4095_neg : PatLeaf<(i32 imm), [{ @@ -118,6 +86,11 @@ def imm0_255_not : PatLeaf<(i32 imm), [{ return (uint32_t)(~N->getZExtValue()) < 255; }], imm_comp_XFORM>; +def lo5AllOne : PatLeaf<(i32 imm), [{ + // Returns true if all low 5-bits are 1. + return (((uint32_t)N->getZExtValue()) & 0x1FUL) == 0x1FUL; +}]>; + // Define Thumb2 specific addressing modes. // t2addrmode_imm12 := reg + imm12 @@ -129,6 +102,12 @@ def t2addrmode_imm12 : Operand, let ParserMatchClass = MemMode5AsmOperand; } +// t2ldrlabel := imm12 +def t2ldrlabel : Operand { + let EncoderMethod = "getAddrModeImm12OpValue"; +} + + // ADR instruction labels. 
def t2adrlabel : Operand { let EncoderMethod = "getT2AdrLabelOpValue"; @@ -173,6 +152,15 @@ def t2addrmode_so_reg : Operand, let ParserMatchClass = MemMode5AsmOperand; } +// t2addrmode_reg := reg +// Used by load/store exclusive instructions. Useful to enable right assembly +// parsing and printing. Not used for any codegen matching. +// +def t2addrmode_reg : Operand { + let PrintMethod = "printAddrMode7Operand"; + let MIOperandInfo = (ops tGPR); + let ParserMatchClass = MemMode7AsmOperand; +} //===----------------------------------------------------------------------===// // Multiclass helpers... @@ -700,49 +688,27 @@ multiclass T2I_adde_sube_irs opcod, string opc, PatFrag opnode, let Inst{24-21} = opcod; } } +} // Carry setting variants -let isCodeGenOnly = 1, Defs = [CPSR] in { -multiclass T2I_adde_sube_s_irs opcod, string opc, PatFrag opnode, - bit Commutable = 0> { +// NOTE: CPSR def omitted because it will be handled by the custom inserter. +let usesCustomInserter = 1 in { +multiclass T2I_adde_sube_s_irs { // shifted imm - def ri : T2sTwoRegImm< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), IIC_iALUi, - opc, "\t$Rd, $Rn, $imm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11110; - let Inst{25} = 0; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. 
- let Inst{15} = 0; - } + def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), + Size4Bytes, IIC_iALUi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>; // register - def rr : T2sThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUr, - opc, ".w\t$Rd, $Rn, $Rm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]>, - Requires<[IsThumb2]> { + def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), + Size4Bytes, IIC_iALUr, + [(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> { let isCommutable = Commutable; - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - let Inst{14-12} = 0b000; // imm3 - let Inst{7-6} = 0b00; // imm2 - let Inst{5-4} = 0b00; // type } // shifted register - def rs : T2sTwoRegShiftedReg< - (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), - IIC_iALUsi, opc, ".w\t$Rd, $Rn, $ShiftedRm", - [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>, - Requires<[IsThumb2]> { - let Inst{31-27} = 0b11101; - let Inst{26-25} = 0b01; - let Inst{24-21} = opcod; - let Inst{20} = 1; // The S bit. - } -} + def rs : t2PseudoInst< + (outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm), + Size4Bytes, IIC_iALUsi, + [(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>; } } @@ -864,6 +830,7 @@ multiclass T2I_ld opcod, string opc, let Inst{15-12} = Rt; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm @@ -911,7 +878,7 @@ multiclass T2I_ld opcod, string opc, } // FIXME: Is the pci variant actually needed? 
- def pci : T2Ipc <(outs GPR:$Rt), (ins i32imm:$addr), iii, + def pci : T2Ipc <(outs GPR:$Rt), (ins t2ldrlabel:$addr), iii, opc, ".w\t$Rt, $addr", [(set GPR:$Rt, (opnode (ARMWrapper tconstpool:$addr)))]> { let isReMaterializable = 1; @@ -944,6 +911,7 @@ multiclass T2I_st opcod, string opc, let Inst{15-12} = Rt; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm @@ -1398,7 +1366,7 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$Rn), // for disassembly only. // Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 class T2IldT type, string opc, InstrItinClass ii> - : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1440,42 +1408,48 @@ def t2STRDi8 : T2Ii8s4<1, 0, 0, (outs), def t2STR_PRE : T2Iidxldst<0, 0b10, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "str", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "str", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STR_POST : T2Iidxldst<0, 0b10, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_iu, - "str", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", + "str", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_store GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_PRE : T2Iidxldst<0, 0b01, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_iu, - "strh", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "strh", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$Rt, 
GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRH_POST : T2Iidxldst<0, 0b01, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strh", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", + "strh", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_PRE : T2Iidxldst<0, 0b00, 0, 1, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePre, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn, $addr]!", "$Rn = $base_wb", + "strb", "\t$Rt, [$Rn, $addr]!", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), (ins GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr), AddrModeT2_i8, IndexModePost, IIC_iStore_bh_iu, - "strb", "\t$Rt, [$Rn], $addr", "$Rn = $base_wb", + "strb", "\t$Rt, [$Rn], $addr", + "$Rn = $base_wb,@earlyclobber $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$Rt, GPR:$Rn, t2am_imm8_offset:$addr))]>; @@ -1483,7 +1457,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), // only. // Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 class T2IstT type, string opc, InstrItinClass ii> - : T2Ii8<(outs GPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, + : T2Ii8<(outs rGPR:$Rt), (ins t2addrmode_imm8:$addr), ii, opc, "\t$Rt, $addr", []> { let Inst{31-27} = 0b11111; let Inst{26-25} = 0b00; @@ -1508,20 +1482,20 @@ def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>; // ldrd / strd pre / post variants // For disassembly only. 
-def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs GPR:$Rt, GPR:$Rt2), +def t2LDRD_PRE : T2Ii8s4<1, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, [$base, $imm]!", []>; -def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs GPR:$Rt, GPR:$Rt2), +def t2LDRD_POST : T2Ii8s4<0, 1, 1, (outs rGPR:$Rt, rGPR:$Rt2), (ins GPR:$base, t2am_imm8s4_offset:$imm), IIC_iLoad_d_ru, "ldrd", "\t$Rt, $Rt2, [$base], $imm", []>; def t2STRD_PRE : T2Ii8s4<1, 1, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base, $imm]!", []>; def t2STRD_POST : T2Ii8s4<0, 1, 0, (outs), - (ins GPR:$Rt, GPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), + (ins rGPR:$Rt, rGPR:$Rt2, GPR:$base, t2am_imm8s4_offset:$imm), IIC_iStore_d_ru, "strd", "\t$Rt, $Rt2, [$base], $imm", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future @@ -1541,6 +1515,7 @@ multiclass T2Ipl write, bits<1> instr, string opc> { let Inst{15-12} = 0b1111; bits<17> addr; + let addr{12} = 1; // add = TRUE let Inst{19-16} = addr{16-13}; // Rn let Inst{23} = addr{12}; // U let Inst{11-0} = addr{11-0}; // imm12 @@ -1813,10 +1788,8 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc", BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>; defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc", BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>; -defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc", - BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>; -defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc", - BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>; +defm t2ADCS : T2I_adde_sube_s_irs, 1>; +defm t2SBCS : T2I_adde_sube_s_irs>; // RSB defm t2RSB : T2I_rbin_irs <0b1110, "rsb", @@ -1847,9 +1820,14 @@ def : T2Pat<(addc rGPR:$src, t2_so_imm_neg:$imm), // Effectively, the inverse interpretation of the carry flag already accounts // for part of the negation. 
let AddedComplexity = 1 in -def : T2Pat<(adde rGPR:$src, imm0_255_not:$imm), +def : T2Pat<(adde_dead_carry rGPR:$src, imm0_255_not:$imm), + (t2SBCri rGPR:$src, imm0_255_not:$imm)>; +def : T2Pat<(adde_dead_carry rGPR:$src, t2_so_imm_not:$imm), + (t2SBCri rGPR:$src, t2_so_imm_not:$imm)>; +let AddedComplexity = 1 in +def : T2Pat<(adde_live_carry rGPR:$src, imm0_255_not:$imm), (t2SBCSri rGPR:$src, imm0_255_not:$imm)>; -def : T2Pat<(adde rGPR:$src, t2_so_imm_not:$imm), +def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm), (t2SBCSri rGPR:$src, t2_so_imm_not:$imm)>; // Select Bytes -- for disassembly only @@ -2052,6 +2030,10 @@ defm t2LSR : T2I_sh_ir<0b01, "lsr", BinOpFrag<(srl node:$LHS, node:$RHS)>>; defm t2ASR : T2I_sh_ir<0b10, "asr", BinOpFrag<(sra node:$LHS, node:$RHS)>>; defm t2ROR : T2I_sh_ir<0b11, "ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>; +// (rotr x, (and y, 0x...1f)) ==> (ROR x, y) +def : Pat<(rotr rGPR:$lhs, (and rGPR:$rhs, lo5AllOne)), + (t2RORrr rGPR:$lhs, rGPR:$rhs)>; + let Uses = [CPSR] in { def t2RRX : T2sTwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iMOVsi, "rrx", "\t$Rd, $Rm", @@ -2140,10 +2122,12 @@ def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$Rd, $imm", [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{19-16} = 0b1111; // Rn let Inst{15} = 0; + let Inst{5} = 0; // should be 0. bits<10> imm; let msb{4-0} = imm{9-5}; @@ -2176,9 +2160,11 @@ let Constraints = "$src = $Rd" in { [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm))]> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{15} = 0; + let Inst{5} = 0; // should be 0. 
bits<10> imm; let msb{4-0} = imm{9-5}; @@ -2193,9 +2179,11 @@ let Constraints = "$src = $Rd" in { IIC_iBITi, "bfi", "\t$Rd, $Rn, $lsbit, $width", []> { let Inst{31-27} = 0b11110; + let Inst{26} = 0; // should be 0. let Inst{25} = 1; let Inst{24-20} = 0b10110; let Inst{15} = 0; + let Inst{5} = 0; // should be 0. bits<5> lsbit; bits<5> width; @@ -2607,9 +2595,15 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr, "revsh", ".w\t$Rd, $Rm", [(set rGPR:$Rd, (sext_inreg - (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), + (or (srl rGPR:$Rm, (i32 8)), (shl rGPR:$Rm, (i32 8))), i16))]>; +def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)), + (shl rGPR:$Rm, (i32 8))), i16), + (t2REVSH rGPR:$Rm)>; + +def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>; + def t2PKHBT : T2ThreeReg< (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh), IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh", @@ -2843,9 +2837,9 @@ class T2I_ldrex opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{5-4} = opcod; let Inst{3-0} = 0b1111; - bits<4> Rn; + bits<4> addr; bits<4> Rt; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } class T2I_strex opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, @@ -2859,37 +2853,37 @@ class T2I_strex opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz, let Inst{5-4} = opcod; bits<4> Rd; - bits<4> Rn; + bits<4> addr; bits<4> Rt; - let Inst{11-8} = Rd; - let Inst{19-16} = Rn; + let Inst{3-0} = Rd; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } let mayLoad = 1 in { -def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, [$Rn]", +def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr", "", []>; -def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, - Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, [$Rn]", +def 
t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, + Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr", "", []>; -def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins rGPR:$Rn), AddrModeNone, +def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "ldrex", "\t$Rt, [$Rn]", "", + "ldrex", "\t$Rt, $addr", "", []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000101; let Inst{11-8} = 0b1111; let Inst{7-0} = 0b00000000; // imm8 = 0 - bits<4> Rn; bits<4> Rt; - let Inst{19-16} = Rn; + bits<4> addr; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } -def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn), +def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "ldrexd", "\t$Rt, $Rt2, [$Rn]", "", + "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -2897,31 +2891,31 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2), (ins rGPR:$Rn), } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { -def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strexb", "\t$Rd, $Rt, [$Rn]", "", []>; -def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strexh", "\t$Rd, $Rt, [$Rn]", "", []>; -def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, rGPR:$Rn), - AddrModeNone, Size4Bytes, NoItinerary, - "strex", "\t$Rd, $Rt, [$Rn]", "", - []> { +def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexb", "\t$Rd, $Rt, $addr", "", []>; +def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr), + AddrModeNone, Size4Bytes, NoItinerary, + "strexh", "\t$Rd, $Rt, $addr", "", []>; +def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, 
t2addrmode_reg:$addr), + AddrModeNone, Size4Bytes, NoItinerary, + "strex", "\t$Rd, $Rt, $addr", "", + []> { let Inst{31-27} = 0b11101; let Inst{26-20} = 0b0000100; let Inst{7-0} = 0b00000000; // imm8 = 0 bits<4> Rd; - bits<4> Rn; + bits<4> addr; bits<4> Rt; let Inst{11-8} = Rd; - let Inst{19-16} = Rn; + let Inst{19-16} = addr; let Inst{15-12} = Rt; } def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd), - (ins rGPR:$Rt, rGPR:$Rt2, rGPR:$Rn), + (ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr), AddrModeNone, Size4Bytes, NoItinerary, - "strexd", "\t$Rd, $Rt, $Rt2, [$Rn]", "", [], + "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -2965,7 +2959,7 @@ let isCall = 1, // here, and we're using the stack frame for the containing function to // save/restore registers, we can't keep anything live in regs across // the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon -// when we get here from a longjmp(). We force everthing out of registers +// when we get here from a longjmp(). We force everything out of registers // except for our own input by listing the relevant registers in Defs. By // doing so, we also cause the prologue/epilogue code to actively preserve // all of the callee-saved resgisters, which is exactly what we want. 
@@ -3238,19 +3232,20 @@ class T2RFE op31_20, dag oops, dag iops, InstrItinClass itin, bits<4> Rn; let Inst{19-16} = Rn; + let Inst{15-0} = 0xc000; } def t2RFEDBW : T2RFE<0b111010000011, - (outs), (ins rGPR:$Rn), NoItinerary, "rfedb", "\t$Rn!", + (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn!", [/* For disassembly only; pattern left blank */]>; def t2RFEDB : T2RFE<0b111010000001, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeab", "\t$Rn", + (outs), (ins GPR:$Rn), NoItinerary, "rfedb", "\t$Rn", [/* For disassembly only; pattern left blank */]>; def t2RFEIAW : T2RFE<0b111010011011, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn!", + (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn!", [/* For disassembly only; pattern left blank */]>; def t2RFEIA : T2RFE<0b111010011001, - (outs), (ins rGPR:$Rn), NoItinerary, "rfeia", "\t$Rn", + (outs), (ins GPR:$Rn), NoItinerary, "rfeia", "\t$Rn", [/* For disassembly only; pattern left blank */]>; //===----------------------------------------------------------------------===// @@ -3352,10 +3347,8 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */, // Move between coprocessor and ARM core register -- for disassembly only // -class t2MovRCopro - : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, - GPR:$Rt, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2), - !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), +class t2MovRCopro + : T2Cop { let Inst{27-24} = 0b1110; let Inst{20} = direction; @@ -3376,8 +3369,12 @@ class t2MovRCopro let Inst{19-16} = CRn; } -def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */>; -def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */>; +def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */, + (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; +def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */, + (outs GPR:$Rt), (ins p_imm:$cop, 
i32imm:$opc1, c_imm:$CRn, + c_imm:$CRm, i32imm:$opc2)>; class t2MovRRCopro : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm), diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 29902833f2bb..376bd9607e4b 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -101,14 +101,6 @@ multiclass vfp_ldst_mult { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - } def DDB_UPD : AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops), IndexModeUpd, itin_upd, @@ -143,18 +135,6 @@ multiclass vfp_ldst_mult { - let Inst{24-23} = 0b10; // Decrement Before - let Inst{21} = 0; // No writeback - let Inst{20} = L_bit; - - // Some single precision VFP instructions may be executed on both NEON and - // VFP pipelines. - let D = VFPNeonDomain; - } def SDB_UPD : AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops), IndexModeUpd, itin_upd, @@ -467,6 +447,10 @@ def VMOVRS : AVConv2I<0b11100001, 0b1010, let Inst{6-5} = 0b00; let Inst{3-0} = 0b0000; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } def VMOVSR : AVConv4I<0b11100000, 0b1010, @@ -484,6 +468,10 @@ def VMOVSR : AVConv4I<0b11100000, 0b1010, let Inst{6-5} = 0b00; let Inst{3-0} = 0b0000; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } let neverHasSideEffects = 1 in { @@ -503,6 +491,10 @@ def VMOVRRD : AVConv3I<0b11000101, 0b1011, let Inst{19-16} = Rt2; let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. 
+ let D = VFPNeonDomain; } def VMOVRRS : AVConv3I<0b11000101, 0b1010, @@ -510,6 +502,10 @@ def VMOVRRS : AVConv3I<0b11000101, 0b1010, IIC_fpMOVDI, "vmov", "\t$wb, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } } // neverHasSideEffects @@ -532,6 +528,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011, let Inst{19-16} = Rt2; let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } let neverHasSideEffects = 1 in @@ -540,6 +540,10 @@ def VMOVSRR : AVConv5I<0b11000100, 0b1010, IIC_fpMOVID, "vmov", "\t$dst1, $dst2, $src1, $src2", [/* For disassembly only; pattern left blank */]> { let Inst{7-6} = 0b00; + + // Some single precision VFP instructions may be executed on both NEON and VFP + // pipelines. + let D = VFPNeonDomain; } // FMRDH: SPR -> GPR @@ -972,33 +976,15 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), // let neverHasSideEffects = 1 in { -def VMOVDcc : ADuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpUNA64, "vmov", ".f64\t$Dd, $Dm", +def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p), + Size4Bytes, IIC_fpUNA64, [/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, RegConstraint<"$Dn = $Dd">; -def VMOVScc : ASuI<0b11101, 0b11, 0b0000, 0b01, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpUNA32, "vmov", ".f32\t$Sd, $Sm", +def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p), + Size4Bytes, IIC_fpUNA32, [/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, RegConstraint<"$Sn = $Sd">; - -def VNEGDcc : ADuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm), - IIC_fpUNA64, "vneg", ".f64\t$Dd, $Dm", - [/*(set DPR:$Dd, (ARMcneg DPR:$Dn, DPR:$Dm, imm:$cc))*/]>, - 
RegConstraint<"$Dn = $Dd">; - -def VNEGScc : ASuI<0b11101, 0b11, 0b0001, 0b01, 0, - (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm), - IIC_fpUNA32, "vneg", ".f32\t$Sd, $Sm", - [/*(set SPR:$Sd, (ARMcneg SPR:$Sn, SPR:$Sm, imm:$cc))*/]>, - RegConstraint<"$Sn = $Sd"> { - // Some single precision VFP instructions may be executed on both NEON and - // VFP pipelines on A8. - let D = VFPNeonA8Domain; -} } // neverHasSideEffects //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index d9dc5cdedb30..df89fadb311b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -79,7 +79,7 @@ namespace { unsigned Position; MachineBasicBlock::iterator MBBI; bool Merged; - MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, + MemOpQueueEntry(int o, unsigned r, bool k, unsigned p, MachineBasicBlock::iterator i) : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {} }; @@ -174,7 +174,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA; - case ARM_AM::db: return ARM::VLDMSDB; + case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists. } break; case ARM::VSTRS: @@ -182,7 +182,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA; - case ARM_AM::db: return ARM::VSTMSDB; + case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists. } break; case ARM::VLDRD: @@ -190,7 +190,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA; - case ARM_AM::db: return ARM::VLDMDDB; + case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists. 
} break; case ARM::VSTRD: @@ -198,7 +198,7 @@ static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) { switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA; - case ARM_AM::db: return ARM::VSTMDDB; + case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists. } break; } @@ -246,13 +246,9 @@ AMSubMode getLoadStoreMultipleSubMode(int Opcode) { case ARM::t2LDMDB_UPD: case ARM::t2STMDB: case ARM::t2STMDB_UPD: - case ARM::VLDMSDB: case ARM::VLDMSDB_UPD: - case ARM::VSTMSDB: case ARM::VSTMSDB_UPD: - case ARM::VLDMDDB: case ARM::VLDMDDB_UPD: - case ARM::VSTMDDB: case ARM::VSTMDDB_UPD: return ARM_AM::db; @@ -312,6 +308,10 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // VLDM/VSTM do not support DB mode without also updating the base reg. Mode = ARM_AM::db; else if (Offset != 0) { + // Check if this is a supported opcode before we insert instructions to + // calculate a new base register. + if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false; + // If starting offset isn't zero, insert a MI to materialize a new base. // But only do so if it is cost effective, i.e. merging more than two // loads / stores. @@ -354,6 +354,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD); Opcode = getLoadStoreMultipleOpcode(Opcode, Mode); + if (!Opcode) return false; MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) .addImm(Pred).addReg(PredReg); @@ -453,6 +454,25 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned PRegNum = PMO.isUndef() ? UINT_MAX : getARMRegisterNumbering(PReg); unsigned Count = 1; + unsigned Limit = ~0U; + + // vldm / vstm limit are 32 for S variants, 16 for D variants. 
+ + switch (Opcode) { + default: break; + case ARM::VSTRS: + Limit = 32; + break; + case ARM::VSTRD: + Limit = 16; + break; + case ARM::VLDRD: + Limit = 16; + break; + case ARM::VLDRS: + Limit = 32; + break; + } for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) { int NewOffset = MemOps[i].Offset; @@ -460,13 +480,13 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Reg = MO.getReg(); unsigned RegNum = MO.isUndef() ? UINT_MAX : getARMRegisterNumbering(Reg); - // Register numbers must be in ascending order. For VFP, the registers - // must also be consecutive and there is a limit of 16 double-word - // registers per instruction. + // Register numbers must be in ascending order. For VFP / NEON load and + // store multiples, the registers must also be consecutive and within the + // limit on the number of registers per instruction. if (Reg != ARM::SP && NewOffset == Offset + (int)Size && - ((isNotVFP && RegNum > PRegNum) - || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) { + ((isNotVFP && RegNum > PRegNum) || + ((Count < Limit) && RegNum == PRegNum+1))) { Offset += Size; PRegNum = RegNum; ++Count; @@ -567,14 +587,10 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::t2STMIA: case ARM::t2STMDB: case ARM::VLDMSIA: - case ARM::VLDMSDB: case ARM::VSTMSIA: - case ARM::VSTMSDB: return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4; case ARM::VLDMDIA: - case ARM::VLDMDDB: case ARM::VSTMDIA: - case ARM::VSTMDDB: return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8; } } @@ -624,7 +640,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VLDMSIA: - case ARM::VLDMSDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMSIA_UPD; @@ -632,7 +647,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VLDMDIA: - case ARM::VLDMDDB: switch (Mode) { default: 
llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VLDMDIA_UPD; @@ -640,7 +654,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VSTMSIA: - case ARM::VSTMSDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMSIA_UPD; @@ -648,7 +661,6 @@ static unsigned getUpdatingLSMultipleOpcode(unsigned Opc, } break; case ARM::VSTMDIA: - case ARM::VSTMDDB: switch (Mode) { default: llvm_unreachable("Unhandled submode!"); case ARM_AM::ia: return ARM::VSTMDIA_UPD; @@ -749,7 +761,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, MIB.addOperand(MI->getOperand(OpNum)); // Transfer memoperands. - (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); MBB.erase(MBBI); return true; @@ -1275,14 +1287,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize, CurrPred, CurrPredReg, Scratch, MemOps, Merges); - // Try folding preceeding/trailing base inc/dec into the generated + // Try folding preceding/trailing base inc/dec into the generated // LDM/STM ops. for (unsigned i = 0, e = Merges.size(); i < e; ++i) if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI)) ++NumMerges; NumMerges += Merges.size(); - // Try folding preceeding/trailing base inc/dec into those load/store + // Try folding preceding/trailing base inc/dec into those load/store // that were not merged to form LDM/STM ops. for (unsigned i = 0; i != NumMemOps; ++i) if (!MemOps[i].Merged) @@ -1292,7 +1304,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { // RS may be pointing to an instruction that's deleted. RS->skipTo(prior(MBBI)); } else if (NumMemOps == 1) { - // Try folding preceeding/trailing base inc/dec into the single + // Try folding preceding/trailing base inc/dec into the single // load/store. 
if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) { ++NumMerges; @@ -1322,7 +1334,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { } /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops -/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it +/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it /// directly restore the value of LR into pc. /// ldmfd sp!, {..., lr} /// bx lr @@ -1530,15 +1542,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, // Then make sure the immediate offset fits. int OffImm = getMemoryOpOffset(Op0); if (isT2) { - if (OffImm < 0) { - if (OffImm < -255) - // Can't fall back to t2LDRi8 / t2STRi8. - return false; - } else { - int Limit = (1 << 8) * Scale; - if (OffImm >= Limit || (OffImm & (Scale-1))) - return false; - } + int Limit = (1 << 8) * Scale; + if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1))) + return false; Offset = OffImm; } else { ARM_AM::AddrOpc AddSub = ARM_AM::add; diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/ARMMCAsmInfo.cpp index 53edfcad9308..a3f89e92f8ec 100644 --- a/lib/Target/ARM/ARMMCAsmInfo.cpp +++ b/lib/Target/ARM/ARMMCAsmInfo.cpp @@ -12,8 +12,16 @@ //===----------------------------------------------------------------------===// #include "ARMMCAsmInfo.h" +#include "llvm/Support/CommandLine.h" + using namespace llvm; +cl::opt +EnableARMEHABI("arm-enable-ehabi", cl::Hidden, + cl::desc("Generate ARM EHABI tables"), + cl::init(false)); + + static const char *const arm_asm_table[] = { "{r0}", "r0", "{r1}", "r1", @@ -65,4 +73,8 @@ ARMELFMCAsmInfo::ARMELFMCAsmInfo() { DwarfRequiresFrameSection = false; SupportsDebugInformation = true; + + // Exceptions handling + if (EnableARMEHABI) + ExceptionsType = ExceptionHandling::ARM; } diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp index 6d7b48587d19..10607b17c532 100644 --- 
a/lib/Target/ARM/ARMMCCodeEmitter.cpp +++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp @@ -278,6 +278,15 @@ class ARMMCCodeEmitter : public MCCodeEmitter { unsigned getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const; + unsigned getShiftRight8Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRight16Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRight32Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRight64Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned NEONThumb2DataIPostEncoder(const MCInst &MI, unsigned EncodedValue) const; unsigned NEONThumb2LoadStorePostEncoder(const MCInst &MI, @@ -1201,6 +1210,30 @@ getAddrMode6OffsetOpValue(const MCInst &MI, unsigned Op, return MO.getReg(); } +unsigned ARMMCCodeEmitter:: +getShiftRight8Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const { + return 8 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight16Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const { + return 16 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight32Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const { + return 32 - MI.getOperand(Op).getImm(); +} + +unsigned ARMMCCodeEmitter:: +getShiftRight64Imm(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const { + return 64 - MI.getOperand(Op).getImm(); +} + void ARMMCCodeEmitter:: EncodeInstruction(const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups) const { diff --git a/lib/Target/ARM/ARMMCExpr.h b/lib/Target/ARM/ARMMCExpr.h index d42f766ca91f..0a2e883deb1d 100644 --- a/lib/Target/ARM/ARMMCExpr.h +++ b/lib/Target/ARM/ARMMCExpr.h @@ -60,6 +60,9 @@ class ARMMCExpr : public MCTargetExpr { bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const; void AddValueSymbols(MCAssembler *) const; + const 
MCSection *FindAssociatedSection() const { + return getSubExpr()->FindAssociatedSection(); + } static bool classof(const MCExpr *E) { return E->getKind() == MCExpr::Target; diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index ad51bc13edf0..1cba1ba591ef 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -12,26 +12,8 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" #include "ARMBaseInstrInfo.h" -#include "ARMInstrInfo.h" -#include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "ARMSubtarget.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; ARMRegisterInfo::ARMRegisterInfo(const ARMBaseInstrInfo &tii, diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index 22d15b572ddd..54bf82a99e73 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -70,6 +70,8 @@ def R4 : ARMReg< 4, "r4">, DwarfRegNum<[4]>; def R5 : ARMReg< 5, "r5">, DwarfRegNum<[5]>; def R6 : ARMReg< 6, "r6">, DwarfRegNum<[6]>; def R7 : ARMReg< 7, "r7">, DwarfRegNum<[7]>; +// These require 32-bit instructions. 
+let CostPerUse = 1 in { def R8 : ARMReg< 8, "r8">, DwarfRegNum<[8]>; def R9 : ARMReg< 9, "r9">, DwarfRegNum<[9]>; def R10 : ARMReg<10, "r10">, DwarfRegNum<[10]>; @@ -78,6 +80,7 @@ def R12 : ARMReg<12, "r12">, DwarfRegNum<[12]>; def SP : ARMReg<13, "sp">, DwarfRegNum<[13]>; def LR : ARMReg<14, "lr">, DwarfRegNum<[14]>; def PC : ARMReg<15, "pc">, DwarfRegNum<[15]>; +} // Float registers def S0 : ARMFReg< 0, "s0">; def S1 : ARMFReg< 1, "s1">; @@ -99,33 +102,41 @@ def S30 : ARMFReg<30, "s30">; def S31 : ARMFReg<31, "s31">; // Aliases of the F* registers used to hold 64-bit fp values (doubles) let SubRegIndices = [ssub_0, ssub_1] in { -def D0 : ARMReg< 0, "d0", [S0, S1]>; -def D1 : ARMReg< 1, "d1", [S2, S3]>; -def D2 : ARMReg< 2, "d2", [S4, S5]>; -def D3 : ARMReg< 3, "d3", [S6, S7]>; -def D4 : ARMReg< 4, "d4", [S8, S9]>; -def D5 : ARMReg< 5, "d5", [S10, S11]>; -def D6 : ARMReg< 6, "d6", [S12, S13]>; -def D7 : ARMReg< 7, "d7", [S14, S15]>; -def D8 : ARMReg< 8, "d8", [S16, S17]>; -def D9 : ARMReg< 9, "d9", [S18, S19]>; -def D10 : ARMReg<10, "d10", [S20, S21]>; -def D11 : ARMReg<11, "d11", [S22, S23]>; -def D12 : ARMReg<12, "d12", [S24, S25]>; -def D13 : ARMReg<13, "d13", [S26, S27]>; -def D14 : ARMReg<14, "d14", [S28, S29]>; -def D15 : ARMReg<15, "d15", [S30, S31]>; +def D0 : ARMReg< 0, "d0", [S0, S1]>, DwarfRegNum<[256]>; +def D1 : ARMReg< 1, "d1", [S2, S3]>, DwarfRegNum<[257]>; +def D2 : ARMReg< 2, "d2", [S4, S5]>, DwarfRegNum<[258]>; +def D3 : ARMReg< 3, "d3", [S6, S7]>, DwarfRegNum<[259]>; +def D4 : ARMReg< 4, "d4", [S8, S9]>, DwarfRegNum<[260]>; +def D5 : ARMReg< 5, "d5", [S10, S11]>, DwarfRegNum<[261]>; +def D6 : ARMReg< 6, "d6", [S12, S13]>, DwarfRegNum<[262]>; +def D7 : ARMReg< 7, "d7", [S14, S15]>, DwarfRegNum<[263]>; +def D8 : ARMReg< 8, "d8", [S16, S17]>, DwarfRegNum<[264]>; +def D9 : ARMReg< 9, "d9", [S18, S19]>, DwarfRegNum<[265]>; +def D10 : ARMReg<10, "d10", [S20, S21]>, DwarfRegNum<[266]>; +def D11 : ARMReg<11, "d11", [S22, S23]>, DwarfRegNum<[267]>; +def 
D12 : ARMReg<12, "d12", [S24, S25]>, DwarfRegNum<[268]>; +def D13 : ARMReg<13, "d13", [S26, S27]>, DwarfRegNum<[269]>; +def D14 : ARMReg<14, "d14", [S28, S29]>, DwarfRegNum<[270]>; +def D15 : ARMReg<15, "d15", [S30, S31]>, DwarfRegNum<[271]>; } // VFP3 defines 16 additional double registers -def D16 : ARMFReg<16, "d16">; def D17 : ARMFReg<17, "d17">; -def D18 : ARMFReg<18, "d18">; def D19 : ARMFReg<19, "d19">; -def D20 : ARMFReg<20, "d20">; def D21 : ARMFReg<21, "d21">; -def D22 : ARMFReg<22, "d22">; def D23 : ARMFReg<23, "d23">; -def D24 : ARMFReg<24, "d24">; def D25 : ARMFReg<25, "d25">; -def D26 : ARMFReg<26, "d26">; def D27 : ARMFReg<27, "d27">; -def D28 : ARMFReg<28, "d28">; def D29 : ARMFReg<29, "d29">; -def D30 : ARMFReg<30, "d30">; def D31 : ARMFReg<31, "d31">; +def D16 : ARMFReg<16, "d16">, DwarfRegNum<[272]>; +def D17 : ARMFReg<17, "d17">, DwarfRegNum<[273]>; +def D18 : ARMFReg<18, "d18">, DwarfRegNum<[274]>; +def D19 : ARMFReg<19, "d19">, DwarfRegNum<[275]>; +def D20 : ARMFReg<20, "d20">, DwarfRegNum<[276]>; +def D21 : ARMFReg<21, "d21">, DwarfRegNum<[277]>; +def D22 : ARMFReg<22, "d22">, DwarfRegNum<[278]>; +def D23 : ARMFReg<23, "d23">, DwarfRegNum<[279]>; +def D24 : ARMFReg<24, "d24">, DwarfRegNum<[280]>; +def D25 : ARMFReg<25, "d25">, DwarfRegNum<[281]>; +def D26 : ARMFReg<26, "d26">, DwarfRegNum<[282]>; +def D27 : ARMFReg<27, "d27">, DwarfRegNum<[283]>; +def D28 : ARMFReg<28, "d28">, DwarfRegNum<[284]>; +def D29 : ARMFReg<29, "d29">, DwarfRegNum<[285]>; +def D30 : ARMFReg<30, "d30">, DwarfRegNum<[286]>; +def D31 : ARMFReg<31, "d31">, DwarfRegNum<[287]>; // Advanced SIMD (NEON) defines 16 quad-word aliases let SubRegIndices = [dsub_0, dsub_1], diff --git a/lib/Target/ARM/ARMScheduleA9.td b/lib/Target/ARM/ARMScheduleA9.td index 82c6735f1b14..49fedf63f8bc 100644 --- a/lib/Target/ARM/ARMScheduleA9.td +++ b/lib/Target/ARM/ARMScheduleA9.td @@ -656,19 +656,19 @@ def CortexA9Itineraries : ProcessorItineraries< [1, 1, 1]>, // // Single-precision to Integer 
Move + // + // On A9 move-from-VFP is free to issue with no stall if other VFP + // operations are in flight. I assume it still can't dual-issue though. InstrItinData, - InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_DRegsVFP], 0, Required>, - InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_NPipe]>], + InstrStage<1, [A9_MUX0], 0>], [2, 1]>, // // Double-precision to Integer Move + // + // On A9 move-from-VFP is free to issue with no stall if other VFP + // operations are in flight. I assume it still can't dual-issue though. InstrItinData, - InstrStage<1, [A9_MUX0], 0>, - InstrStage<1, [A9_DRegsVFP], 0, Required>, - InstrStage<2, [A9_DRegsN], 0, Reserved>, - InstrStage<1, [A9_NPipe]>], + InstrStage<1, [A9_MUX0], 0>], [2, 1, 1]>, // // Single-precision FP Load @@ -691,20 +691,22 @@ def CortexA9Itineraries : ProcessorItineraries< [2, 1]>, // // FP Load Multiple + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Load Multiple + update + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // // Single-precision FP Store InstrItinData, @@ -725,205 +727,206 @@ def CortexA9Itineraries : ProcessorItineraries< [1, 1]>, // // FP Store Multiple + // FIXME: assumes 2 doubles which requires 2 LS cycles. 
InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1]>, // // FP Store Multiple + update + // FIXME: assumes 2 doubles which requires 2 LS cycles. InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsVFP], 0, Required>, InstrStage<2, [A9_DRegsN], 0, Reserved>, InstrStage<1, [A9_NPipe], 0>, - InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1]>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1]>, // NEON // VLD1 - // FIXME: Conservatively assume insufficent alignment. InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1]>, // VLD1x2 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1, 1]>, // VLD1x3 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 1]>, // VLD1x4 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 3, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, 
[A9_LSUnit]>], + [1, 1, 2, 2, 1]>, // VLD1u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 2, 1]>, // VLD1x2u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [2, 2, 2, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [1, 1, 2, 1]>, // VLD1x3u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 1]>, // VLD1x4u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [2, 2, 3, 3, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [1, 1, 2, 2, 2, 1]>, // // VLD1ln InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 1, 1, 1]>, // // VLD1lnu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 2, 1, 1, 
1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 2, 1, 1, 1, 1]>, // // VLD1dup InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 1]>, // // VLD1dupu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1, 1]>, // // VLD2 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1]>, // // VLD2x2 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 4, 3, 4, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 3, 2, 3, 1]>, // // VLD2ln InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 4, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 3, 1, 1, 1, 1]>, // // VLD2u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, 
[A9_DRegsN], 0, Required>, // Extra latency cycles since wbck is 7 cycles - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 2, 1, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 2, 1, 1, 1]>, // // VLD2x2u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 4, 3, 4, 2, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 3, 2, 3, 2, 1]>, // // VLD2lnu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [4, 4, 2, 1, 1, 1, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [3, 3, 2, 1, 1, 1, 1, 1]>, // // VLD2dup InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 1]>, // // VLD2dupu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<8, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], - [3, 3, 2, 1, 1]>, + InstrStage<7, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], + [2, 2, 2, 1, 1]>, // // VLD3 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 1]>, + 
InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 1]>, // // VLD3ln InstrItinData, @@ -938,10 +941,10 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 2, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 2, 1]>, // // VLD3lnu InstrItinData, @@ -974,108 +977,108 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 5, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 4, 1]>, // // VLD4ln InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<11,[A9_DRegsVFP], 0, Reserved>, - InstrStage<5, [A9_NPipe], 0>, - InstrStage<5, [A9_LSUnit]>], - [5, 5, 6, 6, 1, 1, 1, 1, 2, 2]>, + InstrStage<10,[A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe], 0>, + InstrStage<4, [A9_LSUnit]>], + [4, 4, 5, 5, 1, 1, 1, 1, 2, 2]>, // // VLD4u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<10,[A9_DRegsVFP], 0, Reserved>, - InstrStage<4, [A9_NPipe], 0>, - InstrStage<4, [A9_LSUnit]>], - [4, 4, 5, 5, 2, 1]>, + InstrStage<9,[A9_DRegsVFP], 0, Reserved>, + InstrStage<3, [A9_NPipe], 0>, + InstrStage<3, [A9_LSUnit]>], + [3, 3, 4, 4, 2, 1]>, // // VLD4lnu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<11,[A9_DRegsVFP], 0, Reserved>, - InstrStage<5, [A9_NPipe], 0>, - InstrStage<5, [A9_LSUnit]>], - [5, 5, 6, 6, 2, 1, 1, 1, 1, 1, 2, 2]>, + 
InstrStage<10,[A9_DRegsVFP], 0, Reserved>, + InstrStage<4, [A9_NPipe], 0>, + InstrStage<4, [A9_LSUnit]>], + [4, 4, 5, 5, 2, 1, 1, 1, 1, 1, 2, 2]>, // // VLD4dup InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 3, 4, 4, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 2, 3, 3, 1]>, // // VLD4dupu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<9, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], - [3, 3, 4, 4, 2, 1, 1]>, + InstrStage<8, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], + [2, 2, 3, 3, 2, 1, 1]>, // // VST1 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, // // VST1x2 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST1x3 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2]>, // // VST1x4 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - 
InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST1u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1]>, // // VST1x2u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST1x3u @@ -1083,44 +1086,44 @@ def CortexA9Itineraries : ProcessorItineraries< InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2]>, // // VST1x4u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // // VST1ln InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1]>, // // VST1lnu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, 
[A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1]>, // // VST2 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST2x2 @@ -1136,9 +1139,9 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST2x2u @@ -1154,36 +1157,36 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<2, [A9_DRegsVFP], 0, Reserved>, - InstrStage<2, [A9_NPipe], 0>, - InstrStage<2, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [1, 1, 1, 1]>, // // VST2lnu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<1, [A9_DRegsVFP], 0, Reserved>, + InstrStage<1, [A9_NPipe], 0>, + InstrStage<1, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1]>, // // VST3 InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], 
[1, 1, 1, 1, 2]>, // // VST3u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2]>, // // VST3ln @@ -1208,36 +1211,36 @@ def CortexA9Itineraries : ProcessorItineraries< InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST4u InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // // VST4ln InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [1, 1, 1, 1, 2, 2]>, // // VST4lnu InstrItinData, InstrStage<1, [A9_MUX0], 0>, InstrStage<1, [A9_DRegsN], 0, Required>, - InstrStage<3, [A9_DRegsVFP], 0, Reserved>, - InstrStage<3, [A9_NPipe], 0>, - InstrStage<3, [A9_LSUnit]>], + InstrStage<2, [A9_DRegsVFP], 0, Reserved>, + InstrStage<2, [A9_NPipe], 0>, + InstrStage<2, [A9_LSUnit]>], [2, 1, 1, 1, 1, 1, 2, 2]>, // diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 2b9202bff01c..aa1e398c0e42 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ 
b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -35,7 +35,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, // This requires 4-byte alignment. if ((Align & 3) != 0) return SDValue(); - // This requires the copy size to be a constant, preferrably + // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast(Size); if (!ConstantSize) diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 1465984899c6..c6f266b07531 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -38,6 +38,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowFPVMLx(false) + , HasVMLxForwarding(false) , SlowFPBrcc(false) , IsThumb(isT) , ThumbMode(Thumb1) @@ -51,6 +52,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , HasT2ExtractPack(false) , HasDataBarrier(false) , Pref32BitThumb(false) + , AvoidCPSRPartialUpdate(false) , HasMPExtension(false) , FPOnlySP(false) , AllowsUnalignedMem(false) diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 76c1c3fb41b1..0271c873f191 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -61,6 +61,10 @@ class ARMSubtarget : public TargetSubtarget { /// whether the FP VML[AS] instructions are slow (if so, don't use them). bool SlowFPVMLx; + /// HasVMLxForwarding - If true, NEON has special multiplier accumulator + /// forwarding to allow mul + mla being issued back to back. + bool HasVMLxForwarding; + /// SlowFPBrcc - True if floating point compare + branch is slow. bool SlowFPBrcc; @@ -106,6 +110,11 @@ class ARMSubtarget : public TargetSubtarget { /// over 16-bit ones. 
bool Pref32BitThumb; + /// AvoidCPSRPartialUpdate - If true, codegen would avoid using instructions + /// that partially update CPSR and add false dependency on the previous + /// CPSR setting instruction. + bool AvoidCPSRPartialUpdate; + /// HasMPExtension - True if the subtarget supports Multiprocessing /// extension (ARMv7 only). bool HasMPExtension; @@ -182,15 +191,19 @@ class ARMSubtarget : public TargetSubtarget { bool hasT2ExtractPack() const { return HasT2ExtractPack; } bool hasDataBarrier() const { return HasDataBarrier; } bool useFPVMLx() const { return !SlowFPVMLx; } + bool hasVMLxForwarding() const { return HasVMLxForwarding; } bool isFPBrccSlow() const { return SlowFPBrcc; } bool isFPOnlySP() const { return FPOnlySP; } bool prefers32BitThumb() const { return Pref32BitThumb; } + bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; } bool hasMPExtension() const { return HasMPExtension; } bool hasFP16() const { return HasFP16; } bool hasD16() const { return HasD16; } - bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } + const Triple &getTargetTriple() const { return TargetTriple; } + + bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } bool isTargetELF() const { return !isTargetDarwin(); } bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 0ee773b165fb..29aa4f7ad2ce 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -22,16 +22,13 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static cl::optExpandMLx("expand-fp-mlx", cl::init(false), cl::Hidden); - static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: + + if (TheTriple.isOSDarwin()) return new ARMMCAsmInfoDarwin(); - default: - return new ARMELFMCAsmInfo(); - } + + return new ARMELFMCAsmInfo(); } 
// This is duplicated code. Refactor this. @@ -41,17 +38,17 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: + + if (TheTriple.isOSWindows()) { llvm_unreachable("ARM does not support Windows COFF format"); return NULL; - default: - return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); } + + return createELFStreamer(Ctx, TAB, OS, Emitter, RelaxAll, NoExecStack); } extern "C" void LLVMInitializeARMTarget() { @@ -86,8 +83,7 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, : LLVMTargetMachine(T, TT), Subtarget(TT, FS, isThumb), JITInfo(), - InstrItins(Subtarget.getInstrItineraryData()) -{ + InstrItins(Subtarget.getInstrItineraryData()) { DefRelocModel = getRelocationModel(); } @@ -149,8 +145,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, // FIXME: temporarily disabling load / store optimization pass for Thumb1. 
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); - if (ExpandMLx && - OptLevel != CodeGenOpt::None && Subtarget.hasVFP2()) + if (OptLevel != CodeGenOpt::None && Subtarget.isCortexA9()) PM.add(createMLxExpansionPass()); return true; diff --git a/lib/Target/ARM/ARMTargetObjectFile.cpp b/lib/Target/ARM/ARMTargetObjectFile.cpp index 7535da54a95f..19defa1b5196 100644 --- a/lib/Target/ARM/ARMTargetObjectFile.cpp +++ b/lib/Target/ARM/ARMTargetObjectFile.cpp @@ -36,8 +36,9 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getDataRel()); + LSDASection = NULL; } - + AttributesSection = getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 129af206e1d9..29ecc182d31f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -29,15 +29,6 @@ #include "llvm/ADT/Twine.h" using namespace llvm; -/// Shift types used for register controlled shifts in ARM memory addressing. 
-enum ShiftType { - Lsl, - Lsr, - Asr, - Ror, - Rrx -}; - namespace { class ARMOperand; @@ -55,8 +46,10 @@ class ARMAsmParser : public TargetAsmParser { int TryParseRegister(); virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc); bool TryParseRegisterWithWriteBack(SmallVectorImpl &); + bool TryParseShiftRegister(SmallVectorImpl &); bool ParseRegisterList(SmallVectorImpl &); - bool ParseMemory(SmallVectorImpl &); + bool ParseMemory(SmallVectorImpl &, + ARMII::AddrMode AddrMode); bool ParseOperand(SmallVectorImpl &, StringRef Mnemonic); bool ParsePrefix(ARMMCExpr::VariantKind &RefKind); const MCExpr *ApplyPrefixToExpr(const MCExpr *E, @@ -65,13 +58,14 @@ class ARMAsmParser : public TargetAsmParser { bool ParseMemoryOffsetReg(bool &Negative, bool &OffsetRegShifted, - enum ShiftType &ShiftType, + enum ARM_AM::ShiftOpc &ShiftType, const MCExpr *&ShiftAmount, const MCExpr *&Offset, bool &OffsetIsReg, int &OffsetRegNum, SMLoc &E); - bool ParseShift(enum ShiftType &St, const MCExpr *&ShiftAmount, SMLoc &E); + bool ParseShift(enum ARM_AM::ShiftOpc &St, + const MCExpr *&ShiftAmount, SMLoc &E); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveThumb(SMLoc L); bool ParseDirectiveThumbFunc(SMLoc L); @@ -102,10 +96,25 @@ class ARMAsmParser : public TargetAsmParser { SmallVectorImpl&); OperandMatchResultTy tryParseMSRMaskOperand( SmallVectorImpl&); + OperandMatchResultTy tryParseMemMode2Operand( + SmallVectorImpl&); + OperandMatchResultTy tryParseMemMode3Operand( + SmallVectorImpl&); + + // Asm Match Converter Methods + bool CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); + bool CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); + bool CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); + bool CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); public: ARMAsmParser(const Target &T, MCAsmParser 
&_Parser, TargetMachine &_TM) : TargetAsmParser(T), Parser(_Parser), TM(_TM) { + MCAsmParserExtension::Initialize(_Parser); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures( &TM.getSubtarget())); @@ -136,6 +145,7 @@ class ARMOperand : public MCParsedAsmOperand { RegisterList, DPRRegisterList, SPRRegisterList, + Shifter, Token } Kind; @@ -178,13 +188,14 @@ class ARMOperand : public MCParsedAsmOperand { /// Combined record for all forms of ARM address expressions. struct { + ARMII::AddrMode AddrMode; unsigned BaseRegNum; union { unsigned RegNum; ///< Offset register num, when OffsetIsReg. const MCExpr *Value; ///< Offset value, when !OffsetIsReg. } Offset; const MCExpr *ShiftAmount; // used when OffsetRegShifted is true - enum ShiftType ShiftType; // used when OffsetRegShifted is true + enum ARM_AM::ShiftOpc ShiftType; // used when OffsetRegShifted is true unsigned OffsetRegShifted : 1; // only used when OffsetIsReg is true unsigned Preindexed : 1; unsigned Postindexed : 1; @@ -192,6 +203,11 @@ class ARMOperand : public MCParsedAsmOperand { unsigned Negative : 1; // only used when OffsetIsReg is true unsigned Writeback : 1; } Mem; + + struct { + ARM_AM::ShiftOpc ShiftTy; + unsigned RegNum; + } Shift; }; ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} @@ -234,6 +250,10 @@ class ARMOperand : public MCParsedAsmOperand { break; case ProcIFlags: IFlags = o.IFlags; + break; + case Shifter: + Shift = o.Shift; + break; } } @@ -290,7 +310,9 @@ class ARMOperand : public MCParsedAsmOperand { /// @name Memory Operand Accessors /// @{ - + ARMII::AddrMode getMemAddrMode() const { + return Mem.AddrMode; + } unsigned getMemBaseRegNum() const { return Mem.BaseRegNum; } @@ -310,7 +332,7 @@ class ARMOperand : public MCParsedAsmOperand { assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); return Mem.ShiftAmount; } - enum ShiftType getMemShiftType() const { + enum ARM_AM::ShiftOpc getMemShiftType() const { 
assert(Mem.OffsetIsReg && Mem.OffsetRegShifted && "Invalid access!"); return Mem.ShiftType; } @@ -334,6 +356,52 @@ class ARMOperand : public MCParsedAsmOperand { bool isToken() const { return Kind == Token; } bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; } bool isMemory() const { return Kind == Memory; } + bool isShifter() const { return Kind == Shifter; } + bool isMemMode2() const { + if (getMemAddrMode() != ARMII::AddrMode2) + return false; + + if (getMemOffsetIsReg()) + return true; + + if (getMemNegative() && + !(getMemPostindexed() || getMemPreindexed())) + return false; + + const MCConstantExpr *CE = dyn_cast(getMemOffset()); + if (!CE) return false; + int64_t Value = CE->getValue(); + + // The offset must be in the range 0-4095 (imm12). + if (Value > 4095 || Value < -4095) + return false; + + return true; + } + bool isMemMode3() const { + if (getMemAddrMode() != ARMII::AddrMode3) + return false; + + if (getMemOffsetIsReg()) { + if (getMemOffsetRegShifted()) + return false; // No shift with offset reg allowed + return true; + } + + if (getMemNegative() && + !(getMemPostindexed() || getMemPreindexed())) + return false; + + const MCConstantExpr *CE = dyn_cast(getMemOffset()); + if (!CE) return false; + int64_t Value = CE->getValue(); + + // The offset must be in the range 0-255 (imm8). 
+ if (Value > 255 || Value < -255) + return false; + + return true; + } bool isMemMode5() const { if (!isMemory() || getMemOffsetIsReg() || getMemWriteback() || getMemNegative()) @@ -346,6 +414,23 @@ class ARMOperand : public MCParsedAsmOperand { int64_t Value = CE->getValue(); return ((Value & 0x3) == 0 && Value <= 1020 && Value >= -1020); } + bool isMemMode7() const { + if (!isMemory() || + getMemPreindexed() || + getMemPostindexed() || + getMemOffsetIsReg() || + getMemNegative() || + getMemWriteback()) + return false; + + const MCConstantExpr *CE = dyn_cast(getMemOffset()); + if (!CE) return false; + + if (CE->getValue()) + return false; + + return true; + } bool isMemModeRegThumb() const { if (!isMemory() || !getMemOffsetIsReg() || getMemWriteback()) return false; @@ -402,6 +487,12 @@ class ARMOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getReg())); } + void addShifterOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateImm( + ARM_AM::getSORegOpc(Shift.ShiftTy, 0))); + } + void addRegListOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const SmallVectorImpl &RegList = getRegList(); @@ -428,6 +519,88 @@ class ARMOperand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); } + void addMemMode7Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && isMemMode7() && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + + const MCConstantExpr *CE = dyn_cast(getMemOffset()); + (void)CE; + assert((CE || CE->getValue() == 0) && + "No offset operand support in mode 7"); + } + + void addMemMode2Operands(MCInst &Inst, unsigned N) const { + assert(isMemMode2() && "Invalid mode or number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + + 
if (getMemOffsetIsReg()) { + Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + + ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; + ARM_AM::ShiftOpc ShOpc = ARM_AM::no_shift; + int64_t ShiftAmount = 0; + + if (getMemOffsetRegShifted()) { + ShOpc = getMemShiftType(); + const MCConstantExpr *CE = + dyn_cast(getMemShiftAmount()); + ShiftAmount = CE->getValue(); + } + + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(AMOpc, ShiftAmount, + ShOpc, IdxMode))); + return; + } + + // Create a operand placeholder to always yield the same number of operands. + Inst.addOperand(MCOperand::CreateReg(0)); + + // FIXME: #-0 is encoded differently than #0. Does the parser preserve + // the difference? + const MCConstantExpr *CE = dyn_cast(getMemOffset()); + assert(CE && "Non-constant mode 2 offset operand!"); + int64_t Offset = CE->getValue(); + + if (Offset >= 0) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::add, + Offset, ARM_AM::no_shift, IdxMode))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM2Opc(ARM_AM::sub, + -Offset, ARM_AM::no_shift, IdxMode))); + } + + void addMemMode3Operands(MCInst &Inst, unsigned N) const { + assert(isMemMode3() && "Invalid mode or number of operands!"); + Inst.addOperand(MCOperand::CreateReg(getMemBaseRegNum())); + unsigned IdxMode = (getMemPreindexed() | getMemPostindexed() << 1); + + if (getMemOffsetIsReg()) { + Inst.addOperand(MCOperand::CreateReg(getMemOffsetRegNum())); + + ARM_AM::AddrOpc AMOpc = getMemNegative() ? ARM_AM::sub : ARM_AM::add; + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(AMOpc, 0, + IdxMode))); + return; + } + + // Create a operand placeholder to always yield the same number of operands. + Inst.addOperand(MCOperand::CreateReg(0)); + + // FIXME: #-0 is encoded differently than #0. Does the parser preserve + // the difference? 
+ const MCConstantExpr *CE = dyn_cast(getMemOffset()); + assert(CE && "Non-constant mode 3 offset operand!"); + int64_t Offset = CE->getValue(); + + if (Offset >= 0) + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::add, + Offset, IdxMode))); + else + Inst.addOperand(MCOperand::CreateImm(ARM_AM::getAM3Opc(ARM_AM::sub, + -Offset, IdxMode))); + } + void addMemMode5Operands(MCInst &Inst, unsigned N) const { assert(N == 2 && isMemMode5() && "Invalid number of operands!"); @@ -525,6 +698,15 @@ class ARMOperand : public MCParsedAsmOperand { return Op; } + static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy, + SMLoc S, SMLoc E) { + ARMOperand *Op = new ARMOperand(Shifter); + Op->Shift.ShiftTy = ShTy; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static ARMOperand * CreateRegList(const SmallVectorImpl > &Regs, SMLoc StartLoc, SMLoc EndLoc) { @@ -553,9 +735,10 @@ class ARMOperand : public MCParsedAsmOperand { return Op; } - static ARMOperand *CreateMem(unsigned BaseRegNum, bool OffsetIsReg, - const MCExpr *Offset, int OffsetRegNum, - bool OffsetRegShifted, enum ShiftType ShiftType, + static ARMOperand *CreateMem(ARMII::AddrMode AddrMode, unsigned BaseRegNum, + bool OffsetIsReg, const MCExpr *Offset, + int OffsetRegNum, bool OffsetRegShifted, + enum ARM_AM::ShiftOpc ShiftType, const MCExpr *ShiftAmount, bool Preindexed, bool Postindexed, bool Negative, bool Writeback, SMLoc S, SMLoc E) { @@ -571,6 +754,7 @@ class ARMOperand : public MCParsedAsmOperand { "Cannot have expression offset and register offset!"); ARMOperand *Op = new ARMOperand(Memory); + Op->Mem.AddrMode = AddrMode; Op->Mem.BaseRegNum = BaseRegNum; Op->Mem.OffsetIsReg = OffsetIsReg; if (OffsetIsReg) @@ -642,7 +826,8 @@ void ARMOperand::dump(raw_ostream &OS) const { break; case Memory: OS << ""; break; + case Shifter: + OS << ""; + break; case RegisterList: case DPRRegisterList: case SPRRegisterList: { @@ -738,6 +926,42 @@ int ARMAsmParser::TryParseRegister() { return RegNum; } +/// 
Try to parse a register name. The token must be an Identifier when called, +/// and if it is a register name the token is eaten and the register number is +/// returned. Otherwise return -1. +/// +bool ARMAsmParser::TryParseShiftRegister( + SmallVectorImpl &Operands) { + SMLoc S = Parser.getTok().getLoc(); + const AsmToken &Tok = Parser.getTok(); + assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); + + std::string upperCase = Tok.getString().str(); + std::string lowerCase = LowercaseString(upperCase); + ARM_AM::ShiftOpc ShiftTy = StringSwitch(lowerCase) + .Case("lsl", ARM_AM::lsl) + .Case("lsr", ARM_AM::lsr) + .Case("asr", ARM_AM::asr) + .Case("ror", ARM_AM::ror) + .Case("rrx", ARM_AM::rrx) + .Default(ARM_AM::no_shift); + + if (ShiftTy == ARM_AM::no_shift) + return true; + + Parser.Lex(); // Eat shift-type operand; + int RegNum = TryParseRegister(); + if (RegNum == -1) + return Error(Parser.getTok().getLoc(), "register expected"); + + Operands.push_back(ARMOperand::CreateReg(RegNum,S, Parser.getTok().getLoc())); + Operands.push_back(ARMOperand::CreateShifter(ShiftTy, + S, Parser.getTok().getLoc())); + + return false; +} + + /// Try to parse a register name. The token must be an Identifier when called. /// If it's a register, an AsmOperand is created. Another AsmOperand is created /// if there is a "writeback". 'true' if it's not a register. @@ -1046,13 +1270,96 @@ tryParseMSRMaskOperand(SmallVectorImpl &Operands) { return MatchOperand_Success; } +/// tryParseMemMode2Operand - Try to parse memory addressing mode 2 operand. +ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +tryParseMemMode2Operand(SmallVectorImpl &Operands) { + assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\""); + + if (ParseMemory(Operands, ARMII::AddrMode2)) + return MatchOperand_NoMatch; + + return MatchOperand_Success; +} + +/// tryParseMemMode3Operand - Try to parse memory addressing mode 3 operand. 
+ARMAsmParser::OperandMatchResultTy ARMAsmParser:: +tryParseMemMode3Operand(SmallVectorImpl &Operands) { + assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a \"[\""); + + if (ParseMemory(Operands, ARMII::AddrMode3)) + return MatchOperand_NoMatch; + + return MatchOperand_Success; +} + +/// CvtLdWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtLdWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + + ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// CvtStWriteBackRegAddrMode2 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtStWriteBackRegAddrMode2(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addMemMode2Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// CvtLdWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtLdWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + + // Create a writeback register dummy placeholder. 
+ Inst.addOperand(MCOperand::CreateImm(0)); + + ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +/// CvtStWriteBackRegAddrMode3 - Convert parsed operands to MCInst. +/// Needed here because the Asm Gen Matcher can't handle properly tied operands +/// when they refer multiple MIOperands inside a single one. +bool ARMAsmParser:: +CvtStWriteBackRegAddrMode3(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + ((ARMOperand*)Operands[2])->addRegOperands(Inst, 1); + ((ARMOperand*)Operands[3])->addMemMode3Operands(Inst, 3); + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. /// /// TODO Only preindexing and postindexing addressing are started, unindexed /// with option, etc are still to do. bool ARMAsmParser:: -ParseMemory(SmallVectorImpl &Operands) { +ParseMemory(SmallVectorImpl &Operands, + ARMII::AddrMode AddrMode = ARMII::AddrModeNone) { SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -1083,7 +1390,7 @@ ParseMemory(SmallVectorImpl &Operands) { ARMOperand *WBOp = 0; int OffsetRegNum = -1; bool OffsetRegShifted = false; - enum ShiftType ShiftType = Lsl; + enum ARM_AM::ShiftOpc ShiftType = ARM_AM::lsl; const MCExpr *ShiftAmount = 0; const MCExpr *Offset = 0; @@ -1106,10 +1413,17 @@ ParseMemory(SmallVectorImpl &Operands) { const AsmToken &ExclaimTok = Parser.getTok(); if (ExclaimTok.is(AsmToken::Exclaim)) { + // None of addrmode3 instruction uses "!" 
+ if (AddrMode == ARMII::AddrMode3) + return true; + WBOp = ARMOperand::CreateToken(ExclaimTok.getString(), ExclaimTok.getLoc()); Writeback = true; Parser.Lex(); // Eat exclaim token + } else { // In addressing mode 2, pre-indexed mode always end with "!" + if (AddrMode == ARMII::AddrMode2) + Preindexed = false; } } else { // The "[Rn" we have so far was not followed by a comma. @@ -1143,13 +1457,17 @@ ParseMemory(SmallVectorImpl &Operands) { if (!OffsetIsReg) { if (!Offset) Offset = MCConstantExpr::Create(0, getContext()); + } else { + if (AddrMode == ARMII::AddrMode3 && OffsetRegShifted) { + Error(E, "shift amount not supported"); + return true; + } } - Operands.push_back(ARMOperand::CreateMem(BaseRegNum, OffsetIsReg, Offset, - OffsetRegNum, OffsetRegShifted, - ShiftType, ShiftAmount, Preindexed, - Postindexed, Negative, Writeback, - S, E)); + Operands.push_back(ARMOperand::CreateMem(AddrMode, BaseRegNum, OffsetIsReg, + Offset, OffsetRegNum, OffsetRegShifted, + ShiftType, ShiftAmount, Preindexed, + Postindexed, Negative, Writeback, S, E)); if (WBOp) Operands.push_back(WBOp); @@ -1165,7 +1483,7 @@ ParseMemory(SmallVectorImpl &Operands) { /// we return false on success or an error otherwise. bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, bool &OffsetRegShifted, - enum ShiftType &ShiftType, + enum ARM_AM::ShiftOpc &ShiftType, const MCExpr *&ShiftAmount, const MCExpr *&Offset, bool &OffsetIsReg, @@ -1226,28 +1544,28 @@ bool ARMAsmParser::ParseMemoryOffsetReg(bool &Negative, /// ( lsl | lsr | asr | ror ) , # shift_amount /// rrx /// and returns true if it parses a shift otherwise it returns false. 
-bool ARMAsmParser::ParseShift(ShiftType &St, const MCExpr *&ShiftAmount, - SMLoc &E) { +bool ARMAsmParser::ParseShift(ARM_AM::ShiftOpc &St, + const MCExpr *&ShiftAmount, SMLoc &E) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) return true; StringRef ShiftName = Tok.getString(); if (ShiftName == "lsl" || ShiftName == "LSL") - St = Lsl; + St = ARM_AM::lsl; else if (ShiftName == "lsr" || ShiftName == "LSR") - St = Lsr; + St = ARM_AM::lsr; else if (ShiftName == "asr" || ShiftName == "ASR") - St = Asr; + St = ARM_AM::asr; else if (ShiftName == "ror" || ShiftName == "ROR") - St = Ror; + St = ARM_AM::ror; else if (ShiftName == "rrx" || ShiftName == "RRX") - St = Rrx; + St = ARM_AM::rrx; else return true; Parser.Lex(); // Eat shift type token. // Rrx stands alone. - if (St == Rrx) + if (St == ARM_AM::rrx) return false; // Otherwise, there must be a '#' and a shift amount. @@ -1286,6 +1604,9 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl &Operands, case AsmToken::Identifier: if (!TryParseRegisterWithWriteBack(Operands)) return false; + if (!TryParseShiftRegister(Operands)) + return false; + // Fall though for the Identifier case that is not a register or a // special name. diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 78d73d3a272b..bdce2c4cf896 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -18,6 +18,7 @@ #include "ARMDisassembler.h" #include "ARMDisassemblerCore.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/TargetRegistry.h" @@ -94,6 +95,9 @@ static unsigned decodeARMInstruction(uint32_t &insn) { // As a result, the decoder fails to deocode USAT properly. if (slice(insn, 27, 21) == 0x37 && slice(insn, 5, 4) == 1) return ARM::USAT; + // As a result, the decoder fails to decode UQADD16 properly. 
+ if (slice(insn, 27, 20) == 0x66 && slice(insn, 7, 4) == 1) + return ARM::UQADD16; // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. // As a result, the decoder fails to decode UMULL properly. @@ -280,6 +284,24 @@ static unsigned T2Morph2LoadLiteral(unsigned Opcode) { } } +// Helper function for special case handling of PLD (literal) and friends. +// See A8.6.117 T1 & T2 and friends for why we morphed the opcode +// before returning it. +static unsigned T2Morph2PLDLiteral(unsigned Opcode) { + switch (Opcode) { + default: + return Opcode; // Return unmorphed opcode. + + case ARM::t2PLDi8: case ARM::t2PLDs: + case ARM::t2PLDWi12: case ARM::t2PLDWi8: + case ARM::t2PLDWs: + return ARM::t2PLDi12; + + case ARM::t2PLIi8: case ARM::t2PLIs: + return ARM::t2PLIi12; + } +} + /// decodeThumbSideEffect is a decorator function which can potentially twiddle /// the instruction or morph the returned opcode under Thumb2. /// @@ -330,12 +352,27 @@ static unsigned decodeThumbSideEffect(bool IsThumb2, unsigned &insn) { } // --------- Transform End Marker --------- + unsigned unmorphed = decodeThumbInstruction(insn); + // See, for example, A6.3.7 Load word: Table A6-18 Load word. // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode // before returning it to our caller. if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 - && slice(insn, 19, 16) == 15) - return T2Morph2LoadLiteral(decodeThumbInstruction(insn)); + && slice(insn, 19, 16) == 15) { + unsigned morphed = T2Morph2LoadLiteral(unmorphed); + if (morphed != unmorphed) + return morphed; + } + + // See, for example, A8.6.117 PLD,PLDW (immediate) T1 & T2, and friends for + // why we morphed the opcode before returning it to our caller. 
+ if (slice(insn, 31, 25) == 0x7C && slice(insn, 15, 12) == 0xF + && slice(insn, 22, 22) == 0 && slice(insn, 20, 20) == 1 + && slice(insn, 19, 16) == 15) { + unsigned morphed = T2Morph2PLDLiteral(unmorphed); + if (morphed != unmorphed) + return morphed; + } // One last check for NEON/VFP instructions. if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) @@ -375,21 +412,23 @@ bool ARMDisassembler::getInstruction(MCInst &MI, Size = 4; DEBUG({ - errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) + errs() << "\nOpcode=" << Opcode << " Name=" < Builder(CreateMCBuilder(Opcode, Format)); if (!Builder) return false; + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; - delete Builder; - return true; } @@ -398,7 +437,7 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, const MemoryObject &Region, uint64_t Address, raw_ostream &os) const { - // The Thumb instruction stream is a sequence of halhwords. + // The Thumb instruction stream is a sequence of halfwords. // This represents the first halfword as well as the machine instruction // passed to decodeThumbInstruction(). 
For 16-bit Thumb instruction, the top @@ -463,17 +502,19 @@ bool ThumbDisassembler::getInstruction(MCInst &MI, showBitVector(errs(), insn); }); - ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); + OwningPtr Builder(CreateMCBuilder(Opcode, Format)); if (!Builder) return false; Builder->SetSession(const_cast(&SO)); + Builder->setupBuilderForSymbolicDisassembly(getLLVMOpInfoCallback(), + getDisInfoBlock(), getMCContext(), + Address); + if (!Builder->Build(MI, insn)) return false; - delete Builder; - return true; } diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp index bac68dd9ead0..642829cdab09 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp @@ -17,6 +17,7 @@ #include "ARMDisassemblerCore.h" #include "ARMAddressingModes.h" +#include "ARMMCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -82,10 +83,28 @@ const char *ARMUtils::OpcodeName(unsigned Opcode) { // FIXME: Auto-gened? static unsigned getRegisterEnum(BO B, unsigned RegClassID, unsigned RawRegister) { - // For this purpose, we can treat rGPR as if it were GPR. - if (RegClassID == ARM::rGPRRegClassID) RegClassID = ARM::GPRRegClassID; + if (RegClassID == ARM::rGPRRegClassID) { + // Check for the register numbers 13 and 15 that are not permitted for many + // Thumb register specifiers. + if (RawRegister == 13 || RawRegister == 15) { + B->SetErr(-1); + return 0; + } + // For this purpose, we can treat rGPR as if it were GPR. + RegClassID = ARM::GPRRegClassID; + } // See also decodeNEONRd(), decodeNEONRn(), decodeNEONRm(). + // A7.3 register encoding + // Qd -> bit[12] == 0 + // Qn -> bit[16] == 0 + // Qm -> bit[0] == 0 + // + // If one of these bits is 1, the instruction is UNDEFINED. 
+ if (RegClassID == ARM::QPRRegClassID && slice(RawRegister, 0, 0) == 1) { + B->SetErr(-1); + return 0; + } unsigned RegNum = RegClassID == ARM::QPRRegClassID ? RawRegister >> 1 : RawRegister; @@ -497,14 +516,66 @@ static bool DisassemblePseudo(MCInst &MI, unsigned Opcode, uint32_t insn, return false; } -// Multiply Instructions. -// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLS: -// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12} +// A8.6.94 MLA +// if d == 15 || n == 15 || m == 15 || a == 15 then UNPREDICTABLE; // -// MUL, SMMUL, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT: +// A8.6.105 MUL +// if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; +// +// A8.6.246 UMULL +// if dLo == 15 || dHi == 15 || n == 15 || m == 15 then UNPREDICTABLE; +// if dHi == dLo then UNPREDICTABLE; +static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) { + unsigned R19_16 = slice(insn, 19, 16); + unsigned R15_12 = slice(insn, 15, 12); + unsigned R11_8 = slice(insn, 11, 8); + unsigned R3_0 = slice(insn, 3, 0); + switch (Opcode) { + default: + // Did we miss an opcode? 
+ DEBUG(errs() << "BadRegsMulFrm: unexpected opcode!"); + return false; + case ARM::MLA: case ARM::MLS: case ARM::SMLABB: case ARM::SMLABT: + case ARM::SMLATB: case ARM::SMLATT: case ARM::SMLAWB: case ARM::SMLAWT: + case ARM::SMMLA: case ARM::SMMLAR: case ARM::SMMLS: case ARM::SMMLSR: + case ARM::USADA8: + if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) + return true; + return false; + case ARM::MUL: case ARM::SMMUL: case ARM::SMMULR: + case ARM::SMULBB: case ARM::SMULBT: case ARM::SMULTB: case ARM::SMULTT: + case ARM::SMULWB: case ARM::SMULWT: case ARM::SMUAD: case ARM::SMUADX: + // A8.6.167 SMLAD & A8.6.172 SMLSD + case ARM::SMLAD: case ARM::SMLADX: case ARM::SMLSD: case ARM::SMLSDX: + case ARM::USAD8: + if (R19_16 == 15 || R11_8 == 15 || R3_0 == 15) + return true; + return false; + case ARM::SMLAL: case ARM::SMULL: case ARM::UMAAL: case ARM::UMLAL: + case ARM::UMULL: + case ARM::SMLALBB: case ARM::SMLALBT: case ARM::SMLALTB: case ARM::SMLALTT: + case ARM::SMLALD: case ARM::SMLALDX: case ARM::SMLSLD: case ARM::SMLSLDX: + if (R19_16 == 15 || R15_12 == 15 || R11_8 == 15 || R3_0 == 15) + return true; + if (R19_16 == R15_12) + return true; + return false;; + } +} + +// Multiply Instructions. +// MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, SMLAWT, SMMLA, SMMLAR, +// SMMLS, SMMLSR, SMLAD, SMLADX, SMLSD, SMLSDX, and USADA8 (for convenience): +// Rd{19-16} Rn{3-0} Rm{11-8} Ra{15-12} +// But note that register checking for {SMLAD, SMLADX, SMLSD, SMLSDX} is +// only for {d, n, m}. 
+// +// MUL, SMMUL, SMMULR, SMULBB, SMULBT, SMULTB, SMULTT, SMULWB, SMULWT, SMUAD, +// SMUADX, and USAD8 (for convenience): // Rd{19-16} Rn{3-0} Rm{11-8} // -// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT: +// SMLAL, SMULL, UMAAL, UMLAL, UMULL, SMLALBB, SMLALBT, SMLALTB, SMLALTT, +// SMLALD, SMLALDX, SMLSLD, SMLSLDX: // RdLo{15-12} RdHi{19-16} Rn{3-0} Rm{11-8} // // The mapping of the multiply registers to the "regular" ARM registers, where @@ -531,6 +602,10 @@ static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, && OpInfo[2].RegClass == ARM::GPRRegClassID && "Expect three register operands"); + // Sanity check for the register encodings. + if (BadRegsMulFrm(Opcode, insn)) + return false; + // Instructions with two destination registers have RdLo{15-12} first. if (NumDefs == 2) { assert(NumOps >= 4 && OpInfo[3].RegClass == ARM::GPRRegClassID && @@ -618,18 +693,38 @@ static inline unsigned GetCopOpc(uint32_t insn) { static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "Num of operands >= 5 for coprocessor instr"); + assert(NumOps >= 4 && "Num of operands >= 4 for coprocessor instr"); unsigned &OpIdx = NumOpsAdded; + // A8.6.92 + // if coproc == '101x' then SEE "Advanced SIMD and VFP" + // But since the special instructions have more explicit encoding bits + // specified, if coproc == 10 or 11, we should reject it as invalid. + unsigned coproc = GetCoprocessor(insn); + if ((Opcode == ARM::MCR || Opcode == ARM::MCRR || + Opcode == ARM::MRC || Opcode == ARM::MRRC) && + (coproc == 10 || coproc == 11)) { + DEBUG(errs() << "Encoding error: coproc == 10 or 11 for MCR[R]/MR[R]C\n"); + return false; + } + bool OneCopOpc = (Opcode == ARM::MCRR || Opcode == ARM::MCRR2 || Opcode == ARM::MRRC || Opcode == ARM::MRRC2); + // CDP/CDP2 has no GPR operand; the opc1 operand is also wider (Inst{23-20}). 
bool NoGPR = (Opcode == ARM::CDP || Opcode == ARM::CDP2); bool LdStCop = LdStCopOpcode(Opcode); + bool RtOut = (Opcode == ARM::MRC || Opcode == ARM::MRC2); OpIdx = 0; - MI.addOperand(MCOperand::CreateImm(GetCoprocessor(insn))); + if (RtOut) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + ++OpIdx; + } + MI.addOperand(MCOperand::CreateImm(coproc)); + ++OpIdx; if (LdStCop) { // Unindex if P:W = 0b00 --> _OPTION variant @@ -639,26 +734,34 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); + OpIdx += 2; if (PW) { MI.addOperand(MCOperand::CreateReg(0)); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + const TargetInstrDesc &TID = ARMInsts[Opcode]; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2, - ARM_AM::no_shift); + ARM_AM::no_shift, IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); - OpIdx = 5; + OpIdx += 2; } else { MI.addOperand(MCOperand::CreateImm(slice(insn, 7, 0))); - OpIdx = 4; + ++OpIdx; } } else { MI.addOperand(MCOperand::CreateImm(OneCopOpc ? GetCopOpc(insn) : GetCopOpc1(insn, NoGPR))); + ++OpIdx; - MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) - : MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); + if (!RtOut) { + MI.addOperand(NoGPR ? MCOperand::CreateImm(decodeRd(insn)) + : MCOperand::CreateReg( + getRegisterEnum(B, ARM::GPRRegClassID, + decodeRd(insn)))); + ++OpIdx; + } MI.addOperand(OneCopOpc ? 
MCOperand::CreateReg( getRegisterEnum(B, ARM::GPRRegClassID, @@ -667,7 +770,7 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateImm(decodeRm(insn))); - OpIdx = 5; + OpIdx += 2; if (!OneCopOpc) { MI.addOperand(MCOperand::CreateImm(GetCopOpc2(insn))); @@ -679,8 +782,8 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn, } // Branch Instructions. -// BLr9: SignExtend(Imm24:'00', 32) -// Bcc, BLr9_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 +// BL: SignExtend(Imm24:'00', 32) +// Bcc, BL_pred: SignExtend(Imm24:'00', 32) Pred0 Pred1 // SMC: ZeroExtend(imm4, 32) // SVC: ZeroExtend(Imm24, 32) // @@ -735,6 +838,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // MSRi take a mask, followed by one so_imm operand. The mask contains the // R Bit in bit 4, and the special register fields in bits 3-0. if (Opcode == ARM::MSRi) { + // A5.2.11 MSR (immediate), and hints & B6.1.6 MSR (immediate) + // The hints instructions have more specific encodings, so if mask == 0, + // we should reject this as an invalid instruction. + if (slice(insn, 19, 16) == 0) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 22, 22) << 4 /* R Bit */ | slice(insn, 19, 16) /* Special Reg */ )); // SOImm is 4-bit rotate amount in bits 11-8 with 8-bit imm in bits 7-0. 
@@ -760,11 +868,11 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } - assert((Opcode == ARM::Bcc || Opcode == ARM::BLr9 || Opcode == ARM::BLr9_pred + assert((Opcode == ARM::Bcc || Opcode == ARM::BL || Opcode == ARM::BL_pred || Opcode == ARM::SMC || Opcode == ARM::SVC) && "Unexpected Opcode"); - assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Reg operand expected"); + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); int Imm32 = 0; if (Opcode == ARM::SMC) { @@ -778,12 +886,6 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Imm26 = slice(insn, 23, 0) << 2; //Imm32 = signextend(Imm26); Imm32 = SignExtend32<26>(Imm26); - - // When executing an ARM instruction, PC reads as the address of the current - // instruction plus 8. The assembler subtracts 8 from the difference - // between the branch instruction and the target address, disassembler has - // to add 8 to compensate. - Imm32 += 8; } MI.addOperand(MCOperand::CreateImm(Imm32)); @@ -793,7 +895,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // Misc. Branch Instructions. -// BLXr9, BXr9 +// BLX, BLXi, BX // BX, BX_RET static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -809,8 +911,9 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR) return true; - // BLXr9 and BX take one GPR reg. - if (Opcode == ARM::BLXr9 || Opcode == ARM::BX) { + // BLX and BX take one GPR reg. 
+ if (Opcode == ARM::BLX || Opcode == ARM::BLX_pred || + Opcode == ARM::BX) { assert(NumOps >= 1 && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -819,6 +922,17 @@ static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } + // BLXi takes imm32 (the PC offset). + if (Opcode == ARM::BLXi) { + assert(NumOps >= 1 && OpInfo[0].RegClass < 0 && "Imm operand expected"); + // SignExtend(imm24:H:'0', 32) where imm24 = Inst{23-0} and H = Inst{24}. + unsigned Imm26 = slice(insn, 23, 0) << 2 | slice(insn, 24, 24) << 1; + int Imm32 = SignExtend32<26>(Imm26); + MI.addOperand(MCOperand::CreateImm(Imm32)); + OpIdx = 1; + return true; + } + return false; } @@ -837,6 +951,24 @@ static inline bool getBFCInvMask(uint32_t insn, uint32_t &mask) { return true; } +// Standard data-processing instructions allow PC as a register specifier, +// but we should reject other DPFrm instructions with PC as registers. +static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) { + switch (Opcode) { + default: + // Did we miss an opcode? + if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || decodeRm(insn) == 15) { + DEBUG(errs() << "DPFrm with bad reg specifier(s)\n"); + return true; + } + case ARM::ADCrr: case ARM::ADDSrr: case ARM::ADDrr: case ARM::ANDrr: + case ARM::BICrr: case ARM::CMNzrr: case ARM::CMPrr: case ARM::EORrr: + case ARM::ORRrr: case ARM::RSBrr: case ARM::RSCrr: case ARM::SBCrr: + case ARM::SUBSrr: case ARM::SUBrr: case ARM::TEQrr: case ARM::TSTrr: + return false; + } +} + // A major complication is the fact that some of the saturating add/subtract // operations have Rd Rm Rn, instead of the "normal" Rd Rn Rm. // They are QADD, QDADD, QDSUB, and QSUB. @@ -864,6 +996,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Special-case handling of BFC/BFI/SBFX/UBFX. 
if (Opcode == ARM::BFC || Opcode == ARM::BFI) { + // A8.6.17 BFC & A8.6.18 BFI + // Sanity check Rd. + if (decodeRd(insn) == 15) + return false; MI.addOperand(MCOperand::CreateReg(0)); if (Opcode == ARM::BFI) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -879,6 +1015,9 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } if (Opcode == ARM::SBFX || Opcode == ARM::UBFX) { + // Sanity check Rd and Rm. + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); MI.addOperand(MCOperand::CreateImm(slice(insn, 11, 7))); @@ -915,15 +1054,21 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Assert disabled because saturating operations, e.g., A8.6.127 QASX, are // routed here as well. // assert(getIBit(insn) == 0 && "I_Bit != '0' reg/reg form"); + if (BadRegsDPFrm(Opcode, insn)) + return false; MI.addOperand(MCOperand::CreateReg( getRegisterEnum(B, ARM::GPRRegClassID, RmRn? decodeRn(insn) : decodeRm(insn)))); ++OpIdx; } else if (Opcode == ARM::MOVi16 || Opcode == ARM::MOVTi16) { + // These two instructions don't allow d as 15. + if (decodeRd(insn) == 15) + return false; // We have an imm16 = imm4:imm12 (imm4=Inst{19:16}, imm12 = Inst{11:0}). assert(getIBit(insn) == 1 && "I_Bit != '1' reg/imm form"); unsigned Imm16 = slice(insn, 19, 16) << 12 | slice(insn, 11, 0); - MI.addOperand(MCOperand::CreateImm(Imm16)); + if (!B->tryAddingSymbolicOperand(Imm16, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Imm16)); ++OpIdx; } else { // We have a reg/imm form. @@ -992,6 +1137,21 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); if (Rs) { + // If Inst{7} != 0, we should reject this insn as an invalid encoding. 
+ if (slice(insn, 7, 7)) + return false; + + // A8.6.3 ADC (register-shifted register) + // if d == 15 || n == 15 || m == 15 || s == 15 then UNPREDICTABLE; + // + // This also accounts for shift instructions (register) where, fortunately, + // Inst{19-16} = 0b0000. + // A8.6.89 LSL (register) + // if d == 15 || n == 15 || m == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRn(insn) == 15 || + decodeRm(insn) == 15 || decodeRs(insn) == 15) + return false; + // Register-controlled shifts: [Rm, Rs, shift]. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRs(insn)))); @@ -1015,6 +1175,71 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBack, + bool Imm) { + const StringRef Name = ARMInsts[Opcode].Name; + unsigned Rt = decodeRd(insn); + unsigned Rn = decodeRn(insn); + unsigned Rm = decodeRm(insn); + unsigned P = getPBit(insn); + unsigned W = getWBit(insn); + + if (Store) { + // Only STR (immediate, register) allows PC as the source. + if (Name.startswith("STRB") && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + if (WBack && (Rn == 15 || Rn == Rt)) { + DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); + return true; + } + if (!Imm && Rm == 15) { + DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + // Only LDR (immediate, register) allows PC as the destination. + if (Name.startswith("LDRB") && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + if (Imm) { + // Immediate + if (Rn == 15) { + // The literal form must be in offset mode; it's an encoding error + // otherwise. + if (!(P == 1 && W == 0)) { + DEBUG(errs() << "Ld literal form with !(P == 1 && W == 0)\n"); + return true; + } + // LDRB (literal) does not allow PC as the destination. 
+ if (Opcode != ARM::LDRi12 && Rt == 15) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + // Write back while Rn == Rt does not make sense. + if (WBack && (Rn == Rt)) { + DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); + return true; + } + } + } else { + // Register + if (Rm == 15) { + DEBUG(errs() << "if m == 15 then UNPREDICTABLE\n"); + return true; + } + if (WBack && (Rn == 15 || Rn == Rt)) { + DEBUG(errs() << "if wback && (n == 15 || n == t) then UNPREDICTABLE\n"); + return true; + } + } + } + return false; +} + static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) { @@ -1077,19 +1302,41 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpIdx + 1 >= NumOps) return false; - assert((OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) && - (OpInfo[OpIdx+1].RegClass < 0) && - "Expect 1 reg operand followed by 1 imm operand"); + if (BadRegsLdStFrm(Opcode, insn, isStore, isPrePost, getIBit(insn)==0)) + return false; ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getIBit(insn) == 0) { - MI.addOperand(MCOperand::CreateReg(0)); + // For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2). + // Otherwise, skip the reg operand since for addrmode_imm12, Rn has already + // been populated. + if (isPrePost) { + MI.addOperand(MCOperand::CreateReg(0)); + OpIdx += 1; + } - // Disassemble the 12-bit immediate offset. 
unsigned Imm12 = slice(insn, 11, 0); - unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift); - MI.addOperand(MCOperand::CreateImm(Offset)); + if (Opcode == ARM::LDRBi12 || Opcode == ARM::LDRi12 || + Opcode == ARM::STRBi12 || Opcode == ARM::STRi12) { + // Disassemble the 12-bit immediate offset, which is the second operand in + // $addrmode_imm12 => (ops GPR:$base, i32imm:$offsimm). + int Offset = AddrOpcode == ARM_AM::add ? 1 * Imm12 : -1 * Imm12; + MI.addOperand(MCOperand::CreateImm(Offset)); + } else { + // Disassemble the 12-bit immediate offset, which is the second operand in + // $am2offset => (ops GPR, i32imm). + unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, Imm12, ARM_AM::no_shift, + IndexMode); + MI.addOperand(MCOperand::CreateImm(Offset)); + } + OpIdx += 1; } else { + // If Inst{25} = 1 and Inst{4} != 0, we should reject this as invalid. + if (slice(insn,4,4) == 1) + return false; + // Disassemble the offset reg (Rm), shift type, and immediate shift length. MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); @@ -1101,9 +1348,9 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.4.1. Possible rrx or shift amount of 32... getImmShiftSE(ShOp, ShImm); MI.addOperand(MCOperand::CreateImm( - ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp))); + ARM_AM::getAM2Opc(AddrOpcode, ShImm, ShOp, IndexMode))); + OpIdx += 2; } - OpIdx += 2; return true; } @@ -1125,7 +1372,7 @@ static bool HasDualReg(unsigned Opcode) { case ARM::LDRD: case ARM::LDRD_PRE: case ARM::LDRD_POST: case ARM::STRD: case ARM::STRD_PRE: case ARM::STRD_POST: return true; - } + } } static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, @@ -1153,8 +1400,6 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ++OpIdx; } - bool DualReg = HasDualReg(Opcode); - // Disassemble the dst/src operand. 
if (OpIdx >= NumOps) return false; @@ -1165,8 +1410,8 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRd(insn)))); ++OpIdx; - // Fill in LDRD and STRD's second operand. - if (DualReg) { + // Fill in LDRD and STRD's second operand Rt operand. + if (HasDualReg(Opcode)) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn) + 1))); ++OpIdx; @@ -1188,7 +1433,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && "Reg operand expected"); assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) - && "Index mode or tied_to operand expected"); + && "Offset mode or tied_to operand expected"); MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); ++OpIdx; @@ -1204,19 +1449,22 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, "Expect 1 reg operand followed by 1 imm operand"); ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub; + unsigned IndexMode = + (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift; if (getAM3IBit(insn) == 1) { MI.addOperand(MCOperand::CreateReg(0)); // Disassemble the 8-bit immediate offset. unsigned Imm4H = (insn >> ARMII::ImmHiShift) & 0xF; unsigned Imm4L = insn & 0xF; - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L); + unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, (Imm4H << 4) | Imm4L, + IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); } else { // Disassemble the offset reg (Rm). 
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); - unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0); + unsigned Offset = ARM_AM::getAM3Opc(AddrOpcode, 0, IndexMode); MI.addOperand(MCOperand::CreateImm(Offset)); } OpIdx += 2; @@ -1236,13 +1484,13 @@ static bool DisassembleStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // The algorithm for disassembly of LdStMulFrm is different from others because -// it explicitly populates the two predicate operands after operand 0 (the base) -// and operand 1 (the AM4 mode imm). After operand 3, we need to populate the -// reglist with each affected register encoded as an MCOperand. +// it explicitly populates the two predicate operands after the base register. +// After that, we need to populate the reglist with each affected register +// encoded as an MCOperand. static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "LdStMulFrm expects NumOps >= 5"); + assert(NumOps >= 4 && "LdStMulFrm expects NumOps >= 4"); NumOpsAdded = 0; unsigned Base = getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)); @@ -1260,8 +1508,10 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); // Handling the two predicate operands before the reglist. - int64_t CondVal = insn >> ARMII::CondShift; - MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 0xE : CondVal)); + int64_t CondVal = getCondField(insn); + if (CondVal == 0xF) + return false; + MI.addOperand(MCOperand::CreateImm(CondVal)); MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); NumOpsAdded += 3; @@ -1352,6 +1602,12 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::GPRRegClassID; + // Sanity check the registers, which should not be 15. 
+ if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + if (ThreeReg && decodeRn(insn) == 15) + return false; + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRd(insn)))); ++OpIdx; @@ -1376,7 +1632,7 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ARM_AM::ShiftOpc Opc = ARM_AM::no_shift; if (Opcode == ARM::PKHBT) Opc = ARM_AM::lsl; - else if (Opcode == ARM::PKHBT) + else if (Opcode == ARM::PKHTB) Opc = ARM_AM::asr; getImmShiftSE(Opc, ShiftAmt); MI.addOperand(MCOperand::CreateImm(ARM_AM::getSORegOpc(Opc, ShiftAmt))); @@ -1391,6 +1647,11 @@ static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + // A8.6.183 SSAT + // if d == 15 || n == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + const TargetInstrDesc &TID = ARMInsts[Opcode]; NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands @@ -1429,6 +1690,11 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { + // A8.6.220 SXTAB + // if d == 15 || m == 15 then UNPREDICTABLE; + if (decodeRd(insn) == 15 || decodeRm(insn) == 15) + return false; + const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; unsigned &OpIdx = NumOpsAdded; @@ -1611,7 +1877,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // A8.6.295 vcvt (floating-point <-> integer) // Int to FP: VSITOD, VSITOS, VUITOD, VUITOS // FP to Int: VTOSI[Z|R]D, VTOSI[Z|R]S, VTOUI[Z|R]D, VTOUI[Z|R]S -// +// // A8.6.297 vcvt (floating-point and fixed-point) // Dd|Sd Dd|Sd(TIED_TO) #fbits(= 16|32 - UInt(imm4:i)) static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn, @@ 
-1800,15 +2066,14 @@ static bool DisassembleVFPLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn, } // VFP Load/Store Multiple Instructions. -// This is similar to the algorithm for LDM/STM in that operand 0 (the base) and -// operand 1 (the AM4 mode imm) is followed by two predicate operands. It is -// followed by a reglist of either DPR(s) or SPR(s). +// We have an optional write back reg, the base, and two predicate operands. +// It is then followed by a reglist of either DPR(s) or SPR(s). // // VLDMD[_UPD], VLDMS[_UPD], VSTMD[_UPD], VSTMS[_UPD] static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - assert(NumOps >= 5 && "VFPLdStMulFrm expects NumOps >= 5"); + assert(NumOps >= 4 && "VFPLdStMulFrm expects NumOps >= 4"); unsigned &OpIdx = NumOpsAdded; @@ -1827,25 +2092,18 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, MI.addOperand(MCOperand::CreateReg(Base)); - // Next comes the AM4 Opcode. - ARM_AM::AMSubMode SubMode = getAMSubModeForBits(getPUBits(insn)); - // Must be either "ia" or "db" submode. - if (SubMode != ARM_AM::ia && SubMode != ARM_AM::db) { - DEBUG(errs() << "Illegal addressing mode 4 sub-mode!\n"); - return false; - } - MI.addOperand(MCOperand::CreateImm(ARM_AM::getAM4ModeImm(SubMode))); - // Handling the two predicate operands before the reglist. - int64_t CondVal = insn >> ARMII::CondShift; - MI.addOperand(MCOperand::CreateImm(CondVal == 0xF ? 
0xE : CondVal)); + int64_t CondVal = getCondField(insn); + if (CondVal == 0xF) + return false; + MI.addOperand(MCOperand::CreateImm(CondVal)); MI.addOperand(MCOperand::CreateReg(ARM::CPSR)); - OpIdx += 4; + OpIdx += 3; - bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSDB || + bool isSPVFP = (Opcode == ARM::VLDMSIA || Opcode == ARM::VLDMSIA_UPD || Opcode == ARM::VLDMSDB_UPD || - Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSDB || + Opcode == ARM::VSTMSIA || Opcode == ARM::VSTMSIA_UPD || Opcode == ARM::VSTMSDB_UPD); unsigned RegClassID = isSPVFP ? ARM::SPRRegClassID : ARM::DPRRegClassID; @@ -1855,6 +2113,11 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // Fill the variadic part of reglist. unsigned char Imm8 = insn & 0xFF; unsigned Regs = isSPVFP ? Imm8 : Imm8/2; + + // Apply some sanity checks before proceeding. + if (Regs == 0 || (RegD + Regs) > 32 || (!isSPVFP && Regs > 16)) + return false; + for (unsigned i = 0; i < Regs; ++i) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassID, RegD + i))); @@ -2136,7 +2399,7 @@ static unsigned decodeN3VImm(uint32_t insn) { // Correctly set VLD*/VST*'s TIED_TO GPR, as the asm printer needs it. static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced, - BO B) { + unsigned alignment, BO B) { const TargetInstrDesc &TID = ARMInsts[Opcode]; const TargetOperandInfo *OpInfo = TID.OpInfo; @@ -2180,9 +2443,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); + // addrmode6 := (ops GPR:$addr, i32imm) MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); - MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? 
+ MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment OpIdx += 2; if (WB) { @@ -2230,9 +2494,10 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, assert((OpIdx+1) < NumOps && OpInfo[OpIdx].RegClass == ARM::GPRRegClassID && OpInfo[OpIdx + 1].RegClass < 0 && "Addrmode #6 Operands expected"); + // addrmode6 := (ops GPR:$addr, i32imm) MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, Rn))); - MI.addOperand(MCOperand::CreateImm(0)); // Alignment ignored? + MI.addOperand(MCOperand::CreateImm(alignment)); // Alignment OpIdx += 2; if (WB) { @@ -2263,6 +2528,92 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +// A8.6.308, A8.6.311, A8.6.314, A8.6.317. +static bool Align4OneLaneInst(unsigned elem, unsigned size, + unsigned index_align, unsigned & alignment) { + unsigned bits = 0; + switch (elem) { + default: + return false; + case 1: + // A8.6.308 + if (size == 0) + return slice(index_align, 0, 0) == 0; + else if (size == 1) { + bits = slice(index_align, 1, 0); + if (bits != 0 && bits != 1) + return false; + if (bits == 1) + alignment = 16; + return true; + } else if (size == 2) { + bits = slice(index_align, 2, 0); + if (bits != 0 && bits != 3) + return false; + if (bits == 3) + alignment = 32; + return true;; + } + return true; + case 2: + // A8.6.311 + if (size == 0) { + if (slice(index_align, 0, 0) == 1) + alignment = 16; + return true; + } if (size == 1) { + if (slice(index_align, 0, 0) == 1) + alignment = 32; + return true; + } else if (size == 2) { + if (slice(index_align, 1, 1) != 0) + return false; + if (slice(index_align, 0, 0) == 1) + alignment = 64; + return true;; + } + return true; + case 3: + // A8.6.314 + if (size == 0) { + if (slice(index_align, 0, 0) != 0) + return false; + return true; + } if (size == 1) { + if (slice(index_align, 0, 0) != 0) + return false; + return true; + return true; + } else if (size == 2) { + if (slice(index_align, 1, 0) != 0) + 
return false; + return true;; + } + return true; + case 4: + // A8.6.317 + if (size == 0) { + if (slice(index_align, 0, 0) == 1) + alignment = 32; + return true; + } if (size == 1) { + if (slice(index_align, 0, 0) == 1) + alignment = 64; + return true; + } else if (size == 2) { + bits = slice(index_align, 1, 0); + if (bits == 3) + return false; + if (bits == 1) + alignment = 64; + else if (bits == 2) + alignment = 128; + return true;; + } + return true; + } +} + // A7.7 // If L (Inst{21}) == 0, store instructions. // Find out about double-spaced-ness of the Opcode and pass it on to @@ -2272,11 +2623,33 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, const StringRef Name = ARMInsts[Opcode].Name; bool DblSpaced = false; + // 0 represents standard alignment, i.e., unaligned data access. + unsigned alignment = 0; + + unsigned elem = 0; // legal values: {1, 2, 3, 4} + if (Name.startswith("VST1") || Name.startswith("VLD1")) + elem = 1; + + if (Name.startswith("VST2") || Name.startswith("VLD2")) + elem = 2; + + if (Name.startswith("VST3") || Name.startswith("VLD3")) + elem = 3; + + if (Name.startswith("VST4") || Name.startswith("VLD4")) + elem = 4; if (Name.find("LN") != std::string::npos) { // To one lane instructions. // See, for example, 8.6.317 VLD4 (single 4-element structure to one lane). + // Utility function takes number of elements, size, and index_align. + if (!Align4OneLaneInst(elem, + slice(insn, 11, 10), + slice(insn, 7, 4), + alignment)) + return false; + // == 16 && Inst{5} == 1 --> DblSpaced = true if (Name.endswith("16") || Name.endswith("16_UPD")) DblSpaced = slice(insn, 5, 5) == 1; @@ -2284,30 +2657,102 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn, // == 32 && Inst{6} == 1 --> DblSpaced = true if (Name.endswith("32") || Name.endswith("32_UPD")) DblSpaced = slice(insn, 6, 6) == 1; + } else if (Name.find("DUP") != std::string::npos) { + // Single element (or structure) to all lanes. 
+ // Inst{9-8} encodes the number of element(s) in the structure, with: + // 0b00 (VLD1DUP) (for this, a bit makes sense only for data size 16 and 32. + // 0b01 (VLD2DUP) + // 0b10 (VLD3DUP) (for this, a bit must be encoded as 0) + // 0b11 (VLD4DUP) + // + // Inst{7-6} encodes the data size, with: + // 0b00 => 8, 0b01 => 16, 0b10 => 32 + // + // Inst{4} (the a bit) encodes the align action (0: standard alignment) + unsigned elem = slice(insn, 9, 8) + 1; + unsigned a = slice(insn, 4, 4); + if (elem != 3) { + // 0b11 is not a valid encoding for Inst{7-6}. + if (slice(insn, 7, 6) == 3) + return false; + unsigned data_size = 8 << slice(insn, 7, 6); + // For VLD1DUP, a bit makes sense only for data size of 16 and 32. + if (a && data_size == 8) + return false; + // Now we can calculate the alignment! + if (a) + alignment = elem * data_size; + } else { + if (a) { + // A8.6.315 VLD3 (single 3-element structure to all lanes) + // The a bit must be encoded as 0. + return false; + } + } } else { // Multiple n-element structures with type encoded as Inst{11-8}. // See, for example, A8.6.316 VLD4 (multiple 4-element structures). - // n == 2 && type == 0b1001 -> DblSpaced = true - if (Name.startswith("VST2") || Name.startswith("VLD2")) - DblSpaced = slice(insn, 11, 8) == 9; - - // n == 3 && type == 0b0101 -> DblSpaced = true - if (Name.startswith("VST3") || Name.startswith("VLD3")) - DblSpaced = slice(insn, 11, 8) == 5; - - // n == 4 && type == 0b0001 -> DblSpaced = true - if (Name.startswith("VST4") || Name.startswith("VLD4")) - DblSpaced = slice(insn, 11, 8) == 1; - + // Inst{5-4} encodes alignment. + unsigned align = slice(insn, 5, 4); + switch (align) { + default: + break; + case 1: + alignment = 64; break; + case 2: + alignment = 128; break; + case 3: + alignment = 256; break; + } + + unsigned type = slice(insn, 11, 8); + // Reject UNDEFINED instructions based on type and align. + // Plus set DblSpaced flag where appropriate. 
+ switch (elem) { + default: + break; + case 1: + // n == 1 + // A8.6.307 & A8.6.391 + if ((type == 7 && slice(align, 1, 1) == 1) || + (type == 10 && align == 3) || + (type == 6 && slice(align, 1, 1) == 1)) + return false; + break; + case 2: + // n == 2 && type == 0b1001 -> DblSpaced = true + // A8.6.310 & A8.6.393 + if ((type == 8 || type == 9) && align == 3) + return false; + DblSpaced = (type == 9); + break; + case 3: + // n == 3 && type == 0b0101 -> DblSpaced = true + // A8.6.313 & A8.6.395 + if (slice(insn, 7, 6) == 3 || slice(align, 1, 1) == 1) + return false; + DblSpaced = (type == 5); + break; + case 4: + // n == 4 && type == 0b0001 -> DblSpaced = true + // A8.6.316 & A8.6.397 + if (slice(insn, 7, 6) == 3) + return false; + DblSpaced = (type == 1); + break; + } } return DisassembleNLdSt0(MI, Opcode, insn, NumOps, NumOpsAdded, - slice(insn, 21, 21) == 0, DblSpaced, B); + slice(insn, 21, 21) == 0, DblSpaced, alignment/8, B); } // VMOV (immediate) // Qd/Dd imm +// VBIC (immediate) +// VORR (immediate) +// Qd/Dd imm src(=Qd/Dd) static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -2334,12 +2779,20 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, case ARM::VMOVv8i16: case ARM::VMVNv4i16: case ARM::VMVNv8i16: + case ARM::VBICiv4i16: + case ARM::VBICiv8i16: + case ARM::VORRiv4i16: + case ARM::VORRiv8i16: esize = ESize16; break; case ARM::VMOVv2i32: case ARM::VMOVv4i32: case ARM::VMVNv2i32: case ARM::VMVNv4i32: + case ARM::VBICiv2i32: + case ARM::VBICiv4i32: + case ARM::VORRiv2i32: + case ARM::VORRiv4i32: esize = ESize32; break; case ARM::VMOVv1i64: @@ -2347,7 +2800,7 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, esize = ESize64; break; default: - assert(0 && "Unreachable code!"); + assert(0 && "Unexpected opcode!"); return false; } @@ -2356,6 +2809,16 @@ static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode, 
MI.addOperand(MCOperand::CreateImm(decodeN1VImm(insn, esize))); NumOpsAdded = 2; + + // VBIC/VORRiv*i* variants have an extra $src = $Vd to be filled in. + if (NumOps >= 3 && + (OpInfo[2].RegClass == ARM::DPRRegClassID || + OpInfo[2].RegClass == ARM::QPRRegClassID)) { + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[0].RegClass, + decodeNEONRd(insn)))); + NumOpsAdded += 1; + } + return true; } @@ -2376,7 +2839,7 @@ enum N2VFlag { // // Vector Move Long: // Qd Dm -// +// // Vector Move Narrow: // Dd Qm // @@ -2518,7 +2981,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass < 0 && "Imm operand expected"); // Add the imm operand. - + // VSHLL has maximum shift count as the imm, inferred from its size. unsigned Imm; switch (Opcode) { @@ -2631,7 +3094,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn, // N3RegFrm. if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ) return true; - + // Dm = Inst{5:3-0} => NEON Rm // or // Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise @@ -2770,7 +3233,7 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn, ElemSize esize = Opcode == ARM::VGETLNi32 ? ESize32 : ((Opcode == ARM::VGETLNs16 || Opcode == ARM::VGETLNu16) ? 
ESize16 - : ESize32); + : ESize8); // Rt = Inst{15-12} => ARM Rd MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2852,17 +3315,6 @@ static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } -// A8.6.41 DMB -// A8.6.42 DSB -// A8.6.49 ISB -static inline bool MemBarrierInstr(uint32_t insn) { - unsigned op7_4 = slice(insn, 7, 4); - if (slice(insn, 31, 8) == 0xf57ff0 && (op7_4 >= 4 && op7_4 <= 6)) - return true; - - return false; -} - static inline bool PreLoadOpcode(unsigned Opcode) { switch(Opcode) { case ARM::PLDi12: case ARM::PLDrs: @@ -2878,8 +3330,8 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { // Preload Data/Instruction requires either 2 or 3 operands. - // PLDi, PLDWi, PLIi: addrmode_imm12 - // PLDr[a|m], PLDWr[a|m], PLIr[a|m]: ldst_so_reg + // PLDi12, PLDWi12, PLIi12: addrmode_imm12 + // PLDrs, PLDWrs, PLIrs: ldst_so_reg MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRn(insn)))); @@ -2888,10 +3340,19 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, || Opcode == ARM::PLIi12) { unsigned Imm12 = slice(insn, 11, 0); bool Negative = getUBit(insn) == 0; + + // A8.6.118 PLD (literal) PLDWi12 with Rn=PC is transformed to PLDi12. + if (Opcode == ARM::PLDWi12 && slice(insn, 19, 16) == 0xF) { + DEBUG(errs() << "Rn == '1111': PLDWi12 morphed to PLDi12\n"); + MI.setOpcode(ARM::PLDi12); + } + // -0 is represented specially. All other values are as normal. + int Offset = Negative ? 
-1 * Imm12 : Imm12; if (Imm12 == 0 && Negative) - Imm12 = INT32_MIN; - MI.addOperand(MCOperand::CreateImm(Imm12)); + Offset = INT32_MIN; + + MI.addOperand(MCOperand::CreateImm(Offset)); NumOpsAdded = 2; } else { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, @@ -2917,14 +3378,20 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - if (MemBarrierInstr(insn)) { - // DMBsy, DSBsy, and ISBsy instructions have zero operand and are taken care - // of within the generic ARMBasicMCBuilder::BuildIt() method. - // + if (Opcode == ARM::DMB || Opcode == ARM::DSB) { // Inst{3-0} encodes the memory barrier option for the variants. - MI.addOperand(MCOperand::CreateImm(slice(insn, 3, 0))); - NumOpsAdded = 1; - return true; + unsigned opt = slice(insn, 3, 0); + switch (opt) { + case ARM_MB::SY: case ARM_MB::ST: + case ARM_MB::ISH: case ARM_MB::ISHST: + case ARM_MB::NSH: case ARM_MB::NSHST: + case ARM_MB::OSH: case ARM_MB::OSHST: + MI.addOperand(MCOperand::CreateImm(opt)); + NumOpsAdded = 1; + return true; + default: + return false; + } } switch (Opcode) { @@ -2936,6 +3403,11 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, case ARM::WFI: case ARM::SEV: return true; + case ARM::SWP: + case ARM::SWPB: + // SWP, SWPB: Rd Rm Rn + // Delegate to DisassembleLdStExFrm().... + return DisassembleLdStExFrm(MI, Opcode, insn, NumOps, NumOpsAdded, B); default: break; } @@ -2950,20 +3422,32 @@ static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn, // opcodes which match the same real instruction. This is needed since there's // no current handling of optional arguments. Fix here when a better handling // of optional arguments is implemented. 
- if (Opcode == ARM::CPS3p) { + if (Opcode == ARM::CPS3p) { // M = 1 + // Let's reject these impossible imod values by returning false: + // 1. (imod=0b01) + // + // AsmPrinter cannot handle imod=0b00, plus (imod=0b00,M=1,iflags!=0) is an + // invalid combination, so we just check for imod=0b00 here. + if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode NumOpsAdded = 3; return true; } - if (Opcode == ARM::CPS2p) { + if (Opcode == ARM::CPS2p) { // mode = 0, M = 0 + // Let's reject these impossible imod values by returning false: + // 1. (imod=0b00,M=0) + // 2. (imod=0b01) + if (slice(insn, 19, 18) == 0 || slice(insn, 19, 18) == 1) + return false; MI.addOperand(MCOperand::CreateImm(slice(insn, 19, 18))); // imod MI.addOperand(MCOperand::CreateImm(slice(insn, 8, 6))); // iflags NumOpsAdded = 2; return true; } - if (Opcode == ARM::CPS1p) { + if (Opcode == ARM::CPS1p) { // imod = 0, iflags = 0, M = 1 MI.addOperand(MCOperand::CreateImm(slice(insn, 4, 0))); // mode NumOpsAdded = 1; return true; @@ -3142,7 +3626,7 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode, return false; } - + /// TryPredicateAndSBitModifier - TryPredicateAndSBitModifier tries to process /// the possible Predicate and SBitModifier, to build the remaining MCOperand /// constituents. @@ -3154,6 +3638,7 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; const std::string &Name = ARMInsts[Opcode].Name; unsigned Idx = MI.getNumOperands(); + uint64_t TSFlags = ARMInsts[Opcode].TSFlags; // First, we check whether this instr specifies the PredicateOperand through // a pair of TargetOperandInfos with isPredicate() property. 
@@ -3173,14 +3658,23 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode, // like ARM. // // A8.6.16 B - if (Name == "t2Bcc") - MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 25, 22)))); - else if (Name == "tBcc") - MI.addOperand(MCOperand::CreateImm(CondCode(slice(insn, 11, 8)))); - else + // Check for undefined encodings. + unsigned cond; + if (Name == "t2Bcc") { + if ((cond = slice(insn, 25, 22)) >= 14) + return false; + MI.addOperand(MCOperand::CreateImm(CondCode(cond))); + } else if (Name == "tBcc") { + if ((cond = slice(insn, 11, 8)) == 14) + return false; + MI.addOperand(MCOperand::CreateImm(CondCode(cond))); + } else MI.addOperand(MCOperand::CreateImm(ARMCC::AL)); } else { // ARM instructions get their condition field from Inst{31-28}. + // We should reject Inst{31-28} = 0b1111 as invalid encoding. + if (!isNEONDomain(TSFlags) && getCondField(insn) == 0xF) + return false; MI.addOperand(MCOperand::CreateImm(CondCode(getCondField(insn)))); } } @@ -3243,3 +3737,84 @@ ARMBasicMCBuilder *llvm::CreateMCBuilder(unsigned Opcode, ARMFormat Format) { return new ARMBasicMCBuilder(Opcode, Format, ARMInsts[Opcode].getNumOperands()); } + +/// tryAddingSymbolicOperand - tryAddingSymbolicOperand tries to add a symbolic +/// operand in place of the immediate Value in the MCInst. The immediate +/// Value has had any PC adjustment made by the caller. If the getOpInfo() +/// function was set as part of the setupBuilderForSymbolicDisassembly() call +/// then that function is called to get any symbolic information at the +/// builder's Address for this instruction. If that returns non-zero then the +/// symbolic information it returns is used to create an MCExpr and that is +/// added as an operand to the MCInst. This function returns true if it adds +/// an operand to the MCInst and false otherwise.
+bool ARMBasicMCBuilder::tryAddingSymbolicOperand(uint64_t Value, + uint64_t InstSize, + MCInst &MI) { + if (!GetOpInfo) + return false; + + struct LLVMOpInfo1 SymbolicOp; + SymbolicOp.Value = Value; + if (!GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) + return false; + + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Add = MCConstantExpr::Create(SymbolicOp.AddSymbol.Value, *Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx->GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, *Ctx); + } else { + Sub = MCConstantExpr::Create(SymbolicOp.SubtractSymbol.Value, *Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, *Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, *Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, *Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, *Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, *Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, *Ctx); + } + + if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_HI16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateUpper16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_ARM_LO16) + MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); + else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) + MI.addOperand(MCOperand::CreateExpr(Expr)); + else + assert(0 && "bad
SymbolicOp.VariantKind"); // 0 && ...: a bare string literal is always true. + + return true; +} diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h index 9c30d332d1f2..a7ba14141c0a 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h @@ -22,12 +22,17 @@ #define ARMDISASSEMBLERCORE_H #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCContext.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm-c/Disassembler.h" #include "ARMBaseInstrInfo.h" #include "ARMRegisterInfo.h" #include "ARMDisassembler.h" namespace llvm { +class MCContext; class ARMUtils { public: @@ -134,6 +139,31 @@ static inline void setSlice(unsigned &Bits, unsigned From, unsigned To, Bits |= (Val & Mask) << To; } +// Return an integer result equal to the number of bits of x that are ones. +static inline uint32_t +BitCount (uint64_t x) +{ + // c accumulates the total bits set in x + uint32_t c; + for (c = 0; x; ++c) + { + x &= x - 1; // clear the least significant bit set + } + return c; +} + +static inline bool +BitIsSet (const uint64_t value, const uint64_t bit) +{ + return (value & (1ull << bit)) != 0; +} + +static inline bool +BitIsClear (const uint64_t value, const uint64_t bit) +{ + return (value & (1ull << bit)) == 0; +} + /// Various utilities for checking the target specific flags. /// A unary data processing instruction doesn't have an Rn operand. @@ -141,6 +171,12 @@ static inline bool isUnaryDP(uint64_t TSFlags) { return (TSFlags & ARMII::UnaryDP); } +/// A NEON Domain instruction has cond field (Inst{31-28}) as 0b1111. +static inline bool isNEONDomain(uint64_t TSFlags) { + return (TSFlags & ARMII::DomainNEON) || + (TSFlags & ARMII::DomainNEONA8); +} + /// This four-bit field describes the addressing mode used. /// See also ARMBaseInstrInfo.h.
static inline unsigned getAddrMode(uint64_t TSFlags) { @@ -196,7 +232,7 @@ class ARMBasicMCBuilder { public: ARMBasicMCBuilder(ARMBasicMCBuilder &B) : Opcode(B.Opcode), Format(B.Format), NumOps(B.NumOps), Disasm(B.Disasm), - SP(B.SP) { + SP(B.SP), GetOpInfo(0), DisInfo(0), Ctx(0) { Err = 0; } @@ -255,6 +291,44 @@ class ARMBasicMCBuilder { assert(SP); return slice(SP->ITState, 7, 4); } + +private: + // + // Hooks for symbolic disassembly via the public 'C' interface. + // + // The function to get the symbolic information for operands. + LLVMOpInfoCallback GetOpInfo; + // The pointer to the block of symbolic information for above call back. + void *DisInfo; + // The assembly context for creating symbols and MCExprs in place of + // immediate operands when there is symbolic information. + MCContext *Ctx; + // The address of the instruction being disassembled. + uint64_t Address; + +public: + void setupBuilderForSymbolicDisassembly(LLVMOpInfoCallback getOpInfo, + void *disInfo, MCContext *ctx, + uint64_t address) { + GetOpInfo = getOpInfo; + DisInfo = disInfo; + Ctx = ctx; + Address = address; + } + + uint64_t getBuilderAddress() const { return Address; } + + /// tryAddingSymbolicOperand - tryAddingSymbolicOperand trys to add a symbolic + /// operand in place of the immediate Value in the MCInst. The immediate + /// Value has had any PC adjustment made by the caller. If the getOpInfo() + /// function was set as part of the setupBuilderForSymbolicDisassembly() call + /// then that function is called to get any symbolic information at the + /// builder's Address for this instrution. If that returns non-zero then the + /// symbolic information it returns is used to create an MCExpr and that is + /// added as an operand to the MCInst. This function returns true if it adds + /// an operand to the MCInst and false otherwise. 
+ bool tryAddingSymbolicOperand(uint64_t Value, uint64_t InstSize, MCInst &MI); + }; } // namespace llvm diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h index 23372e022414..8d39982f5640 100644 --- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h +++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h @@ -108,6 +108,8 @@ static inline bool IsGPR(unsigned RegClass) { // Utilities for 32-bit Thumb instructions. +static inline bool BadReg(uint32_t n) { return n == 13 || n == 15; } + // Extract imm4: Inst{19-16}. static inline unsigned getImm4(uint32_t insn) { return slice(insn, 19, 16); @@ -398,9 +400,17 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn, assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); - MI.addOperand(MCOperand::CreateImm(UseRt ? getT1Imm8(insn) - : (Imm3 ? getT1Imm3(insn) - : getT1Imm5(insn)))); + unsigned Imm = 0; + if (UseRt) + Imm = getT1Imm8(insn); + else if (Imm3) + Imm = getT1Imm3(insn); + else { + Imm = getT1Imm5(insn); + ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 12, 11)); + getImmShiftSE(ShOp, Imm); + } + MI.addOperand(MCOperand::CreateImm(Imm)); } ++OpIdx; @@ -469,6 +479,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn, // tBX_RET: 0 operand // tBX_RET_vararg: Rm // tBLXr_r9: Rm +// tBRIND: Rm static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -476,11 +487,17 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn, if (NumOps == 0) return true; - // BX/BLX has 1 reg operand: Rm. - if (NumOps == 1) { + // BX/BLX/tBRIND (indirect branch, i.e, mov pc, Rm) has 1 reg operand: Rm. 
+ if (Opcode==ARM::tBLXr_r9 || Opcode==ARM::tBX_Rm || Opcode==ARM::tBRIND) { + if (Opcode != ARM::tBRIND) { + // Handling the two predicate operands before the reg operand. + if (!B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + return false; + NumOpsAdded += 2; + } MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, getT1Rm(insn)))); - NumOpsAdded = 1; + NumOpsAdded += 1; return true; } @@ -598,7 +615,7 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode, // A6.2.4 Load/store single data item // -// Load/Store Register (reg|imm): tRd tRn imm5 tRm +// Load/Store Register (reg|imm): tRd tRn imm5|tRm // Load Register Signed Byte|Halfword: tRd tRn tRm static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { @@ -607,11 +624,6 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, const TargetOperandInfo *OpInfo = TID.OpInfo; unsigned &OpIdx = NumOpsAdded; - // Table A6-5 16-bit Thumb Load/store instructions - // opA = 0b0101 for STR/LDR (register) and friends. - // Otherwise, we have STR/LDR (immediate) and friends. - bool Imm5 = (opA != 5); - assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID && OpInfo[1].RegClass == ARM::tGPRRegClassID @@ -624,28 +636,28 @@ static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode, getT1tRn(insn)))); OpIdx = 2; - // We have either { imm5, tRm } or { tRm } remaining. - // Process the imm5 first. Note that STR/LDR (register) should skip the imm5 - // offset operand for t_addrmode_s[1|2|4]. + // We have either { imm5 } or { tRm } remaining. + // Note that STR/LDR (register) should skip the imm5 offset operand for + // t_addrmode_s[1|2|4]. assert(OpIdx < NumOps && "More operands expected"); if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - - MI.addOperand(MCOperand::CreateImm(Imm5 ? 
getT1Imm5(insn) : 0)); + // Table A6-5 16-bit Thumb Load/store instructions + // opA = 0b0101 for STR/LDR (register) and friends. + // Otherwise, we have STR/LDR (immediate) and friends. + assert(opA != 5 && "Immediate operand expected for this opcode"); + MI.addOperand(MCOperand::CreateImm(getT1Imm5(insn))); + ++OpIdx; + } else { + // The next reg operand is tRm, the offset. + assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID + && "Thumb reg operand expected"); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::tGPRRegClassID, + getT1tRm(insn)))); ++OpIdx; } - - // The next reg operand is tRm, the offset. - assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID - && "Thumb reg operand expected"); - MI.addOperand(MCOperand::CreateReg( - Imm5 ? 0 - : getRegisterEnum(B, ARM::tGPRRegClassID, - getT1tRm(insn)))); - ++OpIdx; - return true; } @@ -895,6 +907,10 @@ static bool DisassembleThumb1LdStMul(bool Ld, MCInst &MI, unsigned Opcode, } unsigned RegListBits = slice(insn, 7, 0); + if (BitCount(RegListBits) < 1) { + DEBUG(errs() << "if BitCount(registers) < 1 then UNPREDICTABLE\n"); + return false; + } // Fill the variadic part of reglist. for (unsigned i = 0; i < 8; ++i) @@ -945,6 +961,11 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn, : (int)Imm8)); // Predicate operands by ARMBasicMCBuilder::TryPredicateAndSBitModifier(). + // But note that for tBcc, if cond = '1110' then UNDEFINED. + if (Opcode == ARM::tBcc && slice(insn, 11, 8) == 14) { + DEBUG(errs() << "if cond = '1110' then UNDEFINED\n"); + return false; + } NumOpsAdded = 1; return true; @@ -965,11 +986,7 @@ static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned Imm11 = getT1Imm11(insn); - // When executing a Thumb instruction, PC reads as the address of the current - // instruction plus 4. 
The assembler subtracts 4 from the difference between - // the branch instruction and the target address, disassembler has to add 4 to - // to compensate. - MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1) + 4)); + MI.addOperand(MCOperand::CreateImm(SignExtend32<12>(Imm11 << 1))); NumOpsAdded = 1; @@ -1129,8 +1146,12 @@ static bool DisassembleThumb2SRS(MCInst &MI, unsigned Opcode, uint32_t insn, // t2RFE[IA|DB]W/t2RFE[IA|DB]: Rn static bool DisassembleThumb2RFE(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); + unsigned Rn = decodeRn(insn); + if (Rn == 15) { + DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); + return false; + } + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,ARM::GPRRegClassID,Rn))); NumOpsAdded = 1; return true; } @@ -1149,7 +1170,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn, Opcode == ARM::t2STMIA || Opcode == ARM::t2STMIA_UPD || Opcode == ARM::t2STMDB || Opcode == ARM::t2STMDB_UPD) && "Unexpected opcode"); - assert(NumOps >= 5 && "Thumb2 LdStMul expects NumOps >= 5"); + assert(NumOps >= 4 && "Thumb2 LdStMul expects NumOps >= 4"); NumOpsAdded = 0; @@ -1203,45 +1224,79 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID + && OpInfo[0].RegClass > 0 + && OpInfo[1].RegClass > 0 && "Expect >=2 operands and first two as reg operands"); bool isStore = (ARM::t2STREX <= Opcode && Opcode <= ARM::t2STREXH); bool isSW = (Opcode == ARM::t2LDREX || Opcode == ARM::t2STREX); bool isDW = (Opcode == ARM::t2LDREXD || Opcode == ARM::t2STREXD); + unsigned Rt = decodeRd(insn); + unsigned Rt2 = decodeRs(insn); // But note that this is Rd for t2STREX. 
+ unsigned Rd = decodeRm(insn); + unsigned Rn = decodeRn(insn); + + // Some sanity checking first. + if (isStore) { + // if d == n || d == t then UNPREDICTABLE + // if d == n || d == t || d == t2 then UNPREDICTABLE + if (isDW) { + if (Rd == Rn || Rd == Rt || Rd == Rt2) { + DEBUG(errs() << "if d == n || d == t || d == t2 then UNPREDICTABLE\n"); + return false; + } + } else { + if (isSW) { + if (Rt2 == Rn || Rt2 == Rt) { + DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); + return false; + } + } else { + if (Rd == Rn || Rd == Rt) { + DEBUG(errs() << "if d == n || d == t then UNPREDICTABLE\n"); + return false; + } + } + } + } else { + // Load + // A8.6.71 LDREXD + // if t == t2 then UNPREDICTABLE + if (isDW && Rt == Rt2) { + DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); + return false; + } + } + // Add the destination operand for store. if (isStore) { MI.addOperand(MCOperand::CreateReg( - getRegisterEnum(B, ARM::GPRRegClassID, - isSW ? decodeRs(insn) : decodeRm(insn)))); + getRegisterEnum(B, OpInfo[OpIdx].RegClass, + isSW ? Rt2 : Rd))); ++OpIdx; } // Source operand for store and destination operand for load. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRd(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, + Rt))); ++OpIdx; // Thumb2 doubleword complication: with an extra source/destination operand. if (isDW) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRs(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, + Rt2))); ++OpIdx; } // Finally add the pointer operand. - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, - decodeRn(insn)))); + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, + Rn))); ++OpIdx; return true; } -// LLVM, as of Jan-05-2010, does not output , i.e., Rs, in the asm. -// Whereas the ARM Arch. 
Manual does not require that t2 = t+1 like in ARM ISA. -// // t2LDRDi8: Rd Rs Rn imm8s4 (offset mode) // t2LDRDpci: Rd Rs imm8s4 (Not decoded, prefer the generic t2LDRDi8 version) // t2STRDi8: Rd Rs Rn imm8s4 (offset mode) @@ -1255,18 +1310,50 @@ static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode, if (!OpInfo) return false; assert(NumOps >= 4 - && OpInfo[0].RegClass == ARM::GPRRegClassID - && OpInfo[1].RegClass == ARM::GPRRegClassID - && OpInfo[2].RegClass == ARM::GPRRegClassID + && OpInfo[0].RegClass > 0 + && OpInfo[0].RegClass == OpInfo[1].RegClass + && OpInfo[2].RegClass > 0 && OpInfo[3].RegClass < 0 && "Expect >= 4 operands and first 3 as reg operands"); + // Thumnb allows for specifying Rt and Rt2, unlike ARM (which has Rt2==Rt+1). + unsigned Rt = decodeRd(insn); + unsigned Rt2 = decodeRs(insn); + unsigned Rn = decodeRn(insn); + + // Some sanity checking first. + + // A8.6.67 LDRD (literal) has its W bit as (0). + if (Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2LDRD_PRE || Opcode == ARM::t2LDRD_POST) { + if (Rn == 15 && slice(insn, 21, 21) != 0) + return false; + } else { + // For Dual Store, PC cannot be used as the base register. + if (Rn == 15) { + DEBUG(errs() << "if n == 15 then UNPREDICTABLE\n"); + return false; + } + } + if (Rt == Rt2) { + DEBUG(errs() << "if t == t2 then UNPREDICTABLE\n"); + return false; + } + if (Opcode != ARM::t2LDRDi8 && Opcode != ARM::t2STRDi8) { + if (Rn == Rt || Rn == Rt2) { + DEBUG(errs() << "if wback && (n == t || n == t2) then UNPREDICTABLE\n"); + return false; + } + } + // Add the operands. 
- MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + unsigned RegClassPair = OpInfo[0].RegClass; + unsigned RegClassBase = OpInfo[2].RegClass; + + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, decodeRd(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassPair, decodeRs(insn)))); - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RegClassBase, decodeRn(insn)))); // Finally add (+/-)imm8*4, depending on the U bit. @@ -1394,9 +1481,12 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, if (OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef()) { - if (Thumb2ShiftOpcode(Opcode)) - MI.addOperand(MCOperand::CreateImm(getShiftAmtBits(insn))); - else { + if (Thumb2ShiftOpcode(Opcode)) { + unsigned Imm = getShiftAmtBits(insn); + ARM_AM::ShiftOpc ShOp = getShiftOpcForBits(slice(insn, 5, 4)); + getImmShiftSE(ShOp, Imm); + MI.addOperand(MCOperand::CreateImm(Imm)); + } else { // Build the constant shift specifier operand. 
unsigned bits2 = getShiftTypeBits(insn); unsigned imm5 = getShiftAmtBits(insn); @@ -1421,7 +1511,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn, static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) { - const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo; + const TargetInstrDesc &TID = ARMInsts[Opcode]; + const TargetOperandInfo *OpInfo = TID.OpInfo; unsigned &OpIdx = NumOpsAdded; OpIdx = 0; @@ -1448,8 +1539,15 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode, DEBUG(errs()<<"Thumb2 encoding error: d==15 for DPModImm 2-reg instr.\n"); return false; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, - decodeRn(insn)))); + int Idx; + if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) { + // The reg operand is tied to the first reg operand. + MI.addOperand(MI.getOperand(Idx)); + } else { + // Add second reg operand. 
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, RnRegClassID, + decodeRn(insn)))); + } ++OpIdx; } @@ -1518,7 +1616,7 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn, // o t2ADDri12, t2SUBri12: Rs Rn imm12 // o t2LEApcrel (ADR): Rs imm12 // o t2BFC (BFC): Rs Ro(TIED_TO) bf_inv_mask_imm -// o t2BFI (BFI) (Currently not defined in LLVM as of Jan-07-2010) +// o t2BFI (BFI): Rs Ro(TIED_TO) Rn bf_inv_mask_imm // o t2MOVi16: Rs imm16 // o t2MOVTi16: Rs imm16 // o t2SBFX (SBFX): Rs Rn lsb width @@ -1579,9 +1677,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode, if (Opcode == ARM::t2ADDri12 || Opcode == ARM::t2SUBri12 || Opcode == ARM::t2LEApcrel) MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn))); - else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) - MI.addOperand(MCOperand::CreateImm(getImm16(insn))); - else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { + else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16) { + if (!B->tryAddingSymbolicOperand(getImm16(insn), 4, MI)) + MI.addOperand(MCOperand::CreateImm(getImm16(insn))); + } else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) { uint32_t mask = 0; if (getBitfieldInvMask(insn, mask)) MI.addOperand(MCOperand::CreateImm(mask)); @@ -1625,8 +1724,7 @@ static inline bool t2MiscCtrlInstr(uint32_t insn) { // A8.6.26 // t2BXJ -> Rn // -// Miscellaneous control: t2DMBsy (and its t2DMB variants), -// t2DSBsy (and its t2DSB varianst), t2ISBsy, t2CLREX +// Miscellaneous control: // -> no operand (except pred-imm pred-ccr for CLREX, memory barrier variants) // // Hint: t2NOP, t2YIELD, t2WFE, t2WFI, t2SEV @@ -1643,6 +1741,22 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, if (NumOps == 0) return true; + if (Opcode == ARM::t2DMB || Opcode == ARM::t2DSB) { + // Inst{3-0} encodes the memory barrier option for the variants. 
+ unsigned opt = slice(insn, 3, 0); + switch (opt) { + case ARM_MB::SY: case ARM_MB::ST: + case ARM_MB::ISH: case ARM_MB::ISHST: + case ARM_MB::NSH: case ARM_MB::NSHST: + case ARM_MB::OSH: case ARM_MB::OSHST: + MI.addOperand(MCOperand::CreateImm(opt)); + NumOpsAdded = 1; + return true; + default: + return false; + } + } + if (t2MiscCtrlInstr(insn)) return true; @@ -1719,6 +1833,17 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, return true; } + // Some instructions have predicate operands first before the immediate. + if (Opcode == ARM::tBLXi_r9 || Opcode == ARM::tBLr9) { + // Handling the two predicate operands before the imm operand. + if (B->DoPredicateOperands(MI, Opcode, insn, NumOps)) + NumOpsAdded += 2; + else { + DEBUG(errs() << "Expected predicate operands not found.\n"); + return false; + } + } + // Add the imm operand. int Offset = 0; @@ -1739,13 +1864,12 @@ static bool DisassembleThumb2BrMiscCtrl(MCInst &MI, unsigned Opcode, Offset = decodeImm32_BLX(insn); break; } - // When executing a Thumb instruction, PC reads as the address of the current - // instruction plus 4. The assembler subtracts 4 from the difference between - // the branch instruction and the target address, disassembler has to add 4 to - // to compensate. - MI.addOperand(MCOperand::CreateImm(Offset + 4)); - NumOpsAdded = 1; + if (!B->tryAddingSymbolicOperand(Offset + B->getBuilderAddress() + 4, 4, MI)) + MI.addOperand(MCOperand::CreateImm(Offset)); + + // This is an increment as some predicate operands may have been added first. 
+ NumOpsAdded += 1; return true; } @@ -1787,7 +1911,7 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, decodeRn(insn)))); ++OpIdx; - if (OpInfo[OpIdx].RegClass == ARM::GPRRegClassID) { + if (OpInfo[OpIdx].RegClass == ARM::rGPRRegClassID) { MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, decodeRm(insn)))); } else { @@ -1795,17 +1919,17 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); int Offset = 0; - if (slice(insn, 19, 16) == 0xFF) { - bool Negative = slice(insn, 23, 23) == 0; - unsigned Imm12 = getImm12(insn); - Offset = Negative ? -1 - Imm12 : 1 * Imm12; - } else if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || - Opcode == ARM::t2PLIi8) { + if (Opcode == ARM::t2PLDi8 || Opcode == ARM::t2PLDWi8 || + Opcode == ARM::t2PLIi8) { // A8.6.117 Encoding T2: add = FALSE unsigned Imm8 = getImm8(insn); - Offset = -1 - Imm8; - } else // The i12 forms. See, for example, A8.6.117 Encoding T1. + Offset = -1 * Imm8; + } else { + // The i12 forms. See, for example, A8.6.117 Encoding T1. + // Note that currently t2PLDi12 also handles the previously named t2PLDpci + // opcode, that's why we use decodeImm12(insn) which returns +/- imm12. Offset = decodeImm12(insn); + } MI.addOperand(MCOperand::CreateImm(Offset)); } ++OpIdx; @@ -1820,6 +1944,87 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn, return true; } +static bool BadRegsThumb2LdSt(unsigned Opcode, uint32_t insn, bool Load, + unsigned R0, unsigned R1, unsigned R2, bool UseRm, bool WB) { + + // Inst{22-21} encodes the data item transferred for load/store. + // For single word, it is encoded as ob10. 
+ bool Word = (slice(insn, 22, 21) == 2); + bool Half = (slice(insn, 22, 21) == 1); + bool Byte = (slice(insn, 22, 21) == 0); + + if (UseRm && BadReg(R2)) { + DEBUG(errs() << "if BadReg(m) then UNPREDICTABLE\n"); + return true; + } + + if (Load) { + if (!Word && R0 == 13) { + DEBUG(errs() << "if t == 13 then UNPREDICTABLE\n"); + return true; + } + if (Byte) { + if (WB && R0 == 15 && slice(insn, 10, 8) == 3) { + // A8.6.78 LDRSB (immediate) Encoding T2 (errata markup 8.0) + DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); + return true; + } + } + // A6.3.8 Load halfword, memory hints + if (Half) { + if (WB) { + if (R0 == R1) { + // A8.6.82 LDRSH (immediate) Encoding T2 + DEBUG(errs() << "if WB && n == t then UNPREDICTABLE\n"); + return true; + } + if (R0 == 15 && slice(insn, 10, 8) == 3) { + // A8.6.82 LDRSH (immediate) Encoding T2 (errata markup 8.0) + DEBUG(errs() << "if t == 15 && PUW == '011' then UNPREDICTABLE\n"); + return true; + } + } else { + if (Opcode == ARM::t2LDRHi8 || Opcode == ARM::t2LDRSHi8) { + if (R0 == 15 && slice(insn, 10, 8) == 4) { + // A8.6.82 LDRSH (immediate) Encoding T2 + DEBUG(errs() << "if Rt == '1111' and PUW == '100' then SEE" + << " \"Unallocated memory hints\"\n"); + return true; + } + } else { + if (R0 == 15) { + // A8.6.82 LDRSH (immediate) Encoding T1 + DEBUG(errs() << "if Rt == '1111' then SEE" + << " \"Unallocated memory hints\"\n"); + return true; + } + } + } + } + } else { + if (WB && R0 == R1) { + DEBUG(errs() << "if wback && n == t then UNPREDICTABLE\n"); + return true; + } + if ((WB && R0 == 15) || (!WB && R1 == 15)) { + DEBUG(errs() << "if Rn == '1111' then UNDEFINED\n"); + return true; + } + if (Word) { + if ((WB && R1 == 15) || (!WB && R0 == 15)) { + DEBUG(errs() << "if t == 15 then UNPREDICTABLE\n"); + return true; + } + } else { + if ((WB && BadReg(R1)) || (!WB && BadReg(R0))) { + DEBUG(errs() << "if BadReg(t) then UNPREDICTABLE\n"); + return true; + } + } + } + return false; +} + // A6.3.10 Store 
single data item // A6.3.9 Load byte, memory hints // A6.3.8 Load halfword, memory hints @@ -1865,16 +2070,16 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, OpIdx = 0; assert(NumOps >= 3 && - OpInfo[0].RegClass == ARM::GPRRegClassID && - OpInfo[1].RegClass == ARM::GPRRegClassID && + OpInfo[0].RegClass > 0 && + OpInfo[1].RegClass > 0 && "Expect >= 3 operands and first two as reg operands"); - bool ThreeReg = (OpInfo[2].RegClass == ARM::GPRRegClassID); + bool ThreeReg = (OpInfo[2].RegClass > 0); bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1; bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td // Build the register operands, followed by the immediate. - unsigned R0, R1, R2 = 0; + unsigned R0 = 0, R1 = 0, R2 = 0; unsigned Rd = decodeRd(insn); int Imm = 0; @@ -1905,19 +2110,24 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode, Imm = decodeImm8(insn); } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, R0))); ++OpIdx; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, R1))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID, + // This could be an offset register or a TIED_TO register. 
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, R2))); ++OpIdx; } + if (BadRegsThumb2LdSt(Opcode, insn, Load, R0, R1, R2, ThreeReg & !TIED_TO, + TIED_TO)) + return false; + assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate() && !OpInfo[OpIdx].isOptionalDef() && "Pure imm operand expected"); @@ -1947,25 +2157,25 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn, OpIdx = 0; assert(NumOps >= 2 && - OpInfo[0].RegClass == ARM::rGPRRegClassID && - OpInfo[1].RegClass == ARM::rGPRRegClassID && + OpInfo[0].RegClass > 0 && + OpInfo[1].RegClass > 0 && "Expect >= 2 operands and first two as reg operands"); // Build the register operands, followed by the optional rotation amount. - bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass == ARM::rGPRRegClassID; + bool ThreeReg = NumOps > 2 && OpInfo[2].RegClass > 0; - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRs(insn)))); ++OpIdx; if (ThreeReg) { - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B,OpInfo[OpIdx].RegClass, decodeRn(insn)))); ++OpIdx; } - MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID, + MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, OpInfo[OpIdx].RegClass, decodeRm(insn)))); ++OpIdx; diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 1499da00ae1c..fc2aa7526b7f 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -29,6 +29,9 @@ StringRef ARMInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); } +StringRef ARMInstPrinter::getRegName(unsigned RegNo) const { + return getRegisterName(RegNo); +} void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O) { unsigned 
Opcode = MI->getOpcode(); @@ -133,9 +136,10 @@ static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream, unsigned Rot = ARM_AM::getSOImmValRot(V); // Print low-level immediate formation info, per - // A5.1.3: "Data-processing operands - Immediate". + // A5.2.3: Data-processing (immediate), and + // A5.2.4: Modified immediate constants in ARM instructions if (Rot) { - O << "#" << Imm << ", " << Rot; + O << "#" << Imm << ", #" << Rot; // Pretty printed version. if (CommentStream) *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n"; @@ -178,18 +182,16 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum, } } +//===--------------------------------------------------------------------===// +// Addressing Mode #2 +//===--------------------------------------------------------------------===// -void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, - raw_ostream &O) { +void ARMInstPrinter::printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { const MCOperand &MO1 = MI->getOperand(Op); const MCOperand &MO2 = MI->getOperand(Op+1); const MCOperand &MO3 = MI->getOperand(Op+2); - if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
- printOperand(MI, Op, O); - return; - } - O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { @@ -212,6 +214,50 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, O << "]"; } +void ARMInstPrinter::printAM2PostIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + O << "[" << getRegisterName(MO1.getReg()) << "], "; + + if (!MO2.getReg()) { + unsigned ImmOffs = ARM_AM::getAM2Offset(MO3.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ImmOffs; + return; + } + + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); + + if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getAM2ShiftOpc(MO3.getImm())) + << " #" << ShImm; +} + +void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. 
+ printOperand(MI, Op, O); + return; + } + + const MCOperand &MO3 = MI->getOperand(Op+2); + unsigned IdxMode = ARM_AM::getAM2IdxMode(MO3.getImm()); + + if (IdxMode == ARMII::IndexModePost) { + printAM2PostIndexOp(MI, Op, O); + return; + } + printAM2PreOrOffsetIndexOp(MI, Op, O); +} + void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -235,11 +281,35 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, << " #" << ShImm; } -void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &MO1 = MI->getOperand(OpNum); - const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); +//===--------------------------------------------------------------------===// +// Addressing Mode #3 +//===--------------------------------------------------------------------===// + +void ARMInstPrinter::printAM3PostIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + O << "[" << getRegisterName(MO1.getReg()) << "], "; + + if (MO2.getReg()) { + O << (char)ARM_AM::getAM3Op(MO3.getImm()) + << getRegisterName(MO2.getReg()); + return; + } + + unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm()); + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; +} + +void ARMInstPrinter::printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); O << '[' << getRegisterName(MO1.getReg()); @@ -256,6 +326,18 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum, O << ']'; } +void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned Op, + raw_ostream &O) { + const MCOperand &MO3 = 
MI->getOperand(Op+2); + unsigned IdxMode = ARM_AM::getAM3IdxMode(MO3.getImm()); + + if (IdxMode == ARMII::IndexModePost) { + printAM3PostIndexOp(MI, Op, O); + return; + } + printAM3PreOrOffsetIndexOp(MI, Op, O); +} + void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -314,6 +396,12 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum, O << "]"; } +void ARMInstPrinter::printAddrMode7Operand(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + const MCOperand &MO1 = MI->getOperand(OpNum); + O << "[" << getRegisterName(MO1.getReg()) << "]"; +} + void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { @@ -414,16 +502,6 @@ void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum, } } -void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum, - raw_ostream &O) { - const MCOperand &Op = MI->getOperand(OpNum); - O << '#'; - if (Op.getImm() < 0) - O << '-' << (-Op.getImm() - 1); - else - O << Op.getImm(); -} - void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 679d3135ea6d..b3ac03ab2200 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -17,14 +17,18 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; + +class MCOperand; +class TargetMachine; class ARMInstPrinter : public MCInstPrinter { public: - ARMInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} + ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); virtual StringRef getOpcodeName(unsigned Opcode) const; + virtual StringRef getRegName(unsigned RegNo) const; static 
const char *getInstructionName(unsigned Opcode); @@ -38,15 +42,25 @@ class ARMInstPrinter : public MCInstPrinter { void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM2PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM2PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printAddrMode2OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode3Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PostIndexOp(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAM3PreOrOffsetIndexOp(const MCInst *MI, unsigned OpNum, + raw_ostream &O); void printAddrMode3OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printLdStmModeOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printAddrMode7Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); @@ -87,9 +101,7 @@ class ARMInstPrinter : public MCInstPrinter { void printSetendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIMod(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printCPSIFlag(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMSRMaskOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); - void printNegZeroOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); 
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp index 9a27e2f47064..f6d024232eae 100644 --- a/lib/Target/ARM/MLxExpansionPass.cpp +++ b/lib/Target/ARM/MLxExpansionPass.cpp @@ -15,11 +15,13 @@ #define DEBUG_TYPE "mlx-expansion" #include "ARM.h" #include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -49,15 +51,17 @@ namespace { const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; + bool isA9; unsigned MIIdx; MachineInstr* LastMIs[4]; + SmallPtrSet IgnoreStall; void clearStack(); void pushStack(MachineInstr *MI); MachineInstr *getAccDefMI(MachineInstr *MI) const; unsigned getDefReg(MachineInstr *MI) const; bool hasRAWHazard(unsigned Reg, MachineInstr *MI) const; - bool FindMLxHazard(MachineInstr *MI) const; + bool FindMLxHazard(MachineInstr *MI); void ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI, unsigned MulOpc, unsigned AddSubOpc, bool NegAcc, bool HasLane); @@ -146,7 +150,7 @@ bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const { } -bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { +bool MLxExpansion::FindMLxHazard(MachineInstr *MI) { if (NumExpand >= ExpandLimit) return false; @@ -154,7 +158,7 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { return true; MachineInstr *DefMI = getAccDefMI(MI); - if (TII->isFpMLxInstruction(DefMI->getOpcode())) + if (TII->isFpMLxInstruction(DefMI->getOpcode())) { // r0 = vmla // r3 = vmla r0, r1, r2 // takes 16 - 17 cycles @@ -163,24 +167,33 @@ bool MLxExpansion::FindMLxHazard(MachineInstr *MI) const { // r4 = vmul r1, r2 // r3 = vadd r0, r4 // takes about 14 - 15 cycles even 
with vmul stalling for 4 cycles. + IgnoreStall.insert(DefMI); return true; + } + + if (IgnoreStall.count(MI)) + return false; // If a VMLA.F is followed by an VADD.F or VMUL.F with no RAW hazard, the // VADD.F or VMUL.F will stall 4 cycles before issue. The 4 cycle stall // preserves the in-order retirement of the instructions. // Look at the next few instructions, if *most* of them can cause hazards, // then the scheduler can't *fix* this, we'd better break up the VMLA. + unsigned Limit1 = isA9 ? 1 : 4; + unsigned Limit2 = isA9 ? 1 : 4; for (unsigned i = 1; i <= 4; ++i) { int Idx = ((int)MIIdx - i + 4) % 4; MachineInstr *NextMI = LastMIs[Idx]; if (!NextMI) continue; - if (TII->canCauseFpMLxStall(NextMI->getOpcode())) - return true; + if (TII->canCauseFpMLxStall(NextMI->getOpcode())) { + if (i <= Limit1) + return true; + } // Look for VMLx RAW hazard. - if (hasRAWHazard(getDefReg(MI), NextMI)) + if (i <= Limit2 && hasRAWHazard(getDefReg(MI), NextMI)) return true; } @@ -248,6 +261,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) { bool Changed = false; clearStack(); + IgnoreStall.clear(); unsigned Skip = 0; MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend(); @@ -299,6 +313,8 @@ bool MLxExpansion::runOnMachineFunction(MachineFunction &Fn) { TII = static_cast(Fn.getTarget().getInstrInfo()); TRI = Fn.getTarget().getRegisterInfo(); MRI = &Fn.getRegInfo(); + const ARMSubtarget *STI = &Fn.getTarget().getSubtarget(); + isA9 = STI->isCortexA9(); bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index 9fc3fb92cb2c..8ba9a27e95c8 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -657,3 +657,27 @@ Note that both "tst" and "moveq" are redundant. 
//===---------------------------------------------------------------------===// +When loading immediate constants with movt/movw, if there are multiple +constants needed with the same low 16 bits, and those values are not live at +the same time, it would be possible to use a single movw instruction, followed +by multiple movt instructions to rewrite the high bits to different values. +For example: + + volatile store i32 -1, i32* inttoptr (i32 1342210076 to i32*), align 4, + !tbaa +!0 + volatile store i32 -1, i32* inttoptr (i32 1342341148 to i32*), align 4, + !tbaa +!0 + +is compiled and optimized to: + + movw r0, #32796 + mov.w r1, #-1 + movt r0, #20480 + str r1, [r0] + movw r0, #32796 @ <= this MOVW is not needed, value is there already + movt r0, #20482 + str r1, [r0] + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 233e16538771..dee3d278203f 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -34,13 +34,14 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const return !MF.getFrameInfo()->hasVarSizedObjects(); } -static void emitSPUpdate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - const TargetInstrInfo &TII, DebugLoc dl, - const Thumb1RegisterInfo &MRI, - int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); +static void +emitSPUpdate(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + const TargetInstrInfo &TII, DebugLoc dl, + const Thumb1RegisterInfo &MRI, + int NumBytes, unsigned MIFlags = MachineInstr::NoFlags) { + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, + MRI, MIFlags); } void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { @@ -70,11 +71,13 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { int FramePtrSpillFI = 
0; if (VARegSaveSize) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -VARegSaveSize, + MachineInstr::FrameSetup); if (!AFI->hasStackFrame()) { if (NumBytes != 0) - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + MachineInstr::FrameSetup); return; } @@ -131,7 +134,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // Adjust FP so it point to the stack slot that contains the previous FP. if (hasFP(MF)) { BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0); + .addFrameIndex(FramePtrSpillFI).addImm(0) + .setMIFlags(MachineInstr::FrameSetup); if (NumBytes > 7) // If offset is > 7 then sp cannot be adjusted in a single instruction, // try restoring from fp instead. @@ -140,7 +144,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) // Insert it after all the callee-save spills. - emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes); + emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, + MachineInstr::FrameSetup); if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - @@ -156,7 +161,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { // to reference locals. if (RegInfo->hasBasePointer(MF)) BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP); - + // If the frame has variable sized objects then the epilogue must restore // the sp from fp. We can assume there's an FP here since hasFP already // checks for hasVarSizedObjects. 
@@ -232,8 +237,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes) { assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && "No scratch register to restore SP from FP!"); - emitThumbRegPlusImmediate(MBB, MBBI, ARM::R4, FramePtr, -NumBytes, - TII, *RegInfo, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, + TII, *RegInfo); BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) .addReg(ARM::R4); } else @@ -307,6 +312,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MIB.addReg(Reg, getKillRegState(isKill)); } + MIB.setMIFlags(MachineInstr::FrameSetup); return true; } diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index c592e125de17..bcfc5165fad0 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// #ifndef __THUMB_FRAMEINFO_H_ -#define __THUMM_FRAMEINFO_H_ +#define __THUMB_FRAMEINFO_H_ #include "ARM.h" #include "ARMFrameLowering.h" diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index f62a13e3e288..33cefb6e79bb 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -31,8 +31,6 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -48,15 +46,29 @@ Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, : ARMBaseRegisterInfo(tii, sti) { } +const TargetRegisterClass* +Thumb1RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) + const { + if (RC == ARM::tGPRRegisterClass || RC->hasSuperClass(ARM::tGPRRegisterClass)) + return ARM::tGPRRegisterClass; + return 
ARMBaseRegisterInfo::getLargestLegalSuperClass(RC); +} + +const TargetRegisterClass * +Thumb1RegisterInfo::getPointerRegClass(unsigned Kind) const { + return ARM::tGPRRegisterClass; +} + /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. -void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - DebugLoc dl, - unsigned DestReg, unsigned SubIdx, - int Val, - ARMCC::CondCodes Pred, - unsigned PredReg) const { +void +Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, + int Val, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( @@ -64,8 +76,9 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, unsigned Idx = ConstantPool->getConstantPoolIndex(C, 4); BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRpci)) - .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg); + .addReg(DestReg, getDefRegState(true), SubIdx) + .addConstantPoolIndex(Idx).addImm(Pred).addReg(PredReg) + .setMIFlags(MIFlags); } @@ -76,11 +89,12 @@ void Thumb1RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, static void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, bool CanChangeCC, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl) { + unsigned MIFlags = MachineInstr::NoFlags) { MachineFunction &MF = *MBB.getParent(); bool isHigh = !isARMLowRegister(DestReg) || (BaseReg != 0 && !isARMLowRegister(BaseReg)); @@ -101,14 +115,15 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB, if (NumBytes <= 255 && NumBytes >= 0) AddDefaultT1CC(BuildMI(MBB, MBBI, dl, 
TII.get(ARM::tMOVi8), LdReg)) - .addImm(NumBytes); + .addImm(NumBytes).setMIFlags(MIFlags); else if (NumBytes < 0 && NumBytes >= -255) { AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVi8), LdReg)) - .addImm(NumBytes); + .addImm(NumBytes).setMIFlags(MIFlags); AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg)) - .addReg(LdReg, RegState::Kill); + .addReg(LdReg, RegState::Kill).setMIFlags(MIFlags); } else - MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes); + MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, + ARMCC::AL, 0, MIFlags); // Emit add / sub. int Opc = (isSub) ? ARM::tSUBrr : (isHigh ? ARM::tADDhirr : ARM::tADDrr); @@ -151,10 +166,11 @@ static unsigned calcNumMI(int Opc, int ExtraOpc, unsigned Bytes, /// a destreg = basereg + immediate in Thumb code. void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, const TargetInstrInfo &TII, const ARMBaseRegisterInfo& MRI, - DebugLoc dl) { + unsigned MIFlags) { bool isSub = NumBytes < 0; unsigned Bytes = (unsigned)NumBytes; if (isSub) Bytes = -NumBytes; @@ -211,8 +227,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, if (NumMIs > Threshold) { // This will expand into too many instructions. Load the immediate from a // constpool entry. - emitThumbRegPlusImmInReg(MBB, MBBI, DestReg, BaseReg, NumBytes, true, TII, - MRI, dl); + emitThumbRegPlusImmInReg(MBB, MBBI, dl, + DestReg, BaseReg, NumBytes, true, + TII, MRI, MIFlags); return; } @@ -224,11 +241,12 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, Bytes -= ThisVal; const TargetInstrDesc &TID = TII.get(isSub ? 
ARM::tSUBi3 : ARM::tADDi3); const MachineInstrBuilder MIB = - AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)); + AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags)); AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal)); } else { BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg) - .addReg(BaseReg, RegState::Kill); + .addReg(BaseReg, RegState::Kill) + .setMIFlags(MIFlags); } BaseReg = DestReg; } @@ -243,9 +261,10 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg); if (NeedCC) MIB = AddDefaultT1CC(MIB); - MIB .addReg(DestReg).addImm(ThisVal); + MIB.addReg(DestReg).addImm(ThisVal); if (NeedPred) MIB = AddDefaultPred(MIB); + MIB.setMIFlags(MIFlags); } else { bool isKill = BaseReg != ARM::SP; @@ -255,8 +274,9 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, MIB.addReg(BaseReg, getKillRegState(isKill)).addImm(ThisVal); if (NeedPred) MIB = AddDefaultPred(MIB); - BaseReg = DestReg; + MIB.setMIFlags(MIFlags); + BaseReg = DestReg; if (Opc == ARM::tADDrSPi) { // r4 = add sp, imm // r4 = add r4, imm @@ -274,7 +294,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB, const TargetInstrDesc &TID = TII.get(ExtraOpc); AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) .addReg(DestReg, RegState::Kill) - .addImm(((unsigned)NumBytes) & 3)); + .addImm(((unsigned)NumBytes) & 3) + .setMIFlags(MIFlags)); } } @@ -283,8 +304,8 @@ static void emitSPUpdate(MachineBasicBlock &MBB, const TargetInstrInfo &TII, DebugLoc dl, const Thumb1RegisterInfo &MRI, int NumBytes) { - emitThumbRegPlusImmediate(MBB, MBBI, ARM::SP, ARM::SP, NumBytes, TII, - MRI, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, + MRI); } void Thumb1RegisterInfo:: @@ -337,7 +358,7 @@ static void emitThumbConstant(MachineBasicBlock &MBB, DestReg)) .addImm(ThisVal)); if (Imm > 0) - emitThumbRegPlusImmediate(MBB, MBBI, DestReg, 
DestReg, Imm, TII, MRI, dl); + emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI); if (isSub) { const TargetInstrDesc &TID = TII.get(ARM::tRSB); AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg)) @@ -430,8 +451,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, // MI would expand into a large number of instructions. Don't try to // simplify the immediate. if (NumMIs > 2) { - emitThumbRegPlusImmediate(MBB, II, DestReg, FrameReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, II, dl, DestReg, FrameReg, Offset, TII, + *this); MBB.erase(II); return true; } @@ -450,8 +471,8 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx, } Offset = (Offset - Mask * Scale); MachineBasicBlock::iterator NII = llvm::next(II); - emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, NII, dl, DestReg, DestReg, Offset, TII, + *this); } else { // Translate r0 = add sp, -imm to // r0 = -imm (this is then translated into a series of instructons) @@ -645,15 +666,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, bool UseRR = false; if (Opcode == ARM::tRestore) { if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, - Offset, false, TII, *this, dl); + emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg, + Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); UseRR = true; } } else { - emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, II, dl, TmpReg, FrameReg, Offset, TII, + *this); } MI.setDesc(TII.get(UseRR ? 
ARM::tLDRr : ARM::tLDRi)); @@ -668,15 +689,15 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, if (Opcode == ARM::tSpill) { if (FrameReg == ARM::SP) - emitThumbRegPlusImmInReg(MBB, II, VReg, FrameReg, - Offset, false, TII, *this, dl); + emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg, + Offset, false, TII, *this); else { emitLoadConstPool(MBB, II, dl, VReg, 0, Offset); UseRR = true; } } else - emitThumbRegPlusImmediate(MBB, II, VReg, FrameReg, Offset, TII, - *this, dl); + emitThumbRegPlusImmediate(MBB, II, dl, VReg, FrameReg, Offset, TII, + *this); MI.setDesc(TII.get(UseRR ? ARM::tSTRr : ARM::tSTRi)); MI.getOperand(i).ChangeToRegister(VReg, false, false, true); if (UseRR) diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 8a87cc55c829..9060e59e5980 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -28,6 +28,11 @@ struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { public: Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, const ARMSubtarget &STI); + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + + const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; + /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. void emitLoadConstPool(MachineBasicBlock &MBB, @@ -35,7 +40,8 @@ struct Thumb1RegisterInfo : public ARMBaseRegisterInfo { DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags) const; /// Code Generation virtual methods... 
void eliminateCallFramePseudoInstr(MachineFunction &MF, diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 9b1073be3c8e..d169dbb7f197 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -184,7 +184,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, - const ARMBaseInstrInfo &TII) { + const ARMBaseInstrInfo &TII, unsigned MIFlags) { bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -198,14 +198,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, // Use a movw to materialize the 16-bit constant. BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), DestReg) .addImm(NumBytes) - .addImm((unsigned)Pred).addReg(PredReg); + .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags); Fits = true; } else if ((NumBytes & 0xffff) == 0) { // Use a movt to materialize the 32-bit constant. 
BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), DestReg) .addReg(DestReg) .addImm(NumBytes >> 16) - .addImm((unsigned)Pred).addReg(PredReg); + .addImm((unsigned)Pred).addReg(PredReg).setMIFlags(MIFlags); Fits = true; } @@ -214,12 +214,14 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), DestReg) .addReg(BaseReg, RegState::Kill) .addReg(DestReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); } else { BuildMI(MBB, MBBI, dl, TII.get(ARM::t2ADDrr), DestReg) .addReg(DestReg, RegState::Kill) .addReg(BaseReg, RegState::Kill) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); } return; } @@ -230,7 +232,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, unsigned Opc = 0; if (DestReg == ARM::SP && BaseReg != ARM::SP) { // mov sp, rn. Note t2MOVr cannot be used. - BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg); + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg) + .addReg(BaseReg).setMIFlags(MIFlags); BaseReg = ARM::SP; continue; } @@ -243,7 +246,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; // FIXME: Fix Thumb1 immediate encoding. 
BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg).addImm(ThisVal/4); + .addReg(BaseReg).addImm(ThisVal/4).setMIFlags(MIFlags); NumBytes = 0; continue; } @@ -283,7 +286,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineInstrBuilder MIB = AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) .addReg(BaseReg, RegState::Kill) - .addImm(ThisVal)); + .addImm(ThisVal)).setMIFlags(MIFlags); if (HasCCOut) AddDefaultCC(MIB); diff --git a/lib/Target/ARM/Thumb2RegisterInfo.cpp b/lib/Target/ARM/Thumb2RegisterInfo.cpp index 099b8f724140..355c3bf0352c 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb2RegisterInfo.cpp @@ -13,26 +13,15 @@ //===----------------------------------------------------------------------===// #include "ARM.h" -#include "ARMAddressingModes.h" -#include "ARMBaseInstrInfo.h" -#include "ARMMachineFunctionInfo.h" #include "ARMSubtarget.h" #include "Thumb2InstrInfo.h" #include "Thumb2RegisterInfo.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" -#include "llvm/LLVMContext.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLocation.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" using namespace llvm; Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, @@ -42,13 +31,14 @@ Thumb2RegisterInfo::Thumb2RegisterInfo(const ARMBaseInstrInfo &tii, /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. 
-void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - DebugLoc dl, - unsigned DestReg, unsigned SubIdx, - int Val, - ARMCC::CondCodes Pred, - unsigned PredReg) const { +void +Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + DebugLoc dl, + unsigned DestReg, unsigned SubIdx, + int Val, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get( @@ -57,5 +47,6 @@ void Thumb2RegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, BuildMI(MBB, MBBI, dl, TII.get(ARM::t2LDRpci)) .addReg(DestReg, getDefRegState(true), SubIdx) - .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0); + .addConstantPoolIndex(Idx).addImm((int64_t)ARMCC::AL).addReg(0) + .setMIFlags(MIFlags); } diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index b3cf2e5b0935..824378aeab4e 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -35,7 +35,8 @@ struct Thumb2RegisterInfo : public ARMBaseRegisterInfo { DebugLoc dl, unsigned DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred = ARMCC::AL, - unsigned PredReg = 0) const; + unsigned PredReg = 0, + unsigned MIFlags = MachineInstr::NoFlags) const; }; } diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index cc8f61cd72a4..ce2e9663fb74 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -12,6 +12,7 @@ #include "ARMAddressingModes.h" #include "ARMBaseRegisterInfo.h" #include "ARMBaseInstrInfo.h" +#include "ARMSubtarget.h" #include "Thumb2InstrInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -49,82 +50,86 @@ namespace { // 1 - No cc field. // 2 - Always set CPSR. 
unsigned PredCC2 : 2; + unsigned PartFlag : 1; // 16-bit instruction does partial flag update unsigned Special : 1; // Needs to be dealt with specially }; static const ReduceEntry ReduceTable[] = { - // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, S - { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0 }, - { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0 }, + // Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S + { ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 }, + { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,0 }, + { ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 }, // Note: immediate scale is 4. - { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 1 }, - { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 1 }, - { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 1 }, - { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 0 }, - { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 0 }, + { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0,1 }, + { ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 }, + { ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 }, + { ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2ASRri, ARM::tASRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, + { ARM::t2ASRrr, 0, ARM::tASRrr, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2BICrr, 0, ARM::tBIC, 0, 0, 0, 1, 0,0, 1,0 }, //FIXME: Disable CMN, as CCodes are backwards from compare expectations - //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0 }, - { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0 }, - { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 0, 2,0, 1 }, - { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 0 }, + //{ ARM::t2CMNrr, ARM::tCMN, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2CMPri, ARM::tCMPi8, 0, 8, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2CMPrr, ARM::tCMPhir, 0, 0, 0, 0, 
0, 2,0, 0,1 }, + { ARM::t2EORrr, 0, ARM::tEOR, 0, 0, 0, 1, 0,0, 1,0 }, // FIXME: adr.n immediate offset must be multiple of 4. - //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 0 }, - { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 0 }, - { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0 }, - { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 1 }, + //{ ARM::t2LEApcrelJT,ARM::tLEApcrelJT, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2LSLri, ARM::tLSLri, 0, 5, 0, 1, 0, 0,0, 1,0 }, + { ARM::t2LSLrr, 0, ARM::tLSLrr, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2LSRri, ARM::tLSRri, 0, 5, 0, 1, 0, 0,0, 1,0 }, + { ARM::t2LSRrr, 0, ARM::tLSRrr, 0, 0, 0, 1, 0,0, 1,0 }, + // FIXME: tMOVi8 and tMVN also partially update CPSR but they are less + // likely to cause issue in the loop. As a size / performance workaround, + // they are not marked as such. + { ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 }, + { ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 }, // FIXME: Do we need the 16-bit 'S' variant? 
- { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0 }, - { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0 }, - { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0 }, - { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0 }, - { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 1 }, - { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0 }, - { ARM::t2SUBri, ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0 }, - { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0 }, - { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0 }, - { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0 }, - { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0 }, - { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0 }, - { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0 }, + { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0,0 }, + { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0,0 }, + { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0,0 }, + { ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 }, + { ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2REV, ARM::tREV, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2REV16, ARM::tREV16, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2REVSH, ARM::tREVSH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2RORrr, 0, ARM::tROR, 0, 0, 0, 1, 0,0, 1,0 }, + { ARM::t2RSBri, ARM::tRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2RSBSri,ARM::tRSB, 0, 0, 0, 1, 0, 2,0, 0,1 }, + { ARM::t2SBCrr, 0, ARM::tSBC, 0, 0, 0, 1, 0,0, 0,0 }, + { ARM::t2SUBri, ARM::tSUBi3, 
ARM::tSUBi8, 3, 8, 1, 1, 0,0, 0,0 }, + { ARM::t2SUBrr, ARM::tSUBrr, 0, 0, 0, 1, 0, 0,0, 0,0 }, + { ARM::t2SUBSri,ARM::tSUBi3, ARM::tSUBi8, 3, 8, 1, 1, 2,2, 0,0 }, + { ARM::t2SUBSrr,ARM::tSUBrr, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2SXTBr, ARM::tSXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2SXTHr, ARM::tSXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2TSTrr, ARM::tTST, 0, 0, 0, 1, 0, 2,0, 0,0 }, + { ARM::t2UXTBr, ARM::tUXTB, 0, 0, 0, 1, 0, 1,0, 0,0 }, + { ARM::t2UXTHr, ARM::tUXTH, 0, 0, 0, 1, 0, 1,0, 0,0 }, // FIXME: Clean this up after splitting each Thumb load / store opcode // into multiple ones. - { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 }, - { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 }, - { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRi12,ARM::tLDRi, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRs, ARM::tLDRr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRBi12,ARM::tLDRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRBs, ARM::tLDRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRHi12,ARM::tLDRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRHs, ARM::tLDRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRi12,ARM::tSTRi, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 0,1 }, + { ARM::t2STRs, ARM::tSTRr, 0, 0, 0, 1, 0, 0,0, 0,1 
}, + { ARM::t2STRBi12,ARM::tSTRBi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRBs, ARM::tSTRBr, 0, 0, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRHi12,ARM::tSTRHi, 0, 5, 0, 1, 0, 0,0, 0,1 }, + { ARM::t2STRHs, ARM::tSTRHr, 0, 0, 0, 1, 0, 0,0, 0,1 }, - { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 1 }, + { ARM::t2LDMIA, ARM::tLDMIA, 0, 0, 0, 1, 1, 1,1, 0,1 }, + { ARM::t2LDMIA_RET,0, ARM::tPOP_RET, 0, 0, 1, 1, 1,1, 0,1 }, + { ARM::t2LDMIA_UPD,ARM::tLDMIA_UPD,ARM::tPOP,0, 0, 1, 1, 1,1, 0,1 }, // ARM::t2STM (with no basereg writeback) has no Thumb1 equivalent - { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 1 }, - { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 1 }, + { ARM::t2STMIA_UPD,ARM::tSTMIA_UPD, 0, 0, 0, 1, 1, 1,1, 0,1 }, + { ARM::t2STMDB_UPD, 0, ARM::tPUSH, 0, 0, 1, 1, 1,1, 0,1 }, }; class Thumb2SizeReduce : public MachineFunctionPass { @@ -133,6 +138,7 @@ namespace { Thumb2SizeReduce(); const Thumb2InstrInfo *TII; + const ARMSubtarget *STI; virtual bool runOnMachineFunction(MachineFunction &MF); @@ -144,6 +150,8 @@ namespace { /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. DenseMap ReduceOpcodeMap; + bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use); + bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool is2Addr, ARMCC::CondCodes Pred, bool LiveCPSR, bool &HasCC, bool &CCDead); @@ -152,19 +160,20 @@ namespace { const ReduceEntry &Entry); bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, - const ReduceEntry &Entry, bool LiveCPSR); + const ReduceEntry &Entry, bool LiveCPSR, + MachineInstr *CPSRDef); /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address /// instruction. 
bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR); + bool LiveCPSR, MachineInstr *CPSRDef); /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit /// non-two-address instruction. bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR); + bool LiveCPSR, MachineInstr *CPSRDef); /// ReduceMBB - Reduce width of instructions in the specified basic block. bool ReduceMBB(MachineBasicBlock &MBB); @@ -187,6 +196,52 @@ static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) { return false; } +/// canAddPseudoFlagDep - For A9 (and other out-of-order) implementations, +/// the 's' 16-bit instruction partially update CPSR. Abort the +/// transformation to avoid adding false dependency on last CPSR setting +/// instruction which hurts the ability for out-of-order execution engine +/// to do register renaming magic. +/// This function checks if there is a read-of-write dependency between the +/// last instruction that defines the CPSR and the current instruction. If there +/// is, then there is no harm done since the instruction cannot be retired +/// before the CPSR setting instruction anyway. +/// Note, we are not doing full dependency analysis here for the sake of compile +/// time. We're not looking for cases like: +/// r0 = muls ... +/// r1 = add.w r0, ... +/// ... 
+/// = mul.w r1 +/// In this case it would have been ok to narrow the mul.w to muls since there +/// are indirect RAW dependency between the muls and the mul.w +bool +Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) { + if (!Def || !STI->avoidCPSRPartialUpdate()) + return false; + + SmallSet Defs; + for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = Def->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isUse()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == 0 || Reg == ARM::CPSR) + continue; + Defs.insert(Reg); + } + + for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = Use->getOperand(i); + if (!MO.isReg() || MO.isUndef() || MO.isDef()) + continue; + unsigned Reg = MO.getReg(); + if (Defs.count(Reg)) + return false; + } + + // No read-after-write dependency. The narrowing will add false dependency. + return true; +} + bool Thumb2SizeReduce::VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool is2Addr, ARMCC::CondCodes Pred, @@ -410,7 +465,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, MIB.addOperand(MI->getOperand(OpNum)); // Transfer memoperands. - (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + + // Transfer MI flags. 
+ MIB.setMIFlags(MI->getFlags()); DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); @@ -422,7 +480,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR) { + bool LiveCPSR, MachineInstr *CPSRDef) { if (Entry.LowRegs1 && !VerifyLowRegs(MI)) return false; @@ -440,12 +498,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, switch (Opc) { default: break; case ARM::t2ADDSri: { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) return true; // fallthrough } case ARM::t2ADDSrr: - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } } break; @@ -453,13 +511,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, case ARM::t2RSBri: case ARM::t2RSBSri: if (MI->getOperand(2).getImm() == 0) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); break; case ARM::t2MOVi16: // Can convert only 'pure' immediate operands, not immediates obtained as // globals' addresses. if (MI->getOperand(1).isImm()) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); break; case ARM::t2CMPrr: { // Try to reduce to the lo-reg only version first. Why there are two @@ -468,17 +526,17 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // are prioritized, but the table assumes a unique entry for each // source insn opcode. So for now, we hack a local entry record to use. 
static const ReduceEntry NarrowEntry = - { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 1 }; - if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR)) + { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; + if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } case ARM::t2ADDrSPi: { static const ReduceEntry NarrowEntry = - { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 1 }; + { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 }; if (MI->getOperand(0).getReg() == ARM::SP) - return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR); - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR); + return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); } } return false; @@ -487,7 +545,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR) { + bool LiveCPSR, MachineInstr *CPSRDef) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; @@ -542,6 +600,12 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, if (!VerifyPredAndCC(MI, Entry, true, Pred, LiveCPSR, HasCC, CCDead)) return false; + // Avoid adding a false dependency on partial flag update by some 16-bit + // instructions which has the 's' bit set. + if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + canAddPseudoFlagDep(CPSRDef, MI)) + return false; + // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); @@ -563,6 +627,9 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, MIB.addOperand(MI->getOperand(i)); } + // Transfer MI flags. 
+ MIB.setMIFlags(MI->getFlags()); + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); @@ -573,7 +640,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR) { + bool LiveCPSR, MachineInstr *CPSRDef) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; @@ -626,6 +693,12 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (!VerifyPredAndCC(MI, Entry, false, Pred, LiveCPSR, HasCC, CCDead)) return false; + // Avoid adding a false dependency on partial flag update by some 16-bit + // instructions which has the 's' bit set. + if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC && + canAddPseudoFlagDep(CPSRDef, MI)) + return false; + // Add the 16-bit instruction. DebugLoc dl = MI->getDebugLoc(); MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID); @@ -663,6 +736,9 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, if (!TID.isPredicable() && NewTID.isPredicable()) AddDefaultPred(MIB); + // Transfer MI flags. + MIB.setMIFlags(MI->getFlags()); + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); @@ -670,7 +746,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, return true; } -static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) { +static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR, bool &DefCPSR) { bool HasDef = false; for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI.getOperand(i); @@ -678,6 +754,8 @@ static bool UpdateCPSRDef(MachineInstr &MI, bool LiveCPSR) { continue; if (MO.getReg() != ARM::CPSR) continue; + + DefCPSR = true; if (!MO.isDead()) HasDef = true; } @@ -707,6 +785,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Yes, CPSR could be livein. 
bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); + MachineInstr *CPSRDef = 0; MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; @@ -722,7 +801,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { const ReduceEntry &Entry = ReduceTable[OPI->second]; // Ignore "special" cases for now. if (Entry.Special) { - if (ReduceSpecial(MBB, MI, Entry, LiveCPSR)) { + if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -731,7 +810,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } // Try to transform to a 16-bit two-address instruction. - if (Entry.NarrowOpc2 && ReduceTo2Addr(MBB, MI, Entry, LiveCPSR)) { + if (Entry.NarrowOpc2 && + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -739,7 +819,8 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } // Try to transform to a 16-bit non-two-address instruction. - if (Entry.NarrowOpc1 && ReduceToNarrow(MBB, MI, Entry, LiveCPSR)) { + if (Entry.NarrowOpc1 && + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -747,7 +828,14 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { } ProcessNext: - LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR); + bool DefCPSR = false; + LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); + if (MI->getDesc().isCall()) + // Calls don't really set CPSR. + CPSRDef = 0; + else if (DefCPSR) + // This is the last CPSR defining instruction. 
+ CPSRDef = MI; } return Modified; @@ -756,6 +844,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { const TargetMachine &TM = MF.getTarget(); TII = static_cast(TM.getInstrInfo()); + STI = &TM.getSubtarget(); bool Modified = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td index 4508eda897d2..ae79c2e4b70e 100644 --- a/lib/Target/Alpha/Alpha.td +++ b/lib/Target/Alpha/Alpha.td @@ -21,7 +21,7 @@ include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true", - "Enable CIX extentions">; + "Enable CIX extensions">; //===----------------------------------------------------------------------===// // Register File Description diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index c4f43ab9e4e7..ee404f06fc43 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -296,7 +296,7 @@ AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in - // necessary since all emited instructions must be stuck together. + // necessary since all emitted instructions must be stuck together. 
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 099d7157ca2b..b20171224e29 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -1030,7 +1030,7 @@ def : Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP), //WMB Mfc 18.4400 Write memory barrier //MF_FPCR F-P 17.025 Move from FPCR //MT_FPCR F-P 17.024 Move to FPCR -//There are in the Multimedia extentions, so let's not use them yet +//There are in the Multimedia extensions, so let's not use them yet //def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum //def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum //def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt index 9ae15174c582..cc170e313030 100644 --- a/lib/Target/Alpha/README.txt +++ b/lib/Target/Alpha/README.txt @@ -33,9 +33,9 @@ add crazy vector instructions (MVI): (MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word PKWB, UNPKBW pack/unpack word to byte PKLB UNPKBL pack/unpack long to byte -PERR pixel error (sum accross bytes of bytewise abs(i8v8 a - i8v8 b)) +PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b)) -cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extentions) +cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions) this has some good examples for other operations that can be synthesised well from these rather meager vector ops (such as saturating add). 
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 7c80eec3ba63..1e1f8c9dc256 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -345,7 +345,7 @@ BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 6c555a3e9d1f..358d1b35b66c 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -2440,24 +2440,6 @@ void CWriter::visitReturnInst(ReturnInst &I) { return; } - if (I.getNumOperands() > 1) { - Out << " {\n"; - Out << " "; - printType(Out, I.getParent()->getParent()->getReturnType()); - Out << " llvm_cbe_mrv_temp = {\n"; - for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) { - Out << " "; - writeOperand(I.getOperand(i)); - if (i != e - 1) - Out << ","; - Out << "\n"; - } - Out << " };\n"; - Out << " return llvm_cbe_mrv_temp;\n"; - Out << " }\n"; - return; - } - Out << " return"; if (I.getNumOperands()) { Out << ' '; diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td index 5ef5716bd8cf..f340edfb0f86 100644 --- a/lib/Target/CellSPU/SPU64InstrInfo.td +++ b/lib/Target/CellSPU/SPU64InstrInfo.td @@ -24,7 +24,7 @@ // 5. The code sequences for r64 and v2i64 are probably overly conservative, // compared to the code that gcc produces. // -// M00$E B!tes Kan be Pretty N@sTi!!!!! (appologies to Monty!) +// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!) 
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ // selb instruction definition for i64. Note that the selection mask is diff --git a/lib/Target/CellSPU/SPUAsmPrinter.cpp b/lib/Target/CellSPU/SPUAsmPrinter.cpp index 40404614b703..fd96694b32fe 100644 --- a/lib/Target/CellSPU/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/SPUAsmPrinter.cpp @@ -182,6 +182,10 @@ namespace { printOp(MI->getOperand(OpNo), O); } + void printHBROperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { + printOp(MI->getOperand(OpNo), O); + } + void printPCRelativeOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O) { // Used to generate a ".-", but it turns out that the assembler // really wants the target. @@ -279,6 +283,9 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { } O << *Mang->getSymbol(MO.getGlobal()); return; + case MachineOperand::MO_MCSymbol: + O << *(MO.getMCSymbol()); + return; default: O << ""; return; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index d2261562e721..9351ffdc0b7f 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -321,12 +321,17 @@ SPUDAGToDAGISel::SelectAFormAddr(SDNode *Op, SDValue N, SDValue &Base, // These match the addr256k operand type: EVT OffsVT = MVT::i16; SDValue Zero = CurDAG->getTargetConstant(0, OffsVT); + int64_t val; switch (N.getOpcode()) { case ISD::Constant: + val = dyn_cast(N.getNode())->getSExtValue(); + Base = CurDAG->getTargetConstant( val , MVT::i32); + Index = Zero; + return true; break; case ISD::ConstantPool: case ISD::GlobalAddress: - report_fatal_error("SPU SelectAFormAddr: Constant/Pool/Global not lowered."); + report_fatal_error("SPU SelectAFormAddr: Pool/Global not lowered."); /*NOTREACHED*/ case ISD::TargetConstant: diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 743a4d7a0f78..8668da3ca2f8 100644 --- 
a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -705,7 +705,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { offset )); - // Shift the low similarily + // Shift the low similarly // TODO: add SPUISD::SHL_BYTES low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset ); diff --git a/lib/Target/CellSPU/SPUISelLowering.h b/lib/Target/CellSPU/SPUISelLowering.h index dd48d7bafaef..cf883e25ed72 100644 --- a/lib/Target/CellSPU/SPUISelLowering.h +++ b/lib/Target/CellSPU/SPUISelLowering.h @@ -183,14 +183,6 @@ namespace llvm { virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty) const; - - /// After allocating this many registers, the allocator should feel - /// register pressure. The value is a somewhat random guess, based on the - /// number of non callee saved registers in the C calling convention. - virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC, - MachineFunction &MF) const{ - return 50; - } }; } diff --git a/lib/Target/CellSPU/SPUInstrFormats.td b/lib/Target/CellSPU/SPUInstrFormats.td index 21bc275209c6..bdbe2552dcdd 100644 --- a/lib/Target/CellSPU/SPUInstrFormats.td +++ b/lib/Target/CellSPU/SPUInstrFormats.td @@ -296,3 +296,25 @@ class Pseudo pattern> let Pattern = pattern; let Inst{31-0} = 0; } + +//===----------------------------------------------------------------------===// +// Branch hint formats +//===----------------------------------------------------------------------===// +// For hbrr and hbra +class HBI16Form opcode, dag IOL, string asmstr> + : Instruction { + field bits<32> Inst; + bits<16>i16; + bits<9>RO; + + let Namespace = "SPU"; + let InOperandList = IOL; + let OutOperandList = (outs); //no output + let AsmString = asmstr; + let Itinerary = BranchHints; + + let Inst{0-6} = opcode; + let Inst{7-8} = RO{8-7}; + let Inst{9-24} = i16; + let Inst{25-31} = RO{6-0}; +} diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp 
b/lib/Target/CellSPU/SPUInstrInfo.cpp index f9e6c72ef310..080434d66789 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.cpp +++ b/lib/Target/CellSPU/SPUInstrInfo.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCContext.h" using namespace llvm; @@ -281,9 +282,20 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, return true; } +// Erase all branch hint labels and branch hint ops found in MBB. +static void removeHBR( MachineBasicBlock &MBB) { + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { + // Step past the instruction before erasing it so that I stays valid. + MachineBasicBlock::iterator Cur = I++; + if (Cur->getOpcode() == SPU::HBRA || Cur->getOpcode() == SPU::HBR_LABEL) + MBB.erase(Cur); + } +} + unsigned SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { MachineBasicBlock::iterator I = MBB.end(); + removeHBR(MBB); if (I == MBB.begin()) return 0; --I; @@ -314,6 +326,23 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { return 2; } +/** Find the optimal position for a hint branch instruction in a basic block. + * This should take into account: + * -the branch hint delays + * -congestion of the memory bus + * -dual-issue scheduling (i.e. avoid insertion of nops) + * Current implementation is rather simplistic. + */ +static MachineBasicBlock::iterator findHBRPosition(MachineBasicBlock &MBB) +{ + MachineBasicBlock::iterator J = MBB.end(); + for( int i=0; i<8; i++) { + if( J == MBB.begin() ) return J; + J--; + } + return J; +} + unsigned SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, @@ -324,32 +353,61 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "SPU branch conditions have two components!"); + MachineInstrBuilder MIB; + //TODO: make a more accurate algorithm. 
+ bool haveHBR = MBB.size()>8; + + removeHBR(MBB); + MCSymbol *branchLabel = MBB.getParent()->getContext().CreateTempSymbol(); + // Add a label just before the branch + if (haveHBR) + MIB = BuildMI(&MBB, DL, get(SPU::HBR_LABEL)).addSym(branchLabel); + // One-way branch. if (FBB == 0) { if (Cond.empty()) { // Unconditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(SPU::BR)); + MIB = BuildMI(&MBB, DL, get(SPU::BR)); MIB.addMBB(TBB); DEBUG(errs() << "Inserted one-way uncond branch: "); DEBUG((*MIB).dump()); + + // basic blocks have just one branch so it is safe to add the hint a its + if (haveHBR) { + MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(TBB); + } } else { // Conditional branch - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); MIB.addReg(Cond[1].getReg()).addMBB(TBB); + if (haveHBR) { + MIB = BuildMI(MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(TBB); + } + DEBUG(errs() << "Inserted one-way cond branch: "); DEBUG((*MIB).dump()); } return 1; } else { - MachineInstrBuilder MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); + MIB = BuildMI(&MBB, DL, get(Cond[0].getImm())); MachineInstrBuilder MIB2 = BuildMI(&MBB, DL, get(SPU::BR)); // Two-way Conditional Branch. 
MIB.addReg(Cond[1].getReg()).addMBB(TBB); MIB2.addMBB(FBB); + if (haveHBR) { + MIB = BuildMI( MBB, findHBRPosition(MBB), DL, get(SPU::HBRA)); + MIB.addSym(branchLabel); + MIB.addMBB(FBB); + } + DEBUG(errs() << "Inserted conditional branch: "); DEBUG((*MIB).dump()); DEBUG(errs() << "part 2: "); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 25f6fd000b8b..e103c9b6a5af 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -28,6 +28,8 @@ let hasCtrlDep = 1, Defs = [R1], Uses = [R1] in { def ADJCALLSTACKUP : Pseudo<(outs), (ins u16imm_i32:$amt), "${:comment} ADJCALLSTACKUP", [(callseq_end timm:$amt)]>; + def HBR_LABEL : Pseudo<(outs), (ins hbrtarget:$targ), + "$targ:\t${:comment}branch hint target",[ ]>; } //===----------------------------------------------------------------------===// @@ -2013,9 +2015,9 @@ class SHLHInst pattern>: RotShiftVec, pattern>; class SHLHVecInst: - SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + SHLHInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [(set (vectype VECREG:$rT), - (SPUvec_shl (vectype VECREG:$rA), R16C:$rB))]>; + (SPUvec_shl (vectype VECREG:$rA), (vectype VECREG:$rB)))]>; multiclass ShiftLeftHalfword { @@ -2063,9 +2065,9 @@ class SHLInst pattern>: multiclass ShiftLeftWord { def v4i32: - SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, R16C:$rB), + SHLInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [(set (v4i32 VECREG:$rT), - (SPUvec_shl (v4i32 VECREG:$rA), R16C:$rB))]>; + (SPUvec_shl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)))]>; def r32: SHLInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), [(set R32C:$rT, (shl R32C:$rA, R32C:$rB))]>; @@ -2511,19 +2513,11 @@ class ROTHMInst pattern>: RotShiftVec, pattern>; def ROTHMv8i16: - ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + ROTHMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), 
R32C:$rB), - (ROTHMv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R16C:$rB), - (ROTHMv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), R8C:$rB), - (ROTHMv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB) ), 0))>; +def : Pat<(SPUvec_srl (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (ROTHMv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; // ROTHM r16 form: Rotate 16-bit quantity to right, zero fill at the left // Note: This instruction doesn't match a pattern because rB must be negated @@ -2584,19 +2578,11 @@ class ROTMInst pattern>: RotShiftVec, pattern>; def ROTMv4i32: - ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + ROTMInst<(outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R32C:$rB), - (ROTMv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R16C:$rB), - (ROTMv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), R8C:$rB), - (ROTMv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_srl (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMv4i32 VECREG:$rA, (SFIvec VECREG:$rB, 0))>; def ROTMr32: ROTMInst<(outs R32C:$rT), (ins R32C:$rA, R32C:$rB), @@ -2802,20 +2788,12 @@ defm ROTQMBII: RotateMaskQuadByBitsImm; //-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~ def ROTMAHv8i16: - RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + RRForm<0b01111010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "rotmah\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R32C:$rB), - (ROTMAHv8i16 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), R16C:$rB), - (ROTMAHv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_sra (v8i16 
VECREG:$rA), R8C:$rB), - (ROTMAHv8i16 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_sra (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)), + (ROTMAHv8i16 VECREG:$rA, (SFHIvec VECREG:$rB, 0))>; def ROTMAHr16: RRForm<0b01111010000, (outs R16C:$rT), (ins R16C:$rA, R32C:$rB), @@ -2857,20 +2835,12 @@ def : Pat<(sra R16C:$rA, (i8 imm:$val)), (ROTMAHIr16 R16C:$rA, (TO_IMM32 uimm7:$val))>; def ROTMAv4i32: - RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, R32C:$rB), + RRForm<0b01011010000, (outs VECREG:$rT), (ins VECREG:$rA, VECREG:$rB), "rotma\t$rT, $rA, $rB", RotShiftVec, [/* see patterns below - $rB must be negated */]>; -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R32C:$rB), - (ROTMAv4i32 VECREG:$rA, (SFIr32 R32C:$rB, 0))>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R16C:$rB), - (ROTMAv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 R16C:$rB), 0))>; - -def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), R8C:$rB), - (ROTMAv4i32 VECREG:$rA, - (SFIr32 (XSHWr16 (XSBHr8 R8C:$rB)), 0))>; +def : Pat<(SPUvec_sra (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)), + (ROTMAv4i32 VECREG:$rA, (SFIvec (v4i32 VECREG:$rB), 0))>; def ROTMAr32: RRForm<0b01011010000, (outs R32C:$rT), (ins R32C:$rA, R32C:$rB), @@ -4208,8 +4178,8 @@ def : Pat<(fabs (v4f32 VECREG:$rA)), //===----------------------------------------------------------------------===// // Hint for branch instructions: //===----------------------------------------------------------------------===// - -/* def HBR : SPUInstr<(outs), (ins), "hbr\t" */ +def HBRA : + HBI16Form<0b0001001,(ins hbrtarget:$brinst, brtarget:$btarg), "hbra\t$brinst, $btarg">; //===----------------------------------------------------------------------===// // Execution, Load NOP (execute NOPs belong in even pipeline, load NOPs belong diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 641da0480a8d..1708c5983722 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -46,6 +46,14 
@@ namespace llvm { virtual const TargetRegisterClass * getPointerRegClass(unsigned Kind = 0) const; + /// After allocating this many registers, the allocator should feel + /// register pressure. The value is a somewhat random guess, based on the + /// number of non callee saved registers in the C calling convention. + virtual unsigned getRegPressureLimit( const TargetRegisterClass *RC, + MachineFunction &MF) const{ + return 50; + } + //! Return the array of callee-saved registers virtual const unsigned* getCalleeSavedRegs(const MachineFunction *MF) const; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 71d6049c8a1b..797cfd597e60 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1348,12 +1348,10 @@ void CppWriter::printInstruction(const Instruction *I, const PHINode* phi = cast(I); Out << "PHINode* " << iName << " = PHINode::Create(" - << getCppName(phi->getType()) << ", \""; + << getCppName(phi->getType()) << ", " + << phi->getNumIncomingValues() << ", \""; printEscapedString(phi->getName()); Out << "\", " << bbname << ");"; - nl(Out) << iName << "->reserveOperandSpace(" - << phi->getNumIncomingValues() - << ");"; nl(Out); for (unsigned i = 0; i < phi->getNumOperands(); i+=2) { Out << iName << "->addIncoming(" diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index 3379ac216972..060a87b7c616 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -57,18 +57,26 @@ static unsigned mblazeBinary2Opcode[] = { }; static unsigned getRD(uint32_t insn) { + if (!MBlazeRegisterInfo::isRegister((insn>>21)&0x1F)) + return UNSUPPORTED; return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>21)&0x1F); } static unsigned getRA(uint32_t insn) { + if (!MBlazeRegisterInfo::isRegister((insn>>16)&0x1F)) + return UNSUPPORTED; return 
MBlazeRegisterInfo::getRegisterFromNumbering((insn>>16)&0x1F); } static unsigned getRB(uint32_t insn) { + if (!MBlazeRegisterInfo::isRegister((insn>>11)&0x1F)) + return UNSUPPORTED; return MBlazeRegisterInfo::getRegisterFromNumbering((insn>>11)&0x1F); } static int64_t getRS(uint32_t insn) { + if (!MBlazeRegisterInfo::isSpecialRegister(insn&0x3FFF)) + return UNSUPPORTED; return MBlazeRegisterInfo::getSpecialRegisterFromNumbering(insn&0x3FFF); } @@ -489,13 +497,14 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, raw_ostream &vStream) const { // The machine instruction. uint32_t insn; + uint64_t read; uint8_t bytes[4]; - // We always consume 4 bytes of data - size = 4; + // By default we consume 1 byte on failure + size = 1; // We want to read exactly 4 bytes of data. - if (region.readBytes(address, 4, (uint8_t*)bytes, NULL) == -1) + if (region.readBytes(address, 4, (uint8_t*)bytes, &read) == -1 || read < 4) return false; // Encoded as a big-endian 32-bit word in the stream. 
@@ -509,44 +518,63 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, instr.setOpcode(opcode); + unsigned RD = getRD(insn); + unsigned RA = getRA(insn); + unsigned RB = getRB(insn); + unsigned RS = getRS(insn); + uint64_t tsFlags = MBlazeInsts[opcode].TSFlags; switch ((tsFlags & MBlazeII::FormMask)) { - default: llvm_unreachable("unknown instruction encoding"); + default: + return false; case MBlazeII::FRRRR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RB)); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FRRR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRI: switch (opcode) { - default: llvm_unreachable("unknown instruction encoding"); + default: + return false; case MBlaze::MFS: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); break; case MBlaze::MTS: + if (RA == UNSUPPORTED) + return false; instr.addOperand(MCOperand::CreateImm(insn&0x3FFF)); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlaze::MSRSET: case MBlaze::MSRCLR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); 
instr.addOperand(MCOperand::CreateImm(insn&0x7FFF)); break; } break; case MBlazeII::FRRI: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); switch (opcode) { default: instr.addOperand(MCOperand::CreateImm(getIMM(insn))); @@ -560,27 +588,37 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, break; case MBlazeII::FCRR: - instr.addOperand(MCOperand::CreateReg(getRA(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RA == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RA)); + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FCRI: - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FRCR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RD == UNSUPPORTED || RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRCI: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); break; case MBlazeII::FCCR: - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FCCI: @@ -588,33 +626,45 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, break; case MBlazeII::FRRCI: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == 
UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getSHT(insn))); break; case MBlazeII::FRRC: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FRCX: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; case MBlazeII::FRCS: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); - instr.addOperand(MCOperand::CreateReg(getRS(insn))); + if (RD == UNSUPPORTED || RS == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); + instr.addOperand(MCOperand::CreateReg(RS)); break; case MBlazeII::FCRCS: - instr.addOperand(MCOperand::CreateReg(getRS(insn))); - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RS == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RS)); + instr.addOperand(MCOperand::CreateReg(RA)); break; case MBlazeII::FCRCX: - instr.addOperand(MCOperand::CreateReg(getRA(insn))); + if (RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RA)); instr.addOperand(MCOperand::CreateImm(getFSL(insn))); break; @@ -623,16 +673,23 @@ bool MBlazeDisassembler::getInstruction(MCInst &instr, break; case MBlazeII::FCR: - instr.addOperand(MCOperand::CreateReg(getRB(insn))); + if (RB == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RB)); break; case MBlazeII::FRIR: - instr.addOperand(MCOperand::CreateReg(getRD(insn))); + if (RD == UNSUPPORTED || RA == UNSUPPORTED) + return false; + instr.addOperand(MCOperand::CreateReg(RD)); instr.addOperand(MCOperand::CreateImm(getIMM(insn))); 
- instr.addOperand(MCOperand::CreateReg(getRA(insn))); + instr.addOperand(MCOperand::CreateReg(RA)); break; } + // We always consume 4 bytes of data on success + size = 4; + return true; } diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h index bebc6c83d544..13c4b49f981c 100644 --- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h +++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h @@ -18,11 +18,12 @@ namespace llvm { class MCOperand; + class TargetMachine; class MBlazeInstPrinter : public MCInstPrinter { public: - MBlazeInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) { - } + MBlazeInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td index 1fa1e4dd5776..1245658d29ba 100644 --- a/lib/Target/MBlaze/MBlaze.td +++ b/lib/Target/MBlaze/MBlaze.td @@ -31,49 +31,28 @@ def MBlazeInstrInfo : InstrInfo; // Microblaze Subtarget features // //===----------------------------------------------------------------------===// -def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true", - "Implements 3-stage pipeline">; def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true", "Implements barrel shifter">; def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true", "Implements hardware divider">; def FeatureMul : SubtargetFeature<"mul", "HasMul", "true", "Implements hardware multiplier">; -def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true", - "Implements FSL instructions">; -def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true", - "Implements extended FSL instructions">; -def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true", - "Implements MSR register set and clear">; -def FeatureException : SubtargetFeature<"exception", "HasException", "true", - "Implements hardware exception support">; def FeaturePatCmp : 
SubtargetFeature<"patcmp", "HasPatCmp", "true", "Implements pattern compare instruction">; def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true", "Implements floating point unit">; -def FeatureESR : SubtargetFeature<"esr", "HasESR", "true", - "Implements ESR and EAR registers">; -def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true", - "Implements processor version register">; def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true", "Implements multiplier with 64-bit result">; def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true", "Implements sqrt and floating point convert">; -def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true", - "Implements memory management unit">; //===----------------------------------------------------------------------===// // MBlaze processors supported. //===----------------------------------------------------------------------===// -class Proc Features> - : Processor; - -def : Proc<"v400", []>; -def : Proc<"v500", []>; -def : Proc<"v600", []>; -def : Proc<"v700", []>; -def : Proc<"v710", []>; +def : Processor<"mblaze", MBlazeGenericItineraries, []>; +def : Processor<"mblaze3", MBlazePipe3Itineraries, []>; +def : Processor<"mblaze5", MBlazePipe5Itineraries, []>; //===----------------------------------------------------------------------===// // Instruction Descriptions diff --git a/lib/Target/MBlaze/MBlazeAsmBackend.cpp b/lib/Target/MBlaze/MBlazeAsmBackend.cpp index a4b21afa599e..08f14c365957 100644 --- a/lib/Target/MBlaze/MBlazeAsmBackend.cpp +++ b/lib/Target/MBlaze/MBlazeAsmBackend.cpp @@ -150,14 +150,13 @@ void ELFMBlazeAsmBackend::ApplyFixup(const MCFixup &Fixup, char *Data, TargetAsmBackend *llvm::createMBlazeAsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin()) assert(0 && "Mac not supported on MBlaze"); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: + + if 
(TheTriple.isOSWindows()) assert(0 && "Windows not supported on MBlaze"); - default: - return new ELFMBlazeAsmBackend(T, Triple(TT).getOS()); - } + + return new ELFMBlazeAsmBackend(T, TheTriple.getOS()); } diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 0016df569b93..0f0f60e69f08 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -319,10 +319,11 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { } static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new MBlazeInstPrinter(MAI); + return new MBlazeInstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp index 4399ee280098..973e96844e81 100644 --- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp +++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp @@ -77,7 +77,7 @@ static bool hasImmInstruction(MachineBasicBlock::iterator &candidate) { // We must assume that unknown immediate values require more than // 16-bits to represent. - if (mop.isGlobal() || mop.isSymbol()) + if (mop.isGlobal() || mop.isSymbol() || mop.isJTI() || mop.isCPI()) return true; // FIXME: we could probably check to see if the FP value happens diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index f39826b1cf17..21a59884a6b8 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -274,7 +274,7 @@ MBlazeTargetLowering::EmitCustomShift(MachineInstr *MI, F->insert(It, loop); F->insert(It, finish); - // Update machine-CFG edges by transfering adding all successors and + // Update machine-CFG edges by transferring adding all successors and // remaining instructions from the current block to the new block which // will contain the Phi node for the select. 
finish->splice(finish->begin(), MBB, @@ -456,7 +456,7 @@ MBlazeTargetLowering::EmitCustomAtomic(MachineInstr *MI, F->insert(It, start); F->insert(It, exit); - // Update machine-CFG edges by transfering adding all successors and + // Update machine-CFG edges by transferring adding all successors and // remaining instructions from the current block to the new block which // will contain the Phi node for the select. exit->splice(exit->begin(), MBB, llvm::next(MachineBasicBlock::iterator(MI)), @@ -778,7 +778,7 @@ LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { @@ -1103,7 +1103,7 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight( switch (*constraint) { default: weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; + break; case 'd': case 'y': if (type->isIntegerTy()) diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td index 094de5c0c1a8..4acdcfdd772c 100644 --- a/lib/Target/MBlaze/MBlazeInstrFPU.td +++ b/lib/Target/MBlaze/MBlazeInstrFPU.td @@ -21,22 +21,22 @@ class LoadFM op, string instr_asm, PatFrag OpNode> : TA; + [(set (f32 GPR:$dst), (OpNode xaddr:$addr))], IIC_MEMl>; class LoadFMI op, string instr_asm, PatFrag OpNode> : TB; + [(set (f32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>; class StoreFM op, string instr_asm, PatFrag OpNode> : TA; + [(OpNode (f32 GPR:$dst), xaddr:$addr)], IIC_MEMs>; class StoreFMI op, string instr_asm, PatFrag OpNode> : TB; + [(OpNode (f32 GPR:$dst), iaddr:$addr)], IIC_MEMs>; class ArithF op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : @@ -56,15 +56,10 @@ class 
ArithFR op, bits<11> flags, string instr_asm, SDNode OpNode, !strconcat(instr_asm, " $dst, $c, $b"), [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], itin>; -class LogicF op, string instr_asm> : - TB; - class LogicFI op, string instr_asm> : TB; + [], IIC_ALU>; let rb=0 in { class ArithF2 op, bits<11> flags, string instr_asm, @@ -95,10 +90,10 @@ let rb=0 in { //===----------------------------------------------------------------------===// let Predicates=[HasFPU] in { def FORI : LogicFI<0x28, "ori ">; - def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>; - def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>; - def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>; - def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>; + def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIC_FPU>; + def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIC_FPU>; + def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIC_FPU>; + def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIC_FPUd>; } let Predicates=[HasFPU], isCodeGenOnly=1 in { @@ -110,19 +105,19 @@ let Predicates=[HasFPU], isCodeGenOnly=1 in { } let Predicates=[HasFPU,HasSqrt] in { - def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>; - def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>; - def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIAlu>; + def FLT : ArithIF<0x16, 0x280, "flt ", IIC_FPUf>; + def FINT : ArithFI<0x16, 0x300, "fint ", IIC_FPUi>; + def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIC_FPUs>; } let isAsCheapAsAMove = 1 in { - def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>; - def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>; - def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>; - def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>; - def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>; - def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>; - def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>; + def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIC_FPUc>; + def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIC_FPUc>; + def FCMP_EQ : CmpFN<0x16, 0x220, 
"fcmp.eq", IIC_FPUc>; + def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIC_FPUc>; + def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIC_FPUc>; + def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIC_FPUc>; + def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIC_FPUc>; } diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td index 32098452416b..3082a7e227f8 100644 --- a/lib/Target/MBlaze/MBlazeInstrFSL.td +++ b/lib/Target/MBlaze/MBlazeInstrFSL.td @@ -13,7 +13,7 @@ class FSLGet op, bits<5> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst + [(set GPR:$dst, (OpNode immZExt4:$b))],IIC_FSLg> { bits<5> rd; bits<4> fslno; @@ -29,7 +29,7 @@ class FSLGet op, bits<5> flags, string instr_asm, Intrinsic OpNode> : class FSLGetD op, bits<5> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst + [(set GPR:$dst, (OpNode GPR:$b))], IIC_FSLg> { bits<5> rd; bits<5> rb; @@ -45,7 +45,7 @@ class FSLGetD op, bits<5> flags, string instr_asm, Intrinsic OpNode> : class FSLPut op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst + [(OpNode GPR:$v, immZExt4:$b)], IIC_FSLp> { bits<5> ra; bits<4> fslno; @@ -61,7 +61,7 @@ class FSLPut op, bits<4> flags, string instr_asm, Intrinsic OpNode> : class FSLPutD op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst + [(OpNode GPR:$v, GPR:$b)], IIC_FSLp> { bits<5> ra; bits<5> rb; @@ -77,7 +77,7 @@ class FSLPutD op, bits<4> flags, string instr_asm, Intrinsic OpNode> : class FSLPutT op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst + [(OpNode immZExt4:$b)], IIC_FSLp> { bits<4> fslno; @@ -92,7 +92,7 @@ class FSLPutT op, bits<4> flags, string instr_asm, Intrinsic OpNode> : class FSLPutTD op, bits<4> flags, string instr_asm, Intrinsic OpNode> : MBlazeInst + [(OpNode GPR:$b)], IIC_FSLp> { bits<5> rb; diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td index d62574d0edee..54f605f989a3 100644 --- a/lib/Target/MBlaze/MBlazeInstrFormats.td +++ 
b/lib/Target/MBlaze/MBlazeInstrFormats.td @@ -81,7 +81,7 @@ class MBlazeInst op, Format form, dag outs, dag ins, string asmstr, // Pseudo instruction class //===----------------------------------------------------------------------===// class MBlazePseudo pattern>: - MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIPseudo>; + MBlazeInst<0x0, FPseudo, outs, ins, asmstr, pattern, IIC_Pseudo>; //===----------------------------------------------------------------------===// // Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|> diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp index b353dcdef05b..794ebedf1e6a 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp +++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp @@ -17,6 +17,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "MBlazeGenInstrInfo.inc" diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h index b7300c14080d..b717da8e2bec 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.h +++ b/lib/Target/MBlaze/MBlazeInstrInfo.h @@ -261,7 +261,6 @@ class MBlazeInstrInfo : public TargetInstrInfoImpl { virtual bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td index 7b8f70a30434..896e8eae1637 100644 --- a/lib/Target/MBlaze/MBlazeInstrInfo.td +++ b/lib/Target/MBlaze/MBlazeInstrInfo.td @@ -47,22 +47,22 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd, //===----------------------------------------------------------------------===// // MBlaze Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasPipe3 : Predicate<"Subtarget.hasPipe3()">; +// def HasPipe3 : Predicate<"Subtarget.hasPipe3()">; def HasBarrel : Predicate<"Subtarget.hasBarrel()">; -def NoBarrel : Predicate<"!Subtarget.hasBarrel()">; +// def NoBarrel : Predicate<"!Subtarget.hasBarrel()">; def HasDiv : Predicate<"Subtarget.hasDiv()">; def HasMul : Predicate<"Subtarget.hasMul()">; -def HasFSL : Predicate<"Subtarget.hasFSL()">; -def HasEFSL : Predicate<"Subtarget.hasEFSL()">; -def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">; -def HasException : Predicate<"Subtarget.hasException()">; +// def HasFSL : Predicate<"Subtarget.hasFSL()">; +// def HasEFSL : Predicate<"Subtarget.hasEFSL()">; +// def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">; +// def HasException : Predicate<"Subtarget.hasException()">; def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">; def HasFPU : Predicate<"Subtarget.hasFPU()">; -def HasESR : Predicate<"Subtarget.hasESR()">; -def HasPVR : Predicate<"Subtarget.hasPVR()">; +// def HasESR : Predicate<"Subtarget.hasESR()">; +// def HasPVR : Predicate<"Subtarget.hasPVR()">; def HasMul64 : Predicate<"Subtarget.hasMul64()">; def HasSqrt : Predicate<"Subtarget.hasSqrt()">; -def HasMMU : Predicate<"Subtarget.hasMMU()">; +// def HasMMU : Predicate<"Subtarget.hasMMU()">; //===----------------------------------------------------------------------===// // MBlaze Operand, Complex Patterns and Transformations Definitions. 
@@ -170,18 +170,18 @@ class ArithI op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : TB; + [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_ALU>; class ArithI32 op, string instr_asm,Operand Od, PatLeaf imm_type> : TB; + [], IIC_ALU>; class ShiftI op, bits<2> flags, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : SHT; + [(set GPR:$dst, (OpNode GPR:$b, imm_type:$c))], IIC_SHT>; class ArithR op, bits<11> flags, string instr_asm, SDNode OpNode, InstrItinClass itin> : @@ -193,7 +193,7 @@ class ArithRI op, string instr_asm, SDNode OpNode, Operand Od, PatLeaf imm_type> : TBR; + [(set GPR:$dst, (OpNode imm_type:$b, GPR:$c))], IIC_ALU>; class ArithN op, bits<11> flags, string instr_asm, InstrItinClass itin> : @@ -204,7 +204,7 @@ class ArithN op, bits<11> flags, string instr_asm, class ArithNI op, string instr_asm,Operand Od, PatLeaf imm_type> : TB; + [], IIC_ALU>; class ArithRN op, bits<11> flags, string instr_asm, InstrItinClass itin> : @@ -215,7 +215,7 @@ class ArithRN op, bits<11> flags, string instr_asm, class ArithRNI op, string instr_asm,Operand Od, PatLeaf imm_type> : TBR; + [], IIC_ALU>; //===----------------------------------------------------------------------===// // Misc Arithmetic Instructions @@ -224,23 +224,23 @@ class ArithRNI op, string instr_asm,Operand Od, PatLeaf imm_type> : class Logic op, bits<11> flags, string instr_asm, SDNode OpNode> : TA; + [(set GPR:$dst, (OpNode GPR:$b, GPR:$c))], IIC_ALU>; class LogicI op, string instr_asm, SDNode OpNode> : TB; + IIC_ALU>; class LogicI32 op, string instr_asm> : TB; + [], IIC_ALU>; class PatCmp op, bits<11> flags, string instr_asm> : TA; + [], IIC_ALU>; //===----------------------------------------------------------------------===// // Memory Access Instructions @@ -248,22 +248,22 @@ class PatCmp op, bits<11> flags, string instr_asm> : class LoadM op, bits<11> flags, string instr_asm> : TA; + [], IIC_MEMl>; class LoadMI op, string instr_asm, PatFrag OpNode> : TB; + [(set 
(i32 GPR:$dst), (OpNode iaddr:$addr))], IIC_MEMl>; class StoreM op, bits<11> flags, string instr_asm> : TA; + [], IIC_MEMs>; class StoreMI op, string instr_asm, PatFrag OpNode> : TB; + [(OpNode (i32 GPR:$dst), iaddr:$addr)], IIC_MEMs>; //===----------------------------------------------------------------------===// // Branch Instructions @@ -271,7 +271,7 @@ class StoreMI op, string instr_asm, PatFrag OpNode> : class Branch op, bits<5> br, bits<11> flags, string instr_asm> : TA { + [], IIC_BR> { let rd = 0x0; let ra = br; let Form = FCCR; @@ -280,7 +280,7 @@ class Branch op, bits<5> br, bits<11> flags, string instr_asm> : class BranchI op, bits<5> br, string instr_asm> : TB { + [], IIC_BR> { let rd = 0; let ra = br; let Form = FCCI; @@ -292,7 +292,7 @@ class BranchI op, bits<5> br, string instr_asm> : class BranchL op, bits<5> br, bits<11> flags, string instr_asm> : TA { + [], IIC_BRl> { let ra = br; let Form = FRCR; } @@ -300,7 +300,7 @@ class BranchL op, bits<5> br, bits<11> flags, string instr_asm> : class BranchLI op, bits<5> br, string instr_asm> : TB { + [], IIC_BRl> { let ra = br; let Form = FRCI; } @@ -312,7 +312,7 @@ class BranchC op, bits<5> br, bits<11> flags, string instr_asm> : TA { + [], IIC_BRc> { let rd = br; let Form = FCRR; } @@ -320,7 +320,7 @@ class BranchC op, bits<5> br, bits<11> flags, string instr_asm> : class BranchCI op, bits<5> br, string instr_asm> : TB { + [], IIC_BRc> { let rd = br; let Form = FCRI; } @@ -330,71 +330,74 @@ class BranchCI op, bits<5> br, string instr_asm> : //===----------------------------------------------------------------------===// let isCommutable = 1, isAsCheapAsAMove = 1 in { - def ADDK : Arith<0x04, 0x000, "addk ", add, IIAlu>; + def ADDK : Arith<0x04, 0x000, "addk ", add, IIC_ALU>; def AND : Logic<0x21, 0x000, "and ", and>; def OR : Logic<0x20, 0x000, "or ", or>; def XOR : Logic<0x22, 0x000, "xor ", xor>; - def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">; - def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">; - def PCMPNE 
: PatCmp<0x23, 0x400, "pcmpne ">; + + let Predicates=[HasPatCmp] in { + def PCMPBF : PatCmp<0x20, 0x400, "pcmpbf ">; + def PCMPEQ : PatCmp<0x22, 0x400, "pcmpeq ">; + def PCMPNE : PatCmp<0x23, 0x400, "pcmpne ">; + } let Defs = [CARRY] in { - def ADD : Arith<0x00, 0x000, "add ", addc, IIAlu>; + def ADD : Arith<0x00, 0x000, "add ", addc, IIC_ALU>; let Uses = [CARRY] in { - def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>; + def ADDC : Arith<0x02, 0x000, "addc ", adde, IIC_ALU>; } } let Uses = [CARRY] in { - def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>; + def ADDKC : ArithN<0x06, 0x000, "addkc ", IIC_ALU>; } } let isAsCheapAsAMove = 1 in { - def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>; - def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>; - def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>; - def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIAlu>; + def ANDN : ArithN<0x23, 0x000, "andn ", IIC_ALU>; + def CMP : ArithN<0x05, 0x001, "cmp ", IIC_ALU>; + def CMPU : ArithN<0x05, 0x003, "cmpu ", IIC_ALU>; + def RSUBK : ArithR<0x05, 0x000, "rsubk ", sub, IIC_ALU>; let Defs = [CARRY] in { - def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIAlu>; + def RSUB : ArithR<0x01, 0x000, "rsub ", subc, IIC_ALU>; let Uses = [CARRY] in { - def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>; + def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIC_ALU>; } } let Uses = [CARRY] in { - def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>; + def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIC_ALU>; } } let isCommutable = 1, Predicates=[HasMul] in { - def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>; + def MUL : Arith<0x10, 0x000, "mul ", mul, IIC_ALUm>; } let isCommutable = 1, Predicates=[HasMul,HasMul64] in { - def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>; - def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>; + def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIC_ALUm>; + def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIC_ALUm>; } let Predicates=[HasMul,HasMul64] in { - def MULHSU : 
ArithN<0x10, 0x002, "mulhsu ", IIAlu>; + def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIC_ALUm>; } let Predicates=[HasBarrel] in { - def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>; - def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>; - def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>; + def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIC_SHT>; + def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIC_SHT>; + def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIC_SHT>; def BSRLI : ShiftI<0x19, 0x0, "bsrli ", srl, uimm5, immZExt5>; def BSRAI : ShiftI<0x19, 0x1, "bsrai ", sra, uimm5, immZExt5>; def BSLLI : ShiftI<0x19, 0x2, "bslli ", shl, uimm5, immZExt5>; } let Predicates=[HasDiv] in { - def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIAlu>; - def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIAlu>; + def IDIV : ArithR<0x12, 0x000, "idiv ", sdiv, IIC_ALUd>; + def IDIVU : ArithR<0x12, 0x002, "idivu ", udiv, IIC_ALUd>; } //===----------------------------------------------------------------------===// @@ -552,7 +555,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTSD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rtsd $target, $imm", [], - IIBranch>; + IIC_BR>; } let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, @@ -560,7 +563,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTID : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rtid $target, $imm", [], - IIBranch>; + IIC_BR>; } let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, @@ -568,7 +571,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTBD : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rtbd $target, $imm", [], - IIBranch>; + IIC_BR>; } let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, @@ -576,7 +579,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, def RTED : TB<0x2D, (outs), (ins GPR:$target, simm16:$imm), "rted $target, $imm", [], - IIBranch>; + IIC_BR>; } 
//===----------------------------------------------------------------------===// @@ -584,7 +587,7 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, isBarrier=1, //===----------------------------------------------------------------------===// let neverHasSideEffects = 1 in { - def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIAlu>; + def NOP : MBlazeInst< 0x20, FC, (outs), (ins), "nop ", [], IIC_ALU>; } let usesCustomInserter = 1 in { @@ -611,17 +614,17 @@ let usesCustomInserter = 1 in { let rb = 0 in { def SEXT16 : TA<0x24, 0x061, (outs GPR:$dst), (ins GPR:$src), - "sext16 $dst, $src", [], IIAlu>; + "sext16 $dst, $src", [], IIC_ALU>; def SEXT8 : TA<0x24, 0x060, (outs GPR:$dst), (ins GPR:$src), - "sext8 $dst, $src", [], IIAlu>; + "sext8 $dst, $src", [], IIC_ALU>; let Defs = [CARRY] in { def SRL : TA<0x24, 0x041, (outs GPR:$dst), (ins GPR:$src), - "srl $dst, $src", [], IIAlu>; + "srl $dst, $src", [], IIC_ALU>; def SRA : TA<0x24, 0x001, (outs GPR:$dst), (ins GPR:$src), - "sra $dst, $src", [], IIAlu>; + "sra $dst, $src", [], IIC_ALU>; let Uses = [CARRY] in { def SRC : TA<0x24, 0x021, (outs GPR:$dst), (ins GPR:$src), - "src $dst, $src", [], IIAlu>; + "src $dst, $src", [], IIC_ALU>; } } } @@ -637,36 +640,36 @@ let isCodeGenOnly=1 in { //===----------------------------------------------------------------------===// let Form=FRCS in { def MFS : SPC<0x25, 0x2, (outs GPR:$dst), (ins SPR:$src), - "mfs $dst, $src", [], IIAlu>; + "mfs $dst, $src", [], IIC_ALU>; } let Form=FCRCS in { def MTS : SPC<0x25, 0x3, (outs SPR:$dst), (ins GPR:$src), - "mts $dst, $src", [], IIAlu>; + "mts $dst, $src", [], IIC_ALU>; } def MSRSET : MSR<0x25, 0x20, (outs GPR:$dst), (ins uimm15:$set), - "msrset $dst, $set", [], IIAlu>; + "msrset $dst, $set", [], IIC_ALU>; def MSRCLR : MSR<0x25, 0x22, (outs GPR:$dst), (ins uimm15:$clr), - "msrclr $dst, $clr", [], IIAlu>; + "msrclr $dst, $clr", [], IIC_ALU>; let rd=0x0, Form=FCRR in { def WDC : TA<0x24, 0x64, (outs), (ins GPR:$a, GPR:$b), - "wdc $a, 
$b", [], IIAlu>; + "wdc $a, $b", [], IIC_WDC>; def WDCF : TA<0x24, 0x74, (outs), (ins GPR:$a, GPR:$b), - "wdc.flush $a, $b", [], IIAlu>; + "wdc.flush $a, $b", [], IIC_WDC>; def WDCC : TA<0x24, 0x66, (outs), (ins GPR:$a, GPR:$b), - "wdc.clear $a, $b", [], IIAlu>; + "wdc.clear $a, $b", [], IIC_WDC>; def WIC : TA<0x24, 0x68, (outs), (ins GPR:$a, GPR:$b), - "wic $a, $b", [], IIAlu>; + "wic $a, $b", [], IIC_WDC>; } def BRK : BranchL<0x26, 0x0C, 0x000, "brk ">; def BRKI : BranchLI<0x2E, 0x0C, "brki ">; def IMM : MBlazeInst<0x2C, FCCI, (outs), (ins simm16:$imm), - "imm $imm", [], IIAlu>; + "imm $imm", [], IIC_ALU>; //===----------------------------------------------------------------------===// // Pseudo instructions for atomic operations @@ -848,11 +851,6 @@ def : Pat<(MBWrapper tconstpool:$in), (ORI (i32 R0), tconstpool:$in)>; // Misc instructions def : Pat<(and (i32 GPR:$lh), (not (i32 GPR:$rh))),(ANDN GPR:$lh, GPR:$rh)>; -// Arithmetic with immediates -def : Pat<(add (i32 GPR:$in), imm:$imm),(ADDIK GPR:$in, imm:$imm)>; -def : Pat<(or (i32 GPR:$in), imm:$imm),(ORI GPR:$in, imm:$imm)>; -def : Pat<(xor (i32 GPR:$in), imm:$imm),(XORI GPR:$in, imm:$imm)>; - // Convert any extend loads into zero extend loads def : Pat<(extloadi8 iaddr:$src), (i32 (LBUI iaddr:$src))>; def : Pat<(extloadi16 iaddr:$src), (i32 (LHUI iaddr:$src))>; diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp index fa9140d7922f..ed8511df5ee8 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp @@ -181,6 +181,26 @@ unsigned MBlazeRegisterInfo::getSpecialRegisterFromNumbering(unsigned Reg) { return 0; // Not reached } +bool MBlazeRegisterInfo::isRegister(unsigned Reg) { + return Reg <= 31; +} + +bool MBlazeRegisterInfo::isSpecialRegister(unsigned Reg) { + switch (Reg) { + case 0x0000 : case 0x0001 : case 0x0003 : case 0x0005 : + case 0x0007 : case 0x000B : case 0x000D : case 0x1000 : + case 0x1001 : case 0x1002 : 
case 0x1003 : case 0x1004 : + case 0x2000 : case 0x2001 : case 0x2002 : case 0x2003 : + case 0x2004 : case 0x2005 : case 0x2006 : case 0x2007 : + case 0x2008 : case 0x2009 : case 0x200A : case 0x200B : + return true; + + default: + return false; + } + return false; // Not reached +} + unsigned MBlazeRegisterInfo::getPICCallReg() { return MBlaze::R20; } diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h index 839536d4e7b5..69ec5aa48914 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.h +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h @@ -45,6 +45,8 @@ struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo { static unsigned getRegisterNumbering(unsigned RegEnum); static unsigned getRegisterFromNumbering(unsigned RegEnum); static unsigned getSpecialRegisterFromNumbering(unsigned RegEnum); + static bool isRegister(unsigned RegEnum); + static bool isSpecialRegister(unsigned RegEnum); /// Get PIC indirect call register static unsigned getPICCallReg(); diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td index fbefb22e9f25..1a695a74bca0 100644 --- a/lib/Target/MBlaze/MBlazeRegisterInfo.td +++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td @@ -85,18 +85,19 @@ let Namespace = "MBlaze" in { def RTLBX : MBlazeSPRReg<0x1002, "rtlbx">, DwarfRegNum<[41]>; def RTLBLO : MBlazeSPRReg<0x1003, "rtlblo">, DwarfRegNum<[42]>; def RTLBHI : MBlazeSPRReg<0x1004, "rtlbhi">, DwarfRegNum<[43]>; - def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[44]>; - def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[45]>; - def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[46]>; - def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[47]>; - def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[48]>; - def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[49]>; - def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[50]>; - def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[51]>; - def RPVR8 : 
MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[52]>; - def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[53]>; - def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[54]>; - def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[55]>; + def RTLBSX : MBlazeSPRReg<0x1004, "rtlbsx">, DwarfRegNum<[44]>; + def RPVR0 : MBlazeSPRReg<0x2000, "rpvr0">, DwarfRegNum<[45]>; + def RPVR1 : MBlazeSPRReg<0x2001, "rpvr1">, DwarfRegNum<[46]>; + def RPVR2 : MBlazeSPRReg<0x2002, "rpvr2">, DwarfRegNum<[47]>; + def RPVR3 : MBlazeSPRReg<0x2003, "rpvr3">, DwarfRegNum<[48]>; + def RPVR4 : MBlazeSPRReg<0x2004, "rpvr4">, DwarfRegNum<[49]>; + def RPVR5 : MBlazeSPRReg<0x2005, "rpvr5">, DwarfRegNum<[50]>; + def RPVR6 : MBlazeSPRReg<0x2006, "rpvr6">, DwarfRegNum<[51]>; + def RPVR7 : MBlazeSPRReg<0x2007, "rpvr7">, DwarfRegNum<[52]>; + def RPVR8 : MBlazeSPRReg<0x2008, "rpvr8">, DwarfRegNum<[53]>; + def RPVR9 : MBlazeSPRReg<0x2009, "rpvr9">, DwarfRegNum<[54]>; + def RPVR10 : MBlazeSPRReg<0x200A, "rpvr10">, DwarfRegNum<[55]>; + def RPVR11 : MBlazeSPRReg<0x200B, "rpvr11">, DwarfRegNum<[56]>; // The carry bit. In the Microblaze this is really bit 29 of the // MSR register but this is the only bit of that register that we diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td index ac4d98c9240e..4662f25ceb12 100644 --- a/lib/Target/MBlaze/MBlazeSchedule.td +++ b/lib/Target/MBlaze/MBlazeSchedule.td @@ -8,57 +8,48 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// Functional units across MBlaze chips sets. Based on GCC/MBlaze backend files. +// MBlaze functional units. 
//===----------------------------------------------------------------------===// -def ALU : FuncUnit; -def IMULDIV : FuncUnit; +def IF : FuncUnit; +def ID : FuncUnit; +def EX : FuncUnit; +def MA : FuncUnit; +def WB : FuncUnit; //===----------------------------------------------------------------------===// // Instruction Itinerary classes used for MBlaze //===----------------------------------------------------------------------===// -def IIAlu : InstrItinClass; -def IILoad : InstrItinClass; -def IIStore : InstrItinClass; -def IIXfer : InstrItinClass; -def IIBranch : InstrItinClass; -def IIHiLo : InstrItinClass; -def IIImul : InstrItinClass; -def IIIdiv : InstrItinClass; -def IIFcvt : InstrItinClass; -def IIFmove : InstrItinClass; -def IIFcmp : InstrItinClass; -def IIFadd : InstrItinClass; -def IIFmulSingle : InstrItinClass; -def IIFmulDouble : InstrItinClass; -def IIFdivSingle : InstrItinClass; -def IIFdivDouble : InstrItinClass; -def IIFsqrtSingle : InstrItinClass; -def IIFsqrtDouble : InstrItinClass; -def IIFrecipFsqrtStep : InstrItinClass; -def IIPseudo : InstrItinClass; +def IIC_ALU : InstrItinClass; +def IIC_ALUm : InstrItinClass; +def IIC_ALUd : InstrItinClass; +def IIC_SHT : InstrItinClass; +def IIC_FSLg : InstrItinClass; +def IIC_FSLp : InstrItinClass; +def IIC_MEMs : InstrItinClass; +def IIC_MEMl : InstrItinClass; +def IIC_FPU : InstrItinClass; +def IIC_FPUd : InstrItinClass; +def IIC_FPUf : InstrItinClass; +def IIC_FPUi : InstrItinClass; +def IIC_FPUs : InstrItinClass; +def IIC_FPUc : InstrItinClass; +def IIC_BR : InstrItinClass; +def IIC_BRc : InstrItinClass; +def IIC_BRl : InstrItinClass; +def IIC_WDC : InstrItinClass; +def IIC_Pseudo : InstrItinClass; //===----------------------------------------------------------------------===// -// MBlaze Generic instruction itineraries. +// MBlaze generic instruction itineraries. 
//===----------------------------------------------------------------------===// -def MBlazeGenericItineraries : ProcessorItineraries< - [ALU, IMULDIV], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> -]>; +def MBlazeGenericItineraries : ProcessorItineraries<[], [], []>; + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for three stage pipeline. +//===----------------------------------------------------------------------===// +include "MBlazeSchedule3.td" + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for five stage pipeline. +//===----------------------------------------------------------------------===// +include "MBlazeSchedule5.td" diff --git a/lib/Target/MBlaze/MBlazeSchedule3.td b/lib/Target/MBlaze/MBlazeSchedule3.td new file mode 100644 index 000000000000..ccbf99dbd3a2 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSchedule3.td @@ -0,0 +1,236 @@ +//===- MBlazeSchedule3.td - MBlaze Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for the three stage pipeline. 
+//===----------------------------------------------------------------------===// +def MBlazePipe3Itineraries : ProcessorItineraries< + [IF,ID,EX], [], [ + + // ALU instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the stages. The + // two source operands are read during the decode stage and the result is + // ready after the execute stage. + InstrItinData< IIC_ALU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU multiply instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. The instruction takes one cycle to execute in each of the + // pipeline stages except the execute stage, which takes three cycles. The + // two source operands are read during the decode stage and the result is + // ready after the execute stage. + InstrItinData< IIC_ALUm, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<3,[EX]>], // three cycles in execute stage + [ 4 // result ready after four cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU divide instruction with one destination register two register source + // operands. The instruction takes one cycle to execute in each the pipeline + // stages except the execute stage, which takes 34 cycles. The two + // source operands are read during the decode stage and the result is ready + // after the execute stage. 
+ InstrItinData< IIC_ALUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<34,[EX]>], // 34 cycles in execute stage + [ 35 // result ready after 35 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Shift instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the pipeline stages + // except the execute stage, which takes two cycles. The two source operands + // are read during the decode stage and the result is ready after the execute + // stage. + InstrItinData< IIC_SHT, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 3 // result ready after three cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch instruction with one source operand register. The instruction takes + // one cycle to execute in each of the pipeline stages. The source operand is + // read during the decode stage. + InstrItinData< IIC_BR, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 1 ]>, // first operand read after one cycle + + // Conditional branch instruction with two source operand registers. The + // instruction takes one cycle to execute in each of the pipeline stages. The + // two source operands are read during the decode stage. 
+ InstrItinData< IIC_BRc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch and link instruction with one destination register and one source + // operand register. The instruction takes one cycle to execute in each of + // the pipeline stages. The source operand is read during the decode stage + // and the destination register is ready after the execute stage. + InstrItinData< IIC_BRl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]>], // one cycle in execute stage + [ 2 // result ready after two cycles + , 1 ]>, // first operand read after one cycle + + // Cache control instruction with two source operand registers. The + // instruction takes one cycle to execute in each of the pipeline stages + // except the execute stage, which takes two cycles. The source + // operands are read during the decode stage. + InstrItinData< IIC_WDC, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point instruction with one destination register and two source + // operand registers. The instruction takes one cycle to execute in each of + // the pipeline stages except the execute stage, which takes six cycles. The + // source operands are read during the decode stage and the results are ready + // after the execute stage.
+ InstrItinData< IIC_FPU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<6,[EX]>], // six cycles in execute stage + [ 7 // result ready after seven cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point divide instruction with one destination register and two + // source operand registers. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes 30 + // cycles. The source operands are read during the decode stage and the + // results are ready after the execute stage. + InstrItinData< IIC_FPUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<30,[EX]>], // 30 cycles in execute stage + [ 31 // result ready after 31 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Convert floating point to integer instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the execute stage, + // which takes seven cycles. The source operands are read during the decode + // stage and the results are ready after the execute stage. + InstrItinData< IIC_FPUi, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<7,[EX]>], // seven cycles in execute stage + [ 8 // result ready after eight cycles + , 1 ]>, // first operand read after one cycle + + // Convert integer to floating point instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the execute stage, + // which takes six cycles. The source operands are read during the decode + // stage and the results are ready after the execute stage.
+ InstrItinData< IIC_FPUf, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<6,[EX]>], // six cycles in execute stage + [ 7 // result ready after seven cycles + , 1 ]>, // first operand read after one cycle + + // Floating point square root instruction with one destination register and + // one source operand register. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes 29 + // cycles. The source operands are read during the decode stage and the + // results are ready after the execute stage. + InstrItinData< IIC_FPUs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<29,[EX]>], // 29 cycles in execute stage + [ 30 // result ready after 30 cycles + , 1 ]>, // first operand read after one cycle + + // Floating point comparison instruction with one destination register and + // two source operand registers. The instruction takes one cycle to execute + // in each of the pipeline stages except the execute stage, which takes three + // cycles. The source operands are read during the decode stage and the + // results are ready after the execute stage. + InstrItinData< IIC_FPUc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<3,[EX]>], // three cycles in execute stage + [ 4 // result ready after four cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // FSL get instruction with one register or immediate source operand and one + // destination register. The instruction takes one cycle to execute in each + // of the pipeline stages except the execute stage, which takes two cycles. + // The one source operand is read during the decode stage and the result is + // ready after the execute stage. 
+ InstrItinData< IIC_FSLg, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 3 // result ready after three cycles + , 1 ]>, // first operand read after one cycle + + // FSL put instruction with either two register source operands or one + // register source operand and one immediate operand. There is no result + // produced by the instruction. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes two + // cycles. The two source operands are read during the decode stage. + InstrItinData< IIC_FSLp, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Memory store instruction with either three register source operands or two + // register source operands and one immediate operand. There is no result + // produced by the instruction. The instruction takes one cycle to execute in + // each of the pipeline stages except the execute stage, which takes two + // cycles. All of the source operands are read during the decode stage. + InstrItinData< IIC_MEMs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 1 // first operand read after one cycle + , 1 // second operand read after one cycle + , 1 ]>, // third operand read after one cycle + + // Memory load instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. The instruction takes one cycle to execute in each of the + // pipeline stages except the execute stage, which takes two cycles.
All of + // the source operands are read during the decode stage and the result is + // ready after the execute stage. + InstrItinData< IIC_MEMl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<2,[EX]>], // two cycles in execute stage + [ 3 // result ready after three cycles + , 1 // first operand read after one cycle + , 1 ]> // second operand read after one cycle +]>; diff --git a/lib/Target/MBlaze/MBlazeSchedule5.td b/lib/Target/MBlaze/MBlazeSchedule5.td new file mode 100644 index 000000000000..fa88766fdb18 --- /dev/null +++ b/lib/Target/MBlaze/MBlazeSchedule5.td @@ -0,0 +1,267 @@ +//===- MBlazeSchedule5.td - MBlaze Scheduling Definitions --*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MBlaze instruction itineraries for the five stage pipeline. +//===----------------------------------------------------------------------===// +def MBlazePipe5Itineraries : ProcessorItineraries< + [IF,ID,EX,MA,WB], [], [ + + // ALU instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the stages. The + // two source operands are read during the decode stage and the result is + // ready after the execute stage.
+ InstrItinData< IIC_ALU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU multiply instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. The instruction takes one cycle to execute in each of the + // pipeline stages. The two source operands are read during the decode stage + // and the result is ready after the execute stage. + InstrItinData< IIC_ALUm, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // ALU divide instruction with one destination register two register source + // operands. The instruction takes one cycle to execute in each the pipeline + // stages except the memory access stage, which takes 31 cycles. The two + // source operands are read during the decode stage and the result is ready + // after the memory access stage. 
+ InstrItinData< IIC_ALUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<31,[MA]> // 31 cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 33 // result ready after 33 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Shift instruction with one destination register and either two register + // source operands or one register source operand and one immediate operand. + // The instruction takes one cycle to execute in each of the pipeline stages. + // The two source operands are read during the decode stage and the result is + // ready after the memory access stage. + InstrItinData< IIC_SHT, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 3 // result ready after three cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch instruction with one source operand register. The instruction takes + // one cycle to execute in each of the pipeline stages. The source operand is + // read during the decode stage. + InstrItinData< IIC_BR, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 ]>, // first operand read after one cycle + + // Conditional branch instruction with two source operand registers. The + // instruction takes one cycle to execute in each of the pipeline stages. The + // two source operands are read during the decode stage. 
+ InstrItinData< IIC_BRc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Branch and link instruction with one destination register and one source + // operand register. The instruction takes one cycle to execute in each of + // the pipeline stages. The source operand is read during the decode stage + // and the destination register is ready after the writeback stage. + InstrItinData< IIC_BRl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 4 // result ready after four cycles + , 1 ]>, // first operand read after one cycle + + // Cache control instruction with two source operand registers. The + // instruction takes one cycle to execute in each of the pipeline stages + // except the memory access stage, which takes two cycles. The source + // operands are read during the decode stage. + InstrItinData< IIC_WDC, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<2,[MA]> // two cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point instruction with one destination register and two source + // operand registers. The instruction takes one cycle to execute in each of + // the pipeline stages except the memory access stage, which takes two + // cycles. 
The source operands are read during the decode stage and the + // results are ready after the writeback stage. + InstrItinData< IIC_FPU, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<2,[MA]> // two cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 5 // result ready after five cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Floating point divide instruction with one destination register and two + // source operand registers. The instruction takes one cycle to execute in + // each of the pipeline stages except the memory access stage, which takes 26 + // cycles. The source operands are read during the decode stage and the + // results are ready after the writeback stage. + InstrItinData< IIC_FPUd, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<26,[MA]> // 26 cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 29 // result ready after 29 cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Convert floating point to integer instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the memory access stage, + // which takes three cycles. The source operands are read during the decode + // stage and the results are ready after the writeback stage. 
+ InstrItinData< IIC_FPUi, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<3,[MA]> // three cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 6 // result ready after six cycles + , 1 ]>, // first operand read after one cycle + + // Convert integer to floating point instruction with one destination + // register and one source operand register. The instruction takes one cycle + // to execute in each of the pipeline stages except the memory access stage, + // which takes two cycles. The source operands are read during the decode + // stage and the results are ready after the writeback stage. + InstrItinData< IIC_FPUf, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<2,[MA]> // two cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 5 // result ready after five cycles + , 1 ]>, // first operand read after one cycle + + // Floating point square root instruction with one destination register and + // one source operand register. The instruction takes one cycle to execute in + // each of the pipeline stages except the memory access stage, which takes 25 + // cycles. The source operands are read during the decode stage and the + // results are ready after the writeback stage. 
+ InstrItinData< IIC_FPUs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<25,[MA]> // 25 cycles in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 28 // result ready after 28 cycles + , 1 ]>, // first operand read after one cycle + + // Floating point comparison instruction with one destination register and + // two source operand registers. The instruction takes one cycle to execute + // in each of the pipeline stages. The source operands are read during the + // decode stage and the results are ready after the execute stage. + InstrItinData< IIC_FPUc, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // FSL get instruction with one register or immediate source operand and one + // destination register. The instruction takes one cycle to execute in each + // of the pipeline stages. The one source operand is read during the decode + // stage and the result is ready after the execute stage. + InstrItinData< IIC_FSLg, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 2 // result ready after two cycles + , 1 ]>, // first operand read after one cycle + + // FSL put instruction with either two register source operands or one + // register source operand and one immediate operand. There is no result + // produced by the instruction. 
The instruction takes one cycle to execute in + // each of the pipeline stages. The two source operands are read during the + // decode stage. + InstrItinData< IIC_FSLp, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 ]>, // second operand read after one cycle + + // Memory store instruction with either three register source operands or two + // register source operands and one immediate operand. There is no result + // produced by the instruction. The instruction takes one cycle to execute in + // each of the pipeline stages. All of the source operands are read during + // the decode stage. + InstrItinData< IIC_MEMs, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 1 // first operand read after one cycle + , 1 // second operand read after one cycle + , 1 ]>, // third operand read after one cycle + + // Memory load instruction with one destination register and either two + // register source operands or one register source operand and one immediate + // operand. The instruction takes one cycle to execute in each of the + // pipeline stages. All of the source operands are read during the decode + // stage and the result is ready after the writeback stage. 
+ InstrItinData< IIC_MEMl, + [ InstrStage<1,[IF]> // one cycle in fetch stage + , InstrStage<1,[ID]> // one cycle in decode stage + , InstrStage<1,[EX]> // one cycle in execute stage + , InstrStage<1,[MA]> // one cycle in memory access stage + , InstrStage<1,[WB]>], // one cycle in write back stage + [ 4 // result ready after four cycles + , 1 // first operand read after one cycle + , 1 ]> // second operand read after one cycle +]>; diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp index 344052156869..a80744a4769a 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.cpp +++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp @@ -13,19 +13,39 @@ #include "MBlazeSubtarget.h" #include "MBlaze.h" +#include "MBlazeRegisterInfo.h" #include "MBlazeGenSubtarget.inc" #include "llvm/Support/CommandLine.h" using namespace llvm; MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS): - HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false), - HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false), - HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false), - HasMul64(false), HasSqrt(false), HasMMU(false) + HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false), + HasFPU(false), HasMul64(false), HasSqrt(false) { - std::string CPU = "v400"; - MBlazeArchVersion = V400; - // Parse features string. - ParseSubtargetFeatures(FS, CPU); + std::string CPU = "mblaze"; + CPU = ParseSubtargetFeatures(FS, CPU); + + // Only use instruction scheduling if the selected CPU has an instruction + // itinerary (the default CPU is the only one that doesn't).
+ HasItin = CPU != "mblaze"; + DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n"); + + // Compute the issue width of the MBlaze itineraries + computeIssueWidth(); } + +void MBlazeSubtarget::computeIssueWidth() { + InstrItins.IssueWidth = 1; +} + +bool MBlazeSubtarget:: +enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtarget::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(&MBlaze::GPRRegClass); + return HasItin && OptLevel >= CodeGenOpt::Default; +} + diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h index bebb3f773e03..2255b2809be2 100644 --- a/lib/Target/MBlaze/MBlazeSubtarget.h +++ b/lib/Target/MBlaze/MBlazeSubtarget.h @@ -24,29 +24,14 @@ namespace llvm { class MBlazeSubtarget : public TargetSubtarget { protected: - - enum MBlazeArchEnum { - V400, V500, V600, V700, V710 - }; - - // MBlaze architecture version - MBlazeArchEnum MBlazeArchVersion; - - bool HasPipe3; bool HasBarrel; bool HasDiv; bool HasMul; - bool HasFSL; - bool HasEFSL; - bool HasMSRSet; - bool HasException; bool HasPatCmp; bool HasFPU; - bool HasESR; - bool HasPVR; bool HasMul64; bool HasSqrt; - bool HasMMU; + bool HasItin; InstrItineraryData InstrItins; @@ -61,18 +46,26 @@ class MBlazeSubtarget : public TargetSubtarget { std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); + /// Compute the maximum number of issues per cycle for the + /// MBlaze scheduling itineraries. + void computeIssueWidth(); + + /// enablePostRAScheduler - True at 'Default' optimization or higher when the selected CPU has an itinerary. + bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; + + /// getInstrItineraryData - Return the instruction itineraries based on subtarget.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + + bool hasItin() const { return HasItin; } + bool hasPCMP() const { return HasPatCmp; } bool hasFPU() const { return HasFPU; } bool hasSqrt() const { return HasSqrt; } bool hasMul() const { return HasMul; } bool hasMul64() const { return HasMul64; } bool hasDiv() const { return HasDiv; } bool hasBarrel() const { return HasBarrel; } - - bool isV400() const { return MBlazeArchVersion == V400; } - bool isV500() const { return MBlazeArchVersion == V500; } - bool isV600() const { return MBlazeArchVersion == V600; } - bool isV700() const { return MBlazeArchVersion == V700; } - bool isV710() const { return MBlazeArchVersion == V710; } }; } // End llvm namespace diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp index cd949e1998de..df34a83e33a8 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp +++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp @@ -36,19 +36,18 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, bool RelaxAll, bool NoExecStack) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: + + if (TheTriple.isOSDarwin()) { llvm_unreachable("MBlaze does not support Darwin MACH-O format"); return NULL; - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: + } + + if (TheTriple.isOSWindows()) { llvm_unreachable("MBlaze does not support Windows COFF format"); return NULL; - default: - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, - NoExecStack); } + + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); } @@ -87,7 +86,8 @@ MBlazeTargetMachine(const Target &T, const std::string &TT, DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"), InstrInfo(*this), FrameLowering(Subtarget), - TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this) { + TLInfo(*this), TSInfo(*this), ELFWriterInfo(*this), + InstrItins(Subtarget.getInstrItineraryData()) { if 
(getRelocationModel() == Reloc::Default) { setRelocationModel(Reloc::Static); } diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h index 45ad07858887..48ce37a482fc 100644 --- a/lib/Target/MBlaze/MBlazeTargetMachine.h +++ b/lib/Target/MBlaze/MBlazeTargetMachine.h @@ -38,13 +38,18 @@ namespace llvm { MBlazeSelectionDAGInfo TSInfo; MBlazeIntrinsicInfo IntrinsicInfo; MBlazeELFWriterInfo ELFWriterInfo; + InstrItineraryData InstrItins; + public: MBlazeTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &FS); virtual const MBlazeInstrInfo *getInstrInfo() const { return &InstrInfo; } + virtual const InstrItineraryData *getInstrItineraryData() const + { return &InstrItins; } + virtual const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } diff --git a/lib/Target/MBlaze/TODO b/lib/Target/MBlaze/TODO index 2e613eb0ca0f..317d7c0a0b14 100644 --- a/lib/Target/MBlaze/TODO +++ b/lib/Target/MBlaze/TODO @@ -9,8 +9,6 @@ needs to be examined more closely: - The stack layout needs to be examined to make sure it meets the standard, especially in regards to var arg functions. - - The processor itineraries are copied from a different backend - and need to be updated to model the MicroBlaze correctly. - Look at the MBlazeGenFastISel.inc stuff and make use of it if appropriate. @@ -18,9 +16,6 @@ There are a few things that need to be looked at: - There are some instructions that are not generated by the backend and have not been tested as far as the parser is concerned. - - The assembly parser does not use any MicroBlaze specific directives. + - The assembly parser does not use many MicroBlaze specific directives. I should investigate if there are MicroBlaze specific directive and, if there are, add them. - - The instruction MFS and MTS use special names for some of the - special registers that can be accessed. 
These special register - names should be parsed by the assembly parser. diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h index f0e1ce22841b..63860dcc7e3a 100644 --- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h +++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h @@ -18,11 +18,12 @@ namespace llvm { class MCOperand; + class TargetMachine; class MSP430InstPrinter : public MCInstPrinter { public: - MSP430InstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) { - } + MSP430InstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) {} virtual void printInst(const MCInst *MI, raw_ostream &O); diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp index a1a7f44c19c4..5264d680d8b3 100644 --- a/lib/Target/MSP430/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp @@ -164,10 +164,11 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) { } static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new MSP430InstPrinter(MAI); + return new MSP430InstPrinter(TM, MAI); return 0; } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index a95d59c0576c..006785b1f74d 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -515,7 +515,7 @@ MSP430TargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in - // necessary since all emited instructions must be stuck together. + // necessary since all emitted instructions must be stuck together. 
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 26df1a05295e..8939b0a87a57 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -13,6 +13,7 @@ tablegen(MipsGenSubtarget.inc -gen-subtarget) add_llvm_target(MipsCodeGen MipsAsmPrinter.cpp MipsDelaySlotFiller.cpp + MipsExpandPseudo.cpp MipsInstrInfo.cpp MipsISelDAGToDAG.cpp MipsISelLowering.cpp diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h index a9ab050d6f0d..05b4c5a070d6 100644 --- a/lib/Target/Mips/Mips.h +++ b/lib/Target/Mips/Mips.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file contains the entry points for global functions defined in +// This file contains the entry points for global functions defined in // the LLVM Mips back-end. // //===----------------------------------------------------------------------===// @@ -25,6 +25,7 @@ namespace llvm { FunctionPass *createMipsISelDag(MipsTargetMachine &TM); FunctionPass *createMipsDelaySlotFillerPass(MipsTargetMachine &TM); + FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM); extern Target TheMipsTarget; extern Target TheMipselTarget; diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index 3e6437b93ccf..b79016d788f0 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -59,7 +59,7 @@ def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", def FeatureMips2 : SubtargetFeature<"mips2", "MipsArchVersion", "Mips2", "Mips2 ISA Support">; def FeatureMips32 : SubtargetFeature<"mips32", "MipsArchVersion", "Mips32", - "Mips32 ISA Support", + "Mips32 ISA Support", [FeatureCondMov, FeatureBitCount]>; def FeatureMips32r2 : SubtargetFeature<"mips32r2", "MipsArchVersion", "Mips32r2", "Mips32r2 ISA Support", @@ -81,7 +81,7 @@ def : Proc<"r6000", 
[FeatureMips2]>; def : Proc<"4ke", [FeatureMips32r2]>; -// Allegrex is a 32bit subset of r4000, both for interger and fp registers, +// Allegrex is a 32bit subset of r4000, both for integer and fp registers, // but much more similar to Mips2 than Mips3. It also contains some of // Mips32/Mips32r2 instructions and a custom vector fpu processor. def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index bd28a9bd073b..502f744e4d85 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -30,7 +30,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegistry.h" @@ -53,14 +53,14 @@ namespace { return "Mips Assembly Printer"; } - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O); void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O); - void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); - void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, + void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier = 0); void printSavedRegsBitmask(raw_ostream &O); void printHex32(unsigned int Value, raw_ostream &O); @@ -77,7 +77,8 @@ namespace { } virtual void EmitFunctionBodyStart(); virtual void EmitFunctionBodyEnd(); - virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const; + 
virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock* + MBB) const; static const char *getRegisterName(unsigned RegNo); virtual void EmitFunctionEntryLabel(); @@ -94,12 +95,12 @@ namespace { // -- Frame directive "frame Stackpointer, Stacksize, RARegister" // Describe the stack frame. // -// -- Mask directives "(f)mask bitmask, offset" +// -- Mask directives "(f)mask bitmask, offset" // Tells the assembler which registers are saved and where. -// bitmask - contain a little endian bitset indicating which registers are -// saved on function prologue (e.g. with a 0x80000000 mask, the +// bitmask - contain a little endian bitset indicating which registers are +// saved on function prologue (e.g. with a 0x80000000 mask, the // assembler knows the register 31 (RA) is saved at prologue. -// offset - the position before stack pointer subtraction indicating where +// offset - the position before stack pointer subtraction indicating where // the first saved register on prologue is located. (e.g. with a // // Consider the following function prologue: @@ -110,9 +111,9 @@ namespace { // sw $ra, 40($sp) // sw $fp, 36($sp) // -// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and -// 30 (FP) are saved at prologue. As the save order on prologue is from -// left to right, RA is saved first. A -8 offset means that after the +// With a 0xc0000000 mask, the assembler knows the register 31 (RA) and +// 30 (FP) are saved at prologue. As the save order on prologue is from +// left to right, RA is saved first. A -8 offset means that after the // stack pointer subtration, the first register in the mask (RA) will be // saved at address 48-8=40. // @@ -122,7 +123,7 @@ namespace { // Mask directives //===----------------------------------------------------------------------===// -// Create a bitmask with all callee saved registers for CPU or Floating Point +// Create a bitmask with all callee saved registers for CPU or Floating Point // registers. 
For CPU registers consider RA, GP and FP for saving if necessary. void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { const TargetFrameLowering *TFI = TM.getFrameLowering(); @@ -168,7 +169,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { // Print a 32 bit hex number with all numbers. void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) { O << "0x"; - for (int i = 7; i >= 0; i--) + for (int i = 7; i >= 0; i--) O << utohexstr((Value & (0xF << (i*4))) >> (i*4)); } @@ -191,9 +192,9 @@ void MipsAsmPrinter::emitFrameDirective() { } /// Emit Set directives. -const char *MipsAsmPrinter::getCurrentABIString() const { +const char *MipsAsmPrinter::getCurrentABIString() const { switch (Subtarget->getTargetABI()) { - case MipsSubtarget::O32: return "abi32"; + case MipsSubtarget::O32: return "abi32"; case MipsSubtarget::O64: return "abiO64"; case MipsSubtarget::N32: return "abiN32"; case MipsSubtarget::N64: return "abi64"; @@ -203,7 +204,7 @@ const char *MipsAsmPrinter::getCurrentABIString() const { llvm_unreachable("Unknown Mips ABI"); return NULL; -} +} void MipsAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); @@ -214,7 +215,7 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { /// the first basic block in the function. void MipsAsmPrinter::EmitFunctionBodyStart() { emitFrameDirective(); - + SmallString<128> Str; raw_svector_ostream OS(Str); printSavedRegsBitmask(OS); @@ -226,7 +227,7 @@ void MipsAsmPrinter::EmitFunctionBodyStart() { void MipsAsmPrinter::EmitFunctionBodyEnd() { // There are instruction for this macros, but they must // always be at the function end, and we can't emit and - // break with BB logic. + // break with BB logic. 
OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); OutStreamer.EmitRawText(StringRef("\t.set\treorder")); OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); @@ -236,8 +237,8 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. -bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) - const { +bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock* + MBB) const { // The predecessor has to be immediately before this block. const MachineBasicBlock *Pred = *MBB->pred_begin(); @@ -246,16 +247,41 @@ bool MipsAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock * if (const BasicBlock *bb = Pred->getBasicBlock()) if (isa(bb->getTerminator())) return false; + + // If this is a landing pad, it isn't a fall through. If it has no preds, + // then nothing falls through to it. + if (MBB->isLandingPad() || MBB->pred_empty()) + return false; + + // If there isn't exactly one predecessor, it can't be a fall through. + MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI; + ++PI2; + + if (PI2 != MBB->pred_end()) + return false; + + // The predecessor has to be immediately before this block. + if (!Pred->isLayoutSuccessor(MBB)) + return false; + + // If the block is completely empty, then it definitely does fall through. + if (Pred->empty()) + return true; - return AsmPrinter::isBlockOnlyReachableByFallthrough(MBB); + // Otherwise, check the last instruction. + // Check if the last terminator is an unconditional branch. + MachineBasicBlock::const_iterator I = Pred->end(); + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) ; + + return !I->getDesc().isBarrier(); } // Print out an operand for an inline asm expression. 
-bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, +bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant,const char *ExtraCode, raw_ostream &O) { // Does this asm operand have a single letter operand modifier? - if (ExtraCode && ExtraCode[0]) + if (ExtraCode && ExtraCode[0]) return true; // Unknown modifier. printOperand(MI, OpNo, O); @@ -273,22 +299,9 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, switch(MO.getTargetFlags()) { case MipsII::MO_GPREL: O << "%gp_rel("; break; case MipsII::MO_GOT_CALL: O << "%call16("; break; - case MipsII::MO_GOT: { - const MachineOperand &LastMO = MI->getOperand(opNum-1); - bool LastMOIsGP = LastMO.getType() == MachineOperand::MO_Register - && LastMO.getReg() == Mips::GP; - if (MI->getOpcode() == Mips::LW || LastMOIsGP) - O << "%got("; - else - O << "%lo("; - break; - } - case MipsII::MO_ABS_HILO: - if (MI->getOpcode() == Mips::LUi) - O << "%hi("; - else - O << "%lo("; - break; + case MipsII::MO_GOT: O << "%got("; break; + case MipsII::MO_ABS_HI: O << "%hi("; break; + case MipsII::MO_ABS_LO: O << "%lo("; break; } switch (MO.getType()) { @@ -308,6 +321,12 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, O << *Mang->getSymbol(MO.getGlobal()); break; + case MachineOperand::MO_BlockAddress: { + MCSymbol* BA = GetBlockAddressSymbol(MO.getBlockAddress()); + O << BA->getName(); + break; + } + case MachineOperand::MO_ExternalSymbol: O << *GetExternalSymbolSymbol(MO.getSymbolName()); break; @@ -323,7 +342,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, if (MO.getOffset()) O << "+" << MO.getOffset(); break; - + default: llvm_unreachable(""); } @@ -336,7 +355,7 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum, const MachineOperand &MO = MI->getOperand(opNum); if (MO.isImm()) O << (unsigned short int)MO.getImm(); - else + else printOperand(MI, opNum, O); } @@ -352,8 +371,8 @@ 
printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O, return; } - // Load/Store memory operands -- imm($reg) - // If PIC target the target is loaded as the + // Load/Store memory operands -- imm($reg) + // If PIC target the target is loaded as the // pattern lw $25,%call16($28) printOperand(MI, opNum, O); O << "("; @@ -365,12 +384,12 @@ void MipsAsmPrinter:: printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O, const char *Modifier) { const MachineOperand& MO = MI->getOperand(opNum); - O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); + O << Mips::MipsFCCToString((Mips::CondCode)MO.getImm()); } void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: Use SwitchSection. - + // Tell the assembler which ABI we are using OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString())); @@ -383,11 +402,11 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { } // return to previous section - OutStreamer.EmitRawText(StringRef("\t.previous")); + OutStreamer.EmitRawText(StringRef("\t.previous")); } // Force static initialization. -extern "C" void LLVMInitializeMipsAsmPrinter() { +extern "C" void LLVMInitializeMipsAsmPrinter() { RegisterAsmPrinter X(TheMipsTarget); RegisterAsmPrinter Y(TheMipselTarget); } diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 8f313efaf8da..57aeb1d2793c 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -1,23 +1,23 @@ //===- MipsCallingConv.td - Calling Conventions for Mips ---*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // This describes the calling conventions for Mips architecture. 
//===----------------------------------------------------------------------===// /// CCIfSubtarget - Match if the current subtarget has a feature F. -class CCIfSubtarget: +class CCIfSubtarget: CCIf().", F), A>; //===----------------------------------------------------------------------===// // Mips O32 Calling Convention //===----------------------------------------------------------------------===// -// Only the return rules are defined here for O32. The rules for argument +// Only the return rules are defined here for O32. The rules for argument // passing are defined in MipsISelLowering.cpp. def RetCC_MipsO32 : CallingConv<[ // i32 are returned in registers V0, V1 @@ -41,15 +41,15 @@ def CC_MipsEABI : CallingConv<[ // Integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>, - // Single fp arguments are passed in pairs within 32-bit mode - CCIfType<[f32], CCIfSubtarget<"isSingleFloat()", + // Single fp arguments are passed in pairs within 32-bit mode + CCIfType<[f32], CCIfSubtarget<"isSingleFloat()", CCAssignToReg<[F12, F13, F14, F15, F16, F17, F18, F19]>>>, - CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()", + CCIfType<[f32], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[F12, F14, F16, F18]>>>, - // The first 4 doubl fp arguments are passed in single fp registers. - CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", + // The first 4 double fp arguments are passed in single fp registers. 
+ CCIfType<[f64], CCIfSubtarget<"isNotSingleFloat()", CCAssignToReg<[D6, D7, D8, D9]>>>, // Integer values get stored in stack slots that are 4 bytes in diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp new file mode 100644 index 000000000000..4423f5147980 --- /dev/null +++ b/lib/Target/Mips/MipsExpandPseudo.cpp @@ -0,0 +1,117 @@ +//===-- MipsExpandPseudo.cpp - Expand pseudo instructions ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands pseudo instructions into target instructions after register +// allocation but before post-RA scheduling. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "mips-expand-pseudo" + +#include "Mips.h" +#include "MipsTargetMachine.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/ADT/Statistic.h" + +using namespace llvm; + +namespace { + struct MipsExpandPseudo : public MachineFunctionPass { + + TargetMachine &TM; + const TargetInstrInfo *TII; + + static char ID; + MipsExpandPseudo(TargetMachine &tm) + : MachineFunctionPass(ID), TM(tm), TII(tm.getInstrInfo()) { } + + virtual const char *getPassName() const { + return "Mips PseudoInstrs Expansion"; + } + + bool runOnMachineFunction(MachineFunction &F); + bool runOnMachineBasicBlock(MachineBasicBlock &MBB); + + private: + void ExpandBuildPairF64(MachineBasicBlock&, MachineBasicBlock::iterator); + void ExpandExtractElementF64(MachineBasicBlock&, + MachineBasicBlock::iterator); + }; + char MipsExpandPseudo::ID = 0; +} // end of anonymous namespace + +bool MipsExpandPseudo::runOnMachineFunction(MachineFunction& F) { + bool Changed = false; + + for 
(MachineFunction::iterator I = F.begin(); I != F.end(); ++I) + Changed |= runOnMachineBasicBlock(*I); + + return Changed; +} + +bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) { + + bool Changed = false; + for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) { + const TargetInstrDesc& Tid = I->getDesc(); + + switch(Tid.getOpcode()) { + default: + ++I; + continue; + case Mips::BuildPairF64: + ExpandBuildPairF64(MBB, I); + break; + case Mips::ExtractElementF64: + ExpandExtractElementF64(MBB, I); + break; + } + + // delete original instr + MBB.erase(I++); + Changed = true; + } + + return Changed; +} + +void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB, + MachineBasicBlock::iterator I) { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg(); + const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1); + DebugLoc dl = I->getDebugLoc(); + const unsigned* SubReg = + TM.getRegisterInfo()->getSubRegisters(DstReg); + + // mtc1 Lo, $fp + // mtc1 Hi, $fp + 1 + BuildMI(MBB, I, dl, Mtc1Tdd, *SubReg).addReg(LoReg); + BuildMI(MBB, I, dl, Mtc1Tdd, *(SubReg + 1)).addReg(HiReg); +} + +void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB, + MachineBasicBlock::iterator I) { + unsigned DstReg = I->getOperand(0).getReg(); + unsigned SrcReg = I->getOperand(1).getReg(); + unsigned N = I->getOperand(2).getImm(); + const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1); + DebugLoc dl = I->getDebugLoc(); + const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg); + + BuildMI(MBB, I, dl, Mfc1Tdd, DstReg).addReg(*(SubReg + N)); +} + +/// createMipsMipsExpandPseudoPass - Returns a pass that expands pseudo +/// instrs into real instrs +FunctionPass *llvm::createMipsExpandPseudoPass(MipsTargetMachine &tm) { + return new MipsExpandPseudo(tm); +} diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 
87a097a5d590..21e3314a6669 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -203,6 +203,46 @@ void MipsFrameLowering::adjustMipsStackFrame(MachineFunction &MF) const { MipsFI->setFPUTopSavedRegOff(TopFPUSavedRegOff-StackOffset); } + +// expand pair of register and immediate if the immediate doesn't fit in the +// 16-bit offset field. +// e.g. +// if OrigImm = 0x10000, OrigReg = $sp: +// generate the following sequence of instrs: +// lui $at, hi(0x10000) +// addu $at, $sp, $at +// +// (NewReg, NewImm) = ($at, lo(Ox10000)) +// return true +static bool expandRegLargeImmPair(unsigned OrigReg, int OrigImm, + unsigned& NewReg, int& NewImm, + MachineBasicBlock& MBB, + MachineBasicBlock::iterator I) { + // OrigImm fits in the 16-bit field + if (OrigImm < 0x8000 && OrigImm >= -0x8000) { + NewReg = OrigReg; + NewImm = OrigImm; + return false; + } + + MachineFunction* MF = MBB.getParent(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + DebugLoc DL = I->getDebugLoc(); + int ImmLo = OrigImm & 0xffff; + int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) + + ((OrigImm & 0x8000) != 0); + + // FIXME: change this when mips goes MC". + BuildMI(MBB, I, DL, TII->get(Mips::NOAT)); + BuildMI(MBB, I, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi); + BuildMI(MBB, I, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg) + .addReg(Mips::AT); + NewReg = Mips::AT; + NewImm = ImmLo; + + return true; +} + void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); @@ -214,6 +254,9 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); + unsigned NewReg = 0; + int NewImm = 0; + bool ATUsed; // Get the right frame order for Mips. 
adjustMipsStackFrame(MF); @@ -236,22 +279,40 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // Adjust stack : addi sp, sp, (-imm) + ATUsed = expandRegLargeImmPair(Mips::SP, -StackSize, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(-StackSize); + .addReg(NewReg).addImm(NewImm); - // Save the return address only if the function isnt a leaf one. + // FIXME: change this when mips goes MC". + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); + + // Save the return address only if the function isn't a leaf one. // sw $ra, stack_loc($sp) if (MFI->adjustsStack()) { + ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) - .addReg(Mips::RA).addImm(RAOffset).addReg(Mips::SP); + .addReg(Mips::RA).addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC". + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } // if framepointer enabled, save it and set it // to point to the stack pointer if (hasFP(MF)) { // sw $fp,stack_loc($sp) + ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::SW)) - .addReg(Mips::FP).addImm(FPOffset).addReg(Mips::SP); + .addReg(Mips::FP).addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC". 
+ if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); // move $fp, $sp BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDu), Mips::FP) @@ -280,6 +341,10 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, int FPOffset = MipsFI->getFPStackOffset(); int RAOffset = MipsFI->getRAStackOffset(); + unsigned NewReg = 0; + int NewImm = 0; + bool ATUsed = false; + // if framepointer enabled, restore it and restore the // stack pointer if (hasFP(MF)) { @@ -288,21 +353,39 @@ void MipsFrameLowering::emitEpilogue(MachineFunction &MF, .addReg(Mips::FP).addReg(Mips::ZERO); // lw $fp,stack_loc($sp) + ATUsed = expandRegLargeImmPair(Mips::SP, FPOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::FP) - .addImm(FPOffset).addReg(Mips::SP); + .addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC". + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } - // Restore the return address only if the function isnt a leaf one. + // Restore the return address only if the function isn't a leaf one. // lw $ra, stack_loc($sp) if (MFI->adjustsStack()) { + ATUsed = expandRegLargeImmPair(Mips::SP, RAOffset, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::LW), Mips::RA) - .addImm(RAOffset).addReg(Mips::SP); + .addImm(NewImm).addReg(NewReg); + + // FIXME: change this when mips goes MC". + if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } // adjust stack : insert addi sp, sp, (imm) if (NumBytes) { + ATUsed = expandRegLargeImmPair(Mips::SP, NumBytes, NewReg, NewImm, MBB, + MBBI); BuildMI(MBB, MBBI, dl, TII.get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(NumBytes); + .addReg(NewReg).addImm(NewImm); + + // FIXME: change this when mips goes MC". 
+ if (ATUsed) + BuildMI(MBB, MBBI, dl, TII.get(Mips::ATMACRO)); } } diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index a8426c1b70fd..34647df4f354 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef ALPHA_FRAMEINFO_H -#define ALPHA_FRAMEINFO_H +#ifndef MIPS_FRAMEINFO_H +#define MIPS_FRAMEINFO_H #include "Mips.h" #include "MipsSubtarget.h" diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 755e04df63be..0382964fe942 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -52,19 +52,19 @@ class MipsDAGToDAGISel : public SelectionDAGISel { /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can /// make the right decision when generating code for different targets. const MipsSubtarget &Subtarget; - + public: explicit MipsDAGToDAGISel(MipsTargetMachine &tm) : SelectionDAGISel(tm), TM(tm), Subtarget(tm.getSubtarget()) {} - + // Pass Name virtual const char *getPassName() const { return "MIPS DAG->DAG Pattern Instruction Selection"; - } - + } -private: + +private: // Include the pieces autogenerated from the target description. 
#include "MipsGenDAGISel.inc" @@ -116,12 +116,14 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { Offset = CurDAG->getTargetConstant(0, MVT::i32); return true; } - + // on PIC code Load GA if (TM.getRelocationModel() == Reloc::PIC_) { - if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || - (Addr.getOpcode() == ISD::TargetConstantPool) || - (Addr.getOpcode() == ISD::TargetJumpTable)){ + if ((Addr.getOpcode() == ISD::TargetGlobalAddress) || + (Addr.getOpcode() == ISD::TargetConstantPool) || + (Addr.getOpcode() == ISD::TargetJumpTable) || + (Addr.getOpcode() == ISD::TargetBlockAddress) || + (Addr.getOpcode() == ISD::TargetExternalSymbol)) { Base = CurDAG->getRegister(Mips::GP, MVT::i32); Offset = Addr; return true; @@ -130,8 +132,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { if ((Addr.getOpcode() == ISD::TargetExternalSymbol || Addr.getOpcode() == ISD::TargetGlobalAddress)) return false; - } - + } + // Operand is a result from an ADD. if (Addr.getOpcode() == ISD::ADD) { if (ConstantSDNode *CN = dyn_cast(Addr.getOperand(1))) { @@ -158,10 +160,10 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { // Generate: // lui $2, %hi($CPI1_0) // lwc1 $f0, %lo($CPI1_0)($2) - if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || + if ((Addr.getOperand(0).getOpcode() == MipsISD::Hi || Addr.getOperand(0).getOpcode() == ISD::LOAD) && Addr.getOperand(1).getOpcode() == MipsISD::Lo) { - SDValue LoVal = Addr.getOperand(1); + SDValue LoVal = Addr.getOperand(1); if (dyn_cast(LoVal.getOperand(0))) { Base = Addr.getOperand(0); Offset = LoVal.getOperand(0); @@ -176,7 +178,7 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) { } SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { - MVT::SimpleValueType NVT = + MVT::SimpleValueType NVT = N->getValueType(0).getSimpleVT().SimpleTy; if (!Subtarget.isMips1() || NVT != MVT::f64) @@ -199,14 +201,14 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { MemRefs0[0] = cast(N)->getMemOperand(); 
DebugLoc dl = N->getDebugLoc(); - // The second load should start after for 4 bytes. + // The second load should start after for 4 bytes. if (ConstantSDNode *C = dyn_cast(Offset0)) Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); else if (ConstantPoolSDNode *CP = dyn_cast(Offset0)) - Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), - MVT::i32, - CP->getAlignment(), - CP->getOffset()+4, + Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), + MVT::i32, + CP->getAlignment(), + CP->getOffset()+4, CP->getTargetFlags()); else return NULL; @@ -220,16 +222,16 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { // Generate: // lwc $f0, X($3) // lwc $f1, X+4($3) - SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, + SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset0, Base, Chain); SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, NVT), 0); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(LD0, 0)); SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, MVT::Other, Offset1, Base, SDValue(LD0, 1)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(LD1, 0)); ReplaceUses(SDValue(N, 0), I1); @@ -241,7 +243,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) { SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { - if (!Subtarget.isMips1() || + if (!Subtarget.isMips1() || N->getOperand(1).getValueType() != MVT::f64) return NULL; @@ -265,12 +267,12 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) { DebugLoc dl = N->getDebugLoc(); // Get the even and odd part from the f64 register - SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, + SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::sub_fpodd, dl, MVT::f32, N1); SDValue 
FPEven = CurDAG->getTargetExtractSubreg(Mips::sub_fpeven, dl, MVT::f32, N1); - // The second store should start after for 4 bytes. + // The second store should start after for 4 bytes. if (ConstantSDNode *C = dyn_cast(Offset0)) Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); else @@ -315,26 +317,26 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { } /// - // Instruction Selection not handled by the auto-generated + // Instruction Selection not handled by the auto-generated // tablegen selection should be handled here. - /// + /// switch(Opcode) { default: break; - case ISD::SUBE: + case ISD::SUBE: case ISD::ADDE: { SDValue InFlag = Node->getOperand(2), CmpLHS; unsigned Opc = InFlag.getOpcode(); (void)Opc; - assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || - (Opc == ISD::SUBC || Opc == ISD::SUBE)) && + assert(((Opc == ISD::ADDC || Opc == ISD::ADDE) || + (Opc == ISD::SUBC || Opc == ISD::SUBE)) && "(ADD|SUB)E flag operand must come from (ADD|SUB)C/E insn"); unsigned MOp; if (Opcode == ISD::ADDE) { CmpLHS = InFlag.getValue(0); MOp = Mips::ADDu; - } else { + } else { CmpLHS = InFlag.getOperand(0); MOp = Mips::SUBu; } @@ -346,7 +348,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { EVT VT = LHS.getValueType(); SDNode *Carry = CurDAG->getMachineNode(Mips::SLTu, dl, VT, Ops, 2); - SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, + SDNode *AddCarry = CurDAG->getMachineNode(Mips::ADDu, dl, VT, SDValue(Carry,0), RHS); return CurDAG->SelectNodeTo(Node, MOp, VT, MVT::Glue, @@ -356,36 +358,34 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Mul/Div with two results case ISD::SDIVREM: case ISD::UDIVREM: + break; case ISD::SMUL_LOHI: case ISD::UMUL_LOHI: { SDValue Op1 = Node->getOperand(0); SDValue Op2 = Node->getOperand(1); unsigned Op; - if (Opcode == ISD::UMUL_LOHI || Opcode == ISD::SMUL_LOHI) - Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - else - Op = (Opcode == ISD::UDIVREM ? 
Mips::DIVu : Mips::DIV); + Op = (Opcode == ISD::UMUL_LOHI ? Mips::MULTu : Mips::MULT); - SDNode *MulDiv = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); + SDNode *Mul = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); - SDValue InFlag = SDValue(MulDiv, 0); - SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, + SDValue InFlag = SDValue(Mul, 0); + SDNode *Lo = CurDAG->getMachineNode(Mips::MFLO, dl, MVT::i32, MVT::Glue, InFlag); InFlag = SDValue(Lo,1); SDNode *Hi = CurDAG->getMachineNode(Mips::MFHI, dl, MVT::i32, InFlag); - if (!SDValue(Node, 0).use_empty()) + if (!SDValue(Node, 0).use_empty()) ReplaceUses(SDValue(Node, 0), SDValue(Lo,0)); - if (!SDValue(Node, 1).use_empty()) + if (!SDValue(Node, 1).use_empty()) ReplaceUses(SDValue(Node, 1), SDValue(Hi,0)); return NULL; } /// Special Muls - case ISD::MUL: + case ISD::MUL: if (Subtarget.isMips32()) break; case ISD::MULHS: @@ -394,7 +394,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { SDValue MulOp2 = Node->getOperand(1); unsigned MulOp = (Opcode == ISD::MULHU ? Mips::MULTu : Mips::MULT); - SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, + SDNode *MulNode = CurDAG->getMachineNode(MulOp, dl, MVT::Glue, MulOp1, MulOp2); SDValue InFlag = SDValue(MulNode, 0); @@ -408,24 +408,9 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { /// Div/Rem operations case ISD::SREM: case ISD::UREM: - case ISD::SDIV: - case ISD::UDIV: { - SDValue Op1 = Node->getOperand(0); - SDValue Op2 = Node->getOperand(1); - - unsigned Op, MOp; - if (Opcode == ISD::SDIV || Opcode == ISD::UDIV) { - Op = (Opcode == ISD::SDIV ? Mips::DIV : Mips::DIVu); - MOp = Mips::MFLO; - } else { - Op = (Opcode == ISD::SREM ? Mips::DIV : Mips::DIVu); - MOp = Mips::MFHI; - } - SDNode *Node = CurDAG->getMachineNode(Op, dl, MVT::Glue, Op1, Op2); - - SDValue InFlag = SDValue(Node, 0); - return CurDAG->getMachineNode(MOp, dl, MVT::i32, InFlag); - } + case ISD::SDIV: + case ISD::UDIV: + break; // Get target GOT address. 
case ISD::GLOBAL_OFFSET_TABLE: @@ -433,15 +418,15 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast(Node); - if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, + if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { + SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, Mips::ZERO, MVT::i32); SDValue Undef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::f64), 0); SDNode *MTC = CurDAG->getMachineNode(Mips::MTC1, dl, MVT::f32, Zero); - SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl, MVT::f64, Undef, SDValue(MTC, 0)); - SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl, MVT::f64, I0, SDValue(MTC, 0)); ReplaceUses(SDValue(Node, 0), I1); return I1.getNode(); @@ -460,61 +445,6 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return ResNode; // Other cases are autogenerated. break; - - /// Handle direct and indirect calls when using PIC. On PIC, when - /// GOT is smaller than about 64k (small code) the GA target is - /// loaded with only one instruction. Otherwise GA's target must - /// be loaded with 3 instructions. 
- case MipsISD::JmpLink: { - if (TM.getRelocationModel() == Reloc::PIC_) { - unsigned LastOpNum = Node->getNumOperands()-1; - - SDValue Chain = Node->getOperand(0); - SDValue Callee = Node->getOperand(1); - SDValue InFlag; - - // Skip the incomming flag if present - if (Node->getOperand(LastOpNum).getValueType() == MVT::Glue) - LastOpNum--; - - if ( (isa(Callee)) || - (isa(Callee)) ) - { - /// Direct call for global addresses and external symbols - SDValue GPReg = CurDAG->getRegister(Mips::GP, MVT::i32); - - // Use load to get GOT target - SDValue Ops[] = { Callee, GPReg, Chain }; - SDValue Load = SDValue(CurDAG->getMachineNode(Mips::LW, dl, MVT::i32, - MVT::Other, Ops, 3), 0); - Chain = Load.getValue(1); - - // Call target must be on T9 - Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Load, InFlag); - } else - /// Indirect call - Chain = CurDAG->getCopyToReg(Chain, dl, Mips::T9, Callee, InFlag); - - // Map the JmpLink operands to JALR - SDVTList NodeTys = CurDAG->getVTList(MVT::Other, MVT::Glue); - SmallVector Ops; - Ops.push_back(CurDAG->getRegister(Mips::T9, MVT::i32)); - - for (unsigned i = 2, e = LastOpNum+1; i != e; ++i) - Ops.push_back(Node->getOperand(i)); - Ops.push_back(Chain); - Ops.push_back(Chain.getValue(1)); - - // Emit Jump and Link Register - SDNode *ResNode = CurDAG->getMachineNode(Mips::JALR, dl, NodeTys, - &Ops[0], Ops.size()); - - // Replace Chain and InFlag - ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0)); - ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 1)); - return ResNode; - } - } } // Select the default instruction @@ -529,7 +459,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { return ResNode; } -/// createMipsISelDag - This pass converts a legalized DAG into a +/// createMipsISelDag - This pass converts a legalized DAG into a /// MIPS-specific DAG, ready for instruction scheduling. 
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) { return new MipsDAGToDAGISel(TM); diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 1d7a1c0ae8c7..1f1220f19203 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -41,15 +41,19 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::Lo : return "MipsISD::Lo"; case MipsISD::GPRel : return "MipsISD::GPRel"; case MipsISD::Ret : return "MipsISD::Ret"; - case MipsISD::SelectCC : return "MipsISD::SelectCC"; - case MipsISD::FPSelectCC : return "MipsISD::FPSelectCC"; case MipsISD::FPBrcond : return "MipsISD::FPBrcond"; case MipsISD::FPCmp : return "MipsISD::FPCmp"; + case MipsISD::CMovFP_T : return "MipsISD::CMovFP_T"; + case MipsISD::CMovFP_F : return "MipsISD::CMovFP_F"; case MipsISD::FPRound : return "MipsISD::FPRound"; case MipsISD::MAdd : return "MipsISD::MAdd"; case MipsISD::MAddu : return "MipsISD::MAddu"; case MipsISD::MSub : return "MipsISD::MSub"; case MipsISD::MSubu : return "MipsISD::MSubu"; + case MipsISD::DivRem : return "MipsISD::DivRem"; + case MipsISD::DivRemU : return "MipsISD::DivRemU"; + case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64"; + case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64"; default : return NULL; } } @@ -89,25 +93,22 @@ MipsTargetLowering(MipsTargetMachine &TM) // Mips Custom Operations setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); setOperationAction(ISD::SELECT, MVT::f32, Custom); setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT, MVT::i32, Custom); - setOperationAction(ISD::SETCC, MVT::f32, Custom); - setOperationAction(ISD::SETCC, MVT::f64, Custom); 
setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::VASTART, MVT::Other, Custom); - - // We custom lower AND/OR to handle the case where the DAG contain 'ands/ors' - // with operands comming from setcc fp comparions. This is necessary since - // the result from these setcc are in a flag registers (FCR31). - setOperationAction(ISD::AND, MVT::i32, Custom); - setOperationAction(ISD::OR, MVT::i32, Custom); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); // Operations not directly supported by Mips. setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -129,7 +130,9 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); setOperationAction(ISD::FCOS, MVT::f32, Expand); + setOperationAction(ISD::FCOS, MVT::f64, Expand); setOperationAction(ISD::FPOWI, MVT::f32, Expand); setOperationAction(ISD::FPOW, MVT::f32, Expand); setOperationAction(ISD::FLOG, MVT::f32, Expand); @@ -139,6 +142,10 @@ MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + // Use the default for now setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); @@ -160,6 +167,9 @@ MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::ADDE); setTargetDAGCombine(ISD::SUBE); + setTargetDAGCombine(ISD::SDIVREM); + setTargetDAGCombine(ISD::UDIVREM); + 
setTargetDAGCombine(ISD::SETCC); setStackPointerRegisterToSaveRestore(Mips::SP); computeRegisterProperties(); @@ -181,7 +191,7 @@ unsigned MipsTargetLowering::getFunctionAlignment(const Function *) const { // multHi/Lo: product of multiplication // Lo0: initial value of Lo register // Hi0: initial value of Hi register -// Return true if mattern matching was successful. +// Return true if pattern matching was successful. static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // ADDENode's second operand must be a flag output of an ADDC node in order // for the matching to be successful. @@ -255,7 +265,7 @@ static bool SelectMadd(SDNode* ADDENode, SelectionDAG* CurDAG) { // multHi/Lo: product of multiplication // Lo0: initial value of Lo register // Hi0: initial value of Hi register -// Return true if mattern matching was successful. +// Return true if pattern matching was successful. static bool SelectMsub(SDNode* SUBENode, SelectionDAG* CurDAG) { // SUBENode's second operand must be a flag output of an SUBC node in order // for the matching to be successful. @@ -346,6 +356,130 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, return SDValue(); } +static SDValue PerformDivRemCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + unsigned opc = N->getOpcode() == ISD::SDIVREM ? 
MipsISD::DivRem : + MipsISD::DivRemU; + DebugLoc dl = N->getDebugLoc(); + + SDValue DivRem = DAG.getNode(opc, dl, MVT::Glue, + N->getOperand(0), N->getOperand(1)); + SDValue InChain = DAG.getEntryNode(); + SDValue InGlue = DivRem; + + // insert MFLO + if (N->hasAnyUseOfValue(0)) { + SDValue CopyFromLo = DAG.getCopyFromReg(InChain, dl, Mips::LO, MVT::i32, + InGlue); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), CopyFromLo); + InChain = CopyFromLo.getValue(1); + InGlue = CopyFromLo.getValue(2); + } + + // insert MFHI + if (N->hasAnyUseOfValue(1)) { + SDValue CopyFromHi = DAG.getCopyFromReg(InChain, dl, + Mips::HI, MVT::i32, InGlue); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), CopyFromHi); + } + + return SDValue(); +} + +static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { + switch (CC) { + default: llvm_unreachable("Unknown fp condition code!"); + case ISD::SETEQ: + case ISD::SETOEQ: return Mips::FCOND_OEQ; + case ISD::SETUNE: return Mips::FCOND_UNE; + case ISD::SETLT: + case ISD::SETOLT: return Mips::FCOND_OLT; + case ISD::SETGT: + case ISD::SETOGT: return Mips::FCOND_OGT; + case ISD::SETLE: + case ISD::SETOLE: return Mips::FCOND_OLE; + case ISD::SETGE: + case ISD::SETOGE: return Mips::FCOND_OGE; + case ISD::SETULT: return Mips::FCOND_ULT; + case ISD::SETULE: return Mips::FCOND_ULE; + case ISD::SETUGT: return Mips::FCOND_UGT; + case ISD::SETUGE: return Mips::FCOND_UGE; + case ISD::SETUO: return Mips::FCOND_UN; + case ISD::SETO: return Mips::FCOND_OR; + case ISD::SETNE: + case ISD::SETONE: return Mips::FCOND_ONE; + case ISD::SETUEQ: return Mips::FCOND_UEQ; + } +} + + +// Returns true if condition code has to be inverted. +static bool InvertFPCondCode(Mips::CondCode CC) { + if (CC >= Mips::FCOND_F && CC <= Mips::FCOND_NGT) + return false; + + if (CC >= Mips::FCOND_T && CC <= Mips::FCOND_GT) + return true; + + assert(false && "Illegal Condition Code"); + return false; +} + +// Creates and returns an FPCmp node from a setcc node. 
+// Returns Op if setcc is not a floating point comparison. +static SDValue CreateFPCmp(SelectionDAG& DAG, const SDValue& Op) { + // must be a SETCC node + if (Op.getOpcode() != ISD::SETCC) + return Op; + + SDValue LHS = Op.getOperand(0); + + if (!LHS.getValueType().isFloatingPoint()) + return Op; + + SDValue RHS = Op.getOperand(1); + DebugLoc dl = Op.getDebugLoc(); + + // Assume the 3rd operand is a CondCodeSDNode. Add code to check the type of + // node if necessary. + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + + return DAG.getNode(MipsISD::FPCmp, dl, MVT::Glue, LHS, RHS, + DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); +} + +// Creates and returns a CMovFPT/F node. +static SDValue CreateCMovFP(SelectionDAG& DAG, SDValue Cond, SDValue True, + SDValue False, DebugLoc DL) { + bool invert = InvertFPCondCode((Mips::CondCode) + cast(Cond.getOperand(2)) + ->getSExtValue()); + + return DAG.getNode((invert ? MipsISD::CMovFP_F : MipsISD::CMovFP_T), DL, + True.getValueType(), True, False, Cond); +} + +static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG& DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget* Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + SDValue Cond = CreateFPCmp(DAG, SDValue(N, 0)); + + if (Cond.getOpcode() != MipsISD::FPCmp) + return SDValue(); + + SDValue True = DAG.getConstant(1, MVT::i32); + SDValue False = DAG.getConstant(0, MVT::i32); + + return CreateCMovFP(DAG, Cond, True, False, N->getDebugLoc()); +} + SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -357,6 +491,11 @@ SDValue MipsTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) return PerformADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: return PerformSUBECombine(N, DAG, DCI, Subtarget); + case ISD::SDIVREM: + case ISD::UDIVREM: + return PerformDivRemCombine(N, DAG, DCI, Subtarget); + case ISD::SETCC: + return PerformSETCCCombine(N, DAG, DCI, 
Subtarget); } return SDValue(); @@ -367,17 +506,15 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - case ISD::AND: return LowerANDOR(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::OR: return LowerANDOR(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); } return SDValue(); @@ -410,122 +547,110 @@ static Mips::FPBranchCode GetFPBranchCodeFromCond(Mips::CondCode CC) { return Mips::BRANCH_INVALID; } -static unsigned FPBranchCodeToOpc(Mips::FPBranchCode BC) { - switch(BC) { - default: - llvm_unreachable("Unknown branch code"); - case Mips::BRANCH_T : return Mips::BC1T; - case Mips::BRANCH_F : return Mips::BC1F; - case Mips::BRANCH_TL : return Mips::BC1TL; - case Mips::BRANCH_FL : return Mips::BC1FL; - } -} - -static Mips::CondCode FPCondCCodeToFCC(ISD::CondCode CC) { - switch (CC) { - default: llvm_unreachable("Unknown fp condition code!"); - case ISD::SETEQ: - case ISD::SETOEQ: return Mips::FCOND_EQ; - case ISD::SETUNE: return Mips::FCOND_OGL; - case ISD::SETLT: - case ISD::SETOLT: return Mips::FCOND_OLT; - case ISD::SETGT: - case ISD::SETOGT: return Mips::FCOND_OGT; - case ISD::SETLE: - case ISD::SETOLE: return Mips::FCOND_OLE; - case ISD::SETGE: - case ISD::SETOGE: return Mips::FCOND_OGE; - case ISD::SETULT: return Mips::FCOND_ULT; - case ISD::SETULE: return Mips::FCOND_ULE; - case ISD::SETUGT: return Mips::FCOND_UGT; - case ISD::SETUGE: return Mips::FCOND_UGE; - case ISD::SETUO: return 
Mips::FCOND_UN; - case ISD::SETO: return Mips::FCOND_OR; - case ISD::SETNE: - case ISD::SETONE: return Mips::FCOND_NEQ; - case ISD::SETUEQ: return Mips::FCOND_UEQ; - } -} - MachineBasicBlock * MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { + // There is no need to expand CMov instructions if target has + // conditional moves. + if (Subtarget->hasCondMov()) + return BB; + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); bool isFPCmp = false; DebugLoc dl = MI->getDebugLoc(); + unsigned Opc; switch (MI->getOpcode()) { default: assert(false && "Unexpected instr type to insert"); - case Mips::Select_FCC: - case Mips::Select_FCC_S32: - case Mips::Select_FCC_D32: - isFPCmp = true; // FALL THROUGH - case Mips::Select_CC: - case Mips::Select_CC_S32: - case Mips::Select_CC_D32: { - // To "insert" a SELECT_CC instruction, we actually have to insert the - // diamond control-flow pattern. The incoming instruction knows the - // destination vreg to set, the condition code register to branch on, the - // true/false values to select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator It = BB; - ++It; + case Mips::MOVT: + case Mips::MOVT_S: + case Mips::MOVT_D: + isFPCmp = true; + Opc = Mips::BC1F; + break; + case Mips::MOVF: + case Mips::MOVF_S: + case Mips::MOVF_D: + isFPCmp = true; + Opc = Mips::BC1T; + break; + case Mips::MOVZ_I: + case Mips::MOVZ_S: + case Mips::MOVZ_D: + Opc = Mips::BNE; + break; + case Mips::MOVN_I: + case Mips::MOVN_S: + case Mips::MOVN_D: + Opc = Mips::BEQ; + break; + } - // thisMBB: - // ... - // TrueVal = ... 
- // setcc r1, r2, r3 - // bNE r1, r0, copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - F->insert(It, copy0MBB); - F->insert(It, sinkMBB); + // To "insert" a SELECT_CC instruction, we actually have to insert the + // diamond control-flow pattern. The incoming instruction knows the + // destination vreg to set, the condition code register to branch on, the + // true/false values to select between, and a branch opcode to use. + const BasicBlock *LLVM_BB = BB->getBasicBlock(); + MachineFunction::iterator It = BB; + ++It; - // Transfer the remainder of BB and its successor edges to sinkMBB. - sinkMBB->splice(sinkMBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(BB); + // thisMBB: + // ... + // TrueVal = ... + // setcc r1, r2, r3 + // bNE r1, r0, copy1MBB + // fallthrough --> copy0MBB + MachineBasicBlock *thisMBB = BB; + MachineFunction *F = BB->getParent(); + MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); + F->insert(It, copy0MBB); + F->insert(It, sinkMBB); - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(sinkMBB); + // Transfer the remainder of BB and its successor edges to sinkMBB. + sinkMBB->splice(sinkMBB->begin(), BB, + llvm::next(MachineBasicBlock::iterator(MI)), + BB->end()); + sinkMBB->transferSuccessorsAndUpdatePHIs(BB); - // Emit the right instruction according to the type of the operands compared - if (isFPCmp) { - // Find the condiction code present in the setcc operation. - Mips::CondCode CC = (Mips::CondCode)MI->getOperand(4).getImm(); - // Get the branch opcode from the branch code. 
- unsigned Opc = FPBranchCodeToOpc(GetFPBranchCodeFromCond(CC)); - BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); - } else - BuildMI(BB, dl, TII->get(Mips::BNE)).addReg(MI->getOperand(1).getReg()) - .addReg(Mips::ZERO).addMBB(sinkMBB); + // Next, add the true and fallthrough blocks as its successors. + BB->addSuccessor(copy0MBB); + BB->addSuccessor(sinkMBB); - // copy0MBB: - // %FalseValue = ... - // # fallthrough to sinkMBB - BB = copy0MBB; + // Emit the right instruction according to the type of the operands compared + if (isFPCmp) + BuildMI(BB, dl, TII->get(Opc)).addMBB(sinkMBB); + else + BuildMI(BB, dl, TII->get(Opc)).addReg(MI->getOperand(2).getReg()) + .addReg(Mips::ZERO).addMBB(sinkMBB); - // Update machine-CFG edges - BB->addSuccessor(sinkMBB); - // sinkMBB: - // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] - // ... - BB = sinkMBB; + // copy0MBB: + // %FalseValue = ... + // # fallthrough to sinkMBB + BB = copy0MBB; + + // Update machine-CFG edges + BB->addSuccessor(sinkMBB); + + // sinkMBB: + // %Result = phi [ %TrueValue, thisMBB ], [ %FalseValue, copy0MBB ] + // ... + BB = sinkMBB; + + if (isFPCmp) BuildMI(*BB, BB->begin(), dl, TII->get(Mips::PHI), MI->getOperand(0).getReg()) .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB) - .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB); + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); + else + BuildMI(*BB, BB->begin(), dl, + TII->get(Mips::PHI), MI->getOperand(0).getReg()) + .addReg(MI->getOperand(3).getReg()).addMBB(thisMBB) + .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB); - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; - } - } + MI->eraseFromParent(); // The pseudo instruction is gone now. 
+ return BB; } //===----------------------------------------------------------------------===// @@ -589,27 +714,6 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, 2, dl); } -SDValue MipsTargetLowering:: -LowerANDOR(SDValue Op, SelectionDAG &DAG) const -{ - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - if (LHS.getOpcode() != MipsISD::FPCmp || RHS.getOpcode() != MipsISD::FPCmp) - return Op; - - SDValue True = DAG.getConstant(1, MVT::i32); - SDValue False = DAG.getConstant(0, MVT::i32); - - SDValue LSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - LHS, True, False, LHS.getOperand(2)); - SDValue RSEL = DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - RHS, True, False, RHS.getOperand(2)); - - return DAG.getNode(Op.getOpcode(), dl, MVT::i32, LSEL, RSEL); -} - SDValue MipsTargetLowering:: LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { @@ -619,58 +723,32 @@ LowerBRCOND(SDValue Op, SelectionDAG &DAG) const SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); - if (Op.getOperand(1).getOpcode() != MipsISD::FPCmp) + SDValue CondRes = CreateFPCmp(DAG, Op.getOperand(1)); + + // Return if flag is not set by a floating point comparison. + if (CondRes.getOpcode() != MipsISD::FPCmp) return Op; - SDValue CondRes = Op.getOperand(1); SDValue CCNode = CondRes.getOperand(2); Mips::CondCode CC = (Mips::CondCode)cast(CCNode)->getZExtValue(); SDValue BrCode = DAG.getConstant(GetFPBranchCodeFromCond(CC), MVT::i32); return DAG.getNode(MipsISD::FPBrcond, dl, Op.getValueType(), Chain, BrCode, - Dest, CondRes); -} - -SDValue MipsTargetLowering:: -LowerSETCC(SDValue Op, SelectionDAG &DAG) const -{ - // The operands to this are the left and right operands to compare (ops #0, - // and #1) and the condition code to compare them with (op #2) as a - // CondCodeSDNode. 
- SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); - - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - - return DAG.getNode(MipsISD::FPCmp, dl, Op.getValueType(), LHS, RHS, - DAG.getConstant(FPCondCCodeToFCC(CC), MVT::i32)); + Dest, CondRes); } SDValue MipsTargetLowering:: LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - SDValue Cond = Op.getOperand(0); - SDValue True = Op.getOperand(1); - SDValue False = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDValue Cond = CreateFPCmp(DAG, Op.getOperand(0)); - // if the incomming condition comes from a integer compare, the select - // operation must be SelectCC or a conditional move if the subtarget - // supports it. - if (Cond.getOpcode() != MipsISD::FPCmp) { - if (Subtarget->hasCondMov() && !True.getValueType().isFloatingPoint()) - return Op; - return DAG.getNode(MipsISD::SelectCC, dl, True.getValueType(), - Cond, True, False); - } + // Return if flag is not set by a floating point comparison. + if (Cond.getOpcode() != MipsISD::FPCmp) + return Op; - // if the incomming condition comes from fpcmp, the select - // operation must use FPSelectCC. 
- SDValue CCNode = Cond.getOperand(2); - return DAG.getNode(MipsISD::FPSelectCC, dl, True.getValueType(), - Cond, True, False, CCNode); + return CreateCMovFP(DAG, Cond, Op.getOperand(1), Op.getOperand(2), + Op.getDebugLoc()); } SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, @@ -693,12 +771,13 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return DAG.getNode(ISD::ADD, dl, MVT::i32, GOT, GPRelNode); } // %hi/%lo relocation - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, - MipsII::MO_ABS_HILO); - SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GA, 1); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); + SDValue GAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_HI); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_LO); + SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, VTs, &GAHi, 1); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); - } else { SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, MipsII::MO_GOT); @@ -707,9 +786,12 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. 
- if (!GV->hasLocalLinkage() || isa(GV)) + if (!GV->hasInternalLinkage() && + (!GV->hasLocalLinkage() || isa(GV))) return ResNode; - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GA); + SDValue GALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0, + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, GALo); return DAG.getNode(ISD::ADD, dl, MVT::i32, ResNode, Lo); } @@ -717,6 +799,34 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, return SDValue(0,0); } +SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, + SelectionDAG &DAG) const { + const BlockAddress *BA = cast(Op)->getBlockAddress(); + // FIXME there isn't actually debug info here + DebugLoc dl = Op.getDebugLoc(); + + if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + // %hi/%lo relocation + SDValue BAHi = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_HI); + SDValue BALo = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_LO); + SDValue Hi = DAG.getNode(MipsISD::Hi, dl, MVT::i32, BAHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALo); + return DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo); + } + + SDValue BAGOTOffset = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_GOT); + SDValue BALOOffset = DAG.getBlockAddress(BA, MVT::i32, true, + MipsII::MO_ABS_LO); + SDValue Load = DAG.getLoad(MVT::i32, dl, + DAG.getEntryNode(), BAGOTOffset, + MachinePointerInfo(), false, false, 0); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset); + return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); +} + SDValue MipsTargetLowering:: LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { @@ -732,7 +842,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_; - unsigned char OpFlag = IsPIC ? MipsII::MO_GOT : MipsII::MO_ABS_HILO; + unsigned char OpFlag = IsPIC ? 
MipsII::MO_GOT : MipsII::MO_ABS_HI; EVT PtrVT = Op.getValueType(); JumpTableSDNode *JT = cast(Op); @@ -747,7 +857,9 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const MachinePointerInfo(), false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTI); + SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, JTILo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); return ResNode; @@ -764,7 +876,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // gp_rel relocation // FIXME: we should reference the constant pool using small data sections, - // but the asm printer currently doens't support this feature without + // but the asm printer currently doesn't support this feature without // hacking it. This feature should come soon so we can uncomment the // stuff below. //if (IsInSmallSection(C->getType())) { @@ -773,18 +885,22 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const // ResNode = DAG.getNode(ISD::ADD, MVT::i32, GOT, GPRelNode); if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { - SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_ABS_HILO); - SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CP); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + SDValue CPHi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_HI); + SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_LO); + SDValue HiPart = DAG.getNode(MipsISD::Hi, dl, MVT::i32, CPHi); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, HiPart, Lo); } else { SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - N->getOffset(), MipsII::MO_GOT); + N->getOffset(), MipsII::MO_GOT); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), CP, 
MachinePointerInfo::getConstantPool(), false, false, 0); - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CP); + SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), + N->getOffset(), MipsII::MO_ABS_LO); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); ResNode = DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -937,46 +1053,28 @@ static bool CC_MipsO32_VarArgs(unsigned ValNo, MVT ValVT, LocInfo = CCValAssign::AExt; } + unsigned Reg; + if (ValVT == MVT::i32 || ValVT == MVT::f32) { - if (unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize)) { - State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - return false; - } - unsigned Off = State.AllocateStack(4, 4); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); - return false; - } + Reg = State.AllocateReg(IntRegs, IntRegsSize); + LocVT = MVT::i32; + } else if (ValVT == MVT::f64) { + Reg = State.AllocateReg(IntRegs, IntRegsSize); + if (Reg == Mips::A1 || Reg == Mips::A3) + Reg = State.AllocateReg(IntRegs, IntRegsSize); + State.AllocateReg(IntRegs, IntRegsSize); + LocVT = MVT::i32; + } else + llvm_unreachable("Cannot handle this ValVT."); - unsigned UnallocIntReg = State.getFirstUnallocated(IntRegs, IntRegsSize); - if (ValVT == MVT::f64) { - if (IntRegs[UnallocIntReg] == (unsigned (Mips::A1))) { - // A1 can't be used anymore, because 64 bit arguments - // must be aligned when copied back to the caller stack - State.AllocateReg(IntRegs, IntRegsSize); - UnallocIntReg++; - } + if (!Reg) { + unsigned SizeInBytes = ValVT.getSizeInBits() >> 3; + unsigned Offset = State.AllocateStack(SizeInBytes, SizeInBytes); + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + } else + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo)); - if (IntRegs[UnallocIntReg] == (unsigned (Mips::A0)) || - IntRegs[UnallocIntReg] == (unsigned (Mips::A2))) { - unsigned Reg = State.AllocateReg(IntRegs, IntRegsSize); - 
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, MVT::i32, LocInfo)); - // Shadow the next register so it can be used - // later to get the other 32bit part. - State.AllocateReg(IntRegs, IntRegsSize); - return false; - } - - // Register is shadowed to preserve alignment, and the - // argument goes to a stack location. - if (UnallocIntReg != IntRegsSize) - State.AllocateReg(IntRegs, IntRegsSize); - - unsigned Off = State.AllocateStack(8, 8); - State.addLoc(CCValAssign::getMem(ValNo, ValVT, Off, LocVT, LocInfo)); - return false; - } - - return true; // CC didn't match + return false; // CC must always match } //===----------------------------------------------------------------------===// @@ -1043,11 +1141,12 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32) Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) { - Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg); - SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, - DAG.getConstant(0, getPointerTy())); - SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Arg, - DAG.getConstant(1, getPointerTy())); + SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, + Arg, DAG.getConstant(0, MVT::i32)); + SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, + Arg, DAG.getConstant(1, MVT::i32)); + if (!Subtarget->isLittle()) + std::swap(Lo, Hi); RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); RegsToPass.push_back(std::make_pair(VA.getLocReg()+1, Hi)); continue; @@ -1100,7 +1199,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. 
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { @@ -1113,12 +1212,52 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. unsigned char OpFlag = IsPIC ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; - if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - getPointerTy(), 0, OpFlag); - else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) + bool LoadSymAddr = false; + SDValue CalleeLo; + + if (GlobalAddressSDNode *G = dyn_cast(Callee)) { + if (IsPIC && G->getGlobal()->hasInternalLinkage()) { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + getPointerTy(), 0,MipsII:: MO_GOT); + CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), + 0, MipsII::MO_ABS_LO); + } else { + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, + getPointerTy(), 0, OpFlag); + } + + LoadSymAddr = true; + } + else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlag); + LoadSymAddr = true; + } + + // Create nodes that load address of callee and copy it to T9 + if (IsPIC) { + if (LoadSymAddr) { + // Load callee address + SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee, + MachinePointerInfo::getGOT(), + false, false, 0); + + // Use GOT+LO if callee has internal linkage. + if (CalleeLo.getNode()) { + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo); + Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo); + } else + Callee = LoadValue; + + // Use chain output from LoadValue + Chain = LoadValue.getValue(1); + } + + // copy to T9 + Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0)); + InFlag = Chain.getValue(1); + Callee = DAG.getRegister(Mips::T9, MVT::i32); + } // MipsJmpLink = #chain, #target_address, #opt_in_flags... 
// = Chain, Callee, Reg#1, Reg#2, ... @@ -1143,7 +1282,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create a stack location to hold GP when PIC is used. This stack // location is used on function prologue to save GP and also after all - // emited CALL's to restore GP. + // emitted CALL's to restore GP. if (IsPIC) { // Function can have an arbitrary number of calls, so // hold the LastArgStackLoc with the biggest offset. @@ -1218,18 +1357,18 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, /// and generate load operations for arguments places on the stack. SDValue MipsTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) + CallingConv::ID CallConv, + bool isVarArg, + const SmallVectorImpl + &Ins, + DebugLoc dl, SelectionDAG &DAG, + SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MipsFunctionInfo *MipsFI = MF.getInfo(); - unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF); MipsFI->setVarArgsFrameIndex(0); // Used with vargs to acumulate store chains. @@ -1249,9 +1388,9 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); - SDValue StackPtr; - unsigned FirstStackArgLoc = (Subtarget->isABI_EABI() ? 
0 : 16); + unsigned LastStackArgEndOffset = 0; + EVT LastRegArgValVT; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -1260,6 +1399,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.isRegLoc()) { EVT RegVT = VA.getLocVT(); ArgRegEnd = VA.getLocReg(); + LastRegArgValVT = VA.getValVT(); TargetRegisterClass *RC = 0; if (RegVT == MVT::i32) @@ -1300,8 +1440,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg()+1, RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); - SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, ArgValue2, ArgValue); - ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Pair); + if (!Subtarget->isLittle()) + std::swap(ArgValue, ArgValue2); + ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, + ArgValue, ArgValue2); } } @@ -1321,10 +1463,10 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // used instead of a direct negative address (which is recorded to // be used on emitPrologue) to avoid mis-calc of the first stack // offset on PEI::calculateFrameObjectOffsets. - // Arguments are always 32-bit. - unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; + unsigned ArgSize = VA.getValVT().getSizeInBits()/8; + LastStackArgEndOffset = FirstStackArgLoc + VA.getLocMemOffset() + ArgSize; int FI = MFI->CreateFixedObject(ArgSize, 0, true); - MipsFI->recordLoadArgsFI(FI, -(ArgSize+ + MipsFI->recordLoadArgsFI(FI, -(4 + (FirstStackArgLoc + VA.getLocMemOffset()))); // Create load nodes to retrieve arguments from the stack @@ -1351,29 +1493,52 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // To meet ABI, when VARARGS are passed on registers, the registers // must have their values written to the caller stack frame. If the last // argument was placed in the stack, there's no need to save any register. 
- if ((isVarArg) && (Subtarget->isABI_O32() && ArgRegEnd)) { - if (StackPtr.getNode() == 0) - StackPtr = DAG.getRegister(StackReg, getPointerTy()); + if (isVarArg && Subtarget->isABI_O32()) { + if (ArgRegEnd) { + // Last named formal argument is passed in register. - // The last register argument that must be saved is Mips::A3 - TargetRegisterClass *RC = Mips::CPURegsRegisterClass; - unsigned StackLoc = ArgLocs.size()-1; + // The last register argument that must be saved is Mips::A3 + TargetRegisterClass *RC = Mips::CPURegsRegisterClass; + if (LastRegArgValVT == MVT::f64) + ArgRegEnd++; - for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd, ++StackLoc) { - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); + if (ArgRegEnd < Mips::A3) { + // Both the last named formal argument and the first variable + // argument are passed in registers. + for (++ArgRegEnd; ArgRegEnd <= Mips::A3; ++ArgRegEnd) { + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegEnd, RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); - int FI = MFI->CreateFixedObject(4, 0, true); - MipsFI->recordStoreVarArgsFI(FI, -(4+(StackLoc*4))); - SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); - OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, - MachinePointerInfo(), - false, false, 0)); + int FI = MFI->CreateFixedObject(4, 0, true); + MipsFI->recordStoreVarArgsFI(FI, -(4+(ArgRegEnd-Mips::A0)*4)); + SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); + OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, + MachinePointerInfo(), + false, false, 0)); - // Record the frame index of the first variable argument - // which is a value necessary to VASTART. - if (!MipsFI->getVarArgsFrameIndex()) + // Record the frame index of the first variable argument + // which is a value necessary to VASTART. 
+ if (!MipsFI->getVarArgsFrameIndex()) { + MFI->setObjectAlignment(FI, 4); + MipsFI->setVarArgsFrameIndex(FI); + } + } + } else { + // Last named formal argument is in register Mips::A3, and the first + // variable argument is on stack. Record the frame index of the first + // variable argument. + int FI = MFI->CreateFixedObject(4, 0, true); + MFI->setObjectAlignment(FI, 4); + MipsFI->recordStoreVarArgsFI(FI, -20); MipsFI->setVarArgsFrameIndex(FI); + } + } else { + // Last named formal argument and all the variable arguments are passed + // on stack. Record the frame index of the first variable argument. + int FI = MFI->CreateFixedObject(4, 0, true); + MFI->setObjectAlignment(FI, 4); + MipsFI->recordStoreVarArgsFI(FI, -(4+LastStackArgEndOffset)); + MipsFI->setVarArgsFrameIndex(FI); } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 9d6b9f3daf87..e4d0c3d24f9c 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -31,45 +31,50 @@ namespace llvm { // Get the Higher 16 bits from a 32-bit immediate // No relation with Mips Hi register - Hi, + Hi, // Get the Lower 16 bits from a 32-bit immediate // No relation with Mips Lo register - Lo, + Lo, // Handle gp_rel (small data/bss sections) relocation. 
GPRel, - // Select CC Pseudo Instruction - SelectCC, - - // Floating Point Select CC Pseudo Instruction - FPSelectCC, - // Floating Point Branch Conditional FPBrcond, // Floating Point Compare FPCmp, + // Floating Point Conditional Moves + CMovFP_T, + CMovFP_F, + // Floating Point Rounding FPRound, - // Return + // Return Ret, // MAdd/Sub nodes MAdd, MAddu, MSub, - MSubu + MSubu, + + // DivRem(u) + DivRem, + DivRemU, + + BuildPairF64, + ExtractElementF64 }; } //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// - + class MipsTargetLowering : public TargetLowering { public: explicit MipsTargetLowering(MipsTargetMachine &TM); @@ -77,7 +82,7 @@ namespace llvm { /// LowerOperation - Provide custom lowering hooks for some operations. virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - /// getTargetNodeName - This method returns the name of a target specific + /// getTargetNodeName - This method returns the name of a target specific // DAG node. virtual const char *getTargetNodeName(unsigned Opcode) const; @@ -87,7 +92,7 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; - virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; + virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; private: // Subtarget Info const MipsSubtarget *Subtarget; @@ -101,16 +106,15 @@ namespace llvm { SmallVectorImpl &InVals) const; // Lower Operand specifics - SDValue LowerANDOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; virtual SDValue @@ -149,7 +153,7 @@ namespace llvm { ConstraintWeight getSingleConstraintMatchWeight( AsmOperandInfo &info, const char *constraint) const; - std::pair + std::pair getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 977e0dfa145a..a86c5c7e8b83 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -24,19 +24,28 @@ //===----------------------------------------------------------------------===// // Floating Point Compare and Branch -def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>, - SDTCisVT<1, OtherVT>]>; -def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, - SDTCisSameAs<1, 2>, SDTCisFP<1>, - SDTCisInt<3>]>; -def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, 
SDTCisInt<4>, - SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>; +def SDT_MipsFPBrcond : SDTypeProfile<0, 2, [SDTCisInt<0>, + SDTCisVT<1, OtherVT>]>; +def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<1>, + SDTCisInt<2>]>; +def SDT_MipsCMovFP : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, + SDTCisSameAs<1, 2>]>; +def SDT_MipsBuildPairF64 : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, + SDTCisVT<1, i32>, + SDTCisSameAs<1, 2>]>; +def SDT_MipsExtractElementF64 : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, + SDTCisVT<1, f64>, + SDTCisVT<0, i32>]>; +def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp, [SDNPOutGlue]>; +def MipsCMovFP_T : SDNode<"MipsISD::CMovFP_T", SDT_MipsCMovFP, [SDNPInGlue]>; +def MipsCMovFP_F : SDNode<"MipsISD::CMovFP_F", SDT_MipsCMovFP, [SDNPInGlue]>; def MipsFPRound : SDNode<"MipsISD::FPRound", SDTFPRoundOp, [SDNPOptInGlue]>; -def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, - [SDNPHasChain]>; -def MipsFPCmp : SDNode<"MipsISD::FPCmp", SDT_MipsFPCmp>; -def MipsFPSelectCC : SDNode<"MipsISD::FPSelectCC", SDT_MipsFPSelectCC>; +def MipsFPBrcond : SDNode<"MipsISD::FPBrcond", SDT_MipsFPBrcond, + [SDNPHasChain, SDNPOptInGlue]>; +def MipsBuildPairF64 : SDNode<"MipsISD::BuildPairF64", SDT_MipsBuildPairF64>; +def MipsExtractElementF64 : SDNode<"MipsISD::ExtractElementF64", + SDT_MipsExtractElementF64>; // Operand for printing out a condition code. let PrintMethod = "printFCCOperand" in @@ -54,7 +63,7 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; //===----------------------------------------------------------------------===// // Instruction Class Templates // -// A set of multiclasses is used to address the register usage. +// A set of multiclasses is used to address the register usage. // // S32 - single precision in 16 32bit even fp registers // single precision in 32 32bit fp registers in SingleOnly mode @@ -65,7 +74,7 @@ def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; // Only S32 and D32 are supported right now. 
//===----------------------------------------------------------------------===// -multiclass FFR1_1 funct, string asmstr> +multiclass FFR1_1 funct, string asmstr> { def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), !strconcat(asmstr, ".s $fd, $fs"), []>; @@ -74,31 +83,31 @@ multiclass FFR1_1 funct, string asmstr> !strconcat(asmstr, ".d $fd, $fs"), []>, Requires<[In32BitMode]>; } -multiclass FFR1_2 funct, string asmstr, SDNode FOp> +multiclass FFR1_2 funct, string asmstr, SDNode FOp> { def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), (ins FGR32:$fs), - !strconcat(asmstr, ".s $fd, $fs"), + !strconcat(asmstr, ".s $fd, $fs"), [(set FGR32:$fd, (FOp FGR32:$fs))]>; def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), - !strconcat(asmstr, ".d $fd, $fs"), + !strconcat(asmstr, ".d $fd, $fs"), [(set AFGR64:$fd, (FOp AFGR64:$fs))]>, Requires<[In32BitMode]>; } -class FFR1_3 funct, bits<5> fmt, RegisterClass RcSrc, - RegisterClass RcDst, string asmstr>: - FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), - !strconcat(asmstr, " $fd, $fs"), []>; +class FFR1_3 funct, bits<5> fmt, RegisterClass RcSrc, + RegisterClass RcDst, string asmstr>: + FFR<0x11, funct, fmt, (outs RcSrc:$fd), (ins RcDst:$fs), + !strconcat(asmstr, " $fd, $fs"), []>; multiclass FFR1_4 funct, string asmstr, SDNode FOp> { - def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), - (ins FGR32:$fs, FGR32:$ft), + def _S32 : FFR<0x11, funct, 0x0, (outs FGR32:$fd), + (ins FGR32:$fs, FGR32:$ft), !strconcat(asmstr, ".s $fd, $fs, $ft"), [(set FGR32:$fd, (FOp FGR32:$fs, FGR32:$ft))]>; - def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), - (ins AFGR64:$fs, AFGR64:$ft), + def _D32 : FFR<0x11, funct, 0x1, (outs AFGR64:$fd), + (ins AFGR64:$fs, AFGR64:$ft), !strconcat(asmstr, ".d $fd, $fs, $ft"), [(set AFGR64:$fd, (FOp AFGR64:$fs, AFGR64:$ft))]>, Requires<[In32BitMode]>; @@ -115,8 +124,8 @@ let ft = 0 in { defm TRUNC_W : FFR1_1<0b001101, "trunc.w">; defm CVTW : FFR1_1<0b100100, "cvt.w">; - 
defm FABS : FFR1_2<0b000101, "abs", fabs>; - defm FNEG : FFR1_2<0b000111, "neg", fneg>; + defm FABS : FFR1_2<0b000101, "abs", fabs>; + defm FNEG : FFR1_2<0b000111, "neg", fneg>; defm FSQRT : FFR1_2<0b000100, "sqrt", fsqrt>; /// Convert to Single Precison @@ -140,23 +149,23 @@ let ft = 0 in { def TRUNC_LD : FFR1_3<0b001001, 0x1, AFGR64, AFGR64, "trunc.l">; /// Convert to long signed integer - def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; - def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; + def CVTL_S : FFR1_3<0b100101, 0x0, FGR32, FGR32, "cvt.l">; + def CVTL_D : FFR1_3<0b100101, 0x1, AFGR64, AFGR64, "cvt.l">; + + /// Convert to Double Precison + def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; + def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; + def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; - /// Convert to Double Precison - def CVTD_S32 : FFR1_3<0b100001, 0x0, AFGR64, FGR32, "cvt.d.s">; - def CVTD_W32 : FFR1_3<0b100001, 0x2, AFGR64, FGR32, "cvt.d.w">; - def CVTD_L32 : FFR1_3<0b100001, 0x3, AFGR64, AFGR64, "cvt.d.l">; - /// Convert to Single Precison def CVTS_D32 : FFR1_3<0b100000, 0x1, FGR32, AFGR64, "cvt.s.d">; - def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; + def CVTS_L32 : FFR1_3<0b100000, 0x3, FGR32, AFGR64, "cvt.s.l">; } } // The odd-numbered registers are only referenced when doing loads, // stores, and moves between floating-point and integer registers. -// When defining instructions, we reference all 32-bit registers, +// When defining instructions, we reference all 32-bit registers, // regardless of register aliasing. 
let fd = 0 in { /// Move Control Registers From/To CPU Registers @@ -165,7 +174,7 @@ let fd = 0 in { def CTC1 : FFR<0x11, 0x0, 0x6, (outs CCR:$rt), (ins CPURegs:$fs), "ctc1 $fs, $rt", []>; - + def MFC1 : FFR<0x11, 0x00, 0x00, (outs CPURegs:$rt), (ins FGR32:$fs), "mfc1 $rt, $fs", []>; @@ -180,18 +189,18 @@ def FMOV_D32 : FFR<0x11, 0b000110, 0x1, (outs AFGR64:$fd), (ins AFGR64:$fs), /// Floating Point Memory Instructions let Predicates = [IsNotSingleFloat, IsNotMipsI] in { - def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), + def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>; - def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr), + def SDC1 : FFI<0b111101, (outs), (ins AFGR64:$ft, mem:$addr), "sdc1 $ft, $addr", [(store AFGR64:$ft, addr:$addr)]>; } -// LWC1 and SWC1 can always be emited with odd registers. +// LWC1 and SWC1 can always be emitted with odd registers. def LWC1 : FFI<0b110001, (outs FGR32:$ft), (ins mem:$addr), "lwc1 $ft, $addr", - [(set FGR32:$ft, (load addr:$addr))]>; + [(set FGR32:$ft, (load addr:$addr))]>; def SWC1 : FFI<0b111001, (outs), (ins FGR32:$ft, mem:$addr), "swc1 $ft, $addr", - [(store FGR32:$ft, addr:$addr)]>; + [(store FGR32:$ft, addr:$addr)]>; /// Floating-point Aritmetic defm FADD : FFR1_4<0x10, "add", fadd>; @@ -202,7 +211,7 @@ defm FSUB : FFR1_4<0x01, "sub", fsub>; //===----------------------------------------------------------------------===// // Floating Point Branch Codes //===----------------------------------------------------------------------===// -// Mips branch codes. These correspond to condcode in MipsInstrInfo.h. +// Mips branch codes. These correspond to condcode in MipsInstrInfo.h. // They must be kept in synch. 
def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; @@ -210,11 +219,11 @@ def MIPS_BRANCH_FL : PatLeaf<(i32 2)>; def MIPS_BRANCH_TL : PatLeaf<(i32 3)>; /// Floating Point Branch of False/True (Likely) -let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in { - class FBRANCH : FFI<0x11, (outs), +let isBranch=1, isTerminator=1, hasDelaySlot=1, base=0x8, Uses=[FCR31] in + class FBRANCH : FFI<0x11, (outs), (ins brtarget:$dst), !strconcat(asmstr, " $dst"), - [(MipsFPBrcond op, bb:$dst, FCR31)]>; -} + [(MipsFPBrcond op, bb:$dst)]>; + def BC1F : FBRANCH; def BC1T : FBRANCH; def BC1FL : FBRANCH; @@ -223,11 +232,11 @@ def BC1TL : FBRANCH; //===----------------------------------------------------------------------===// // Floating Point Flag Conditions //===----------------------------------------------------------------------===// -// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h. +// Mips condition codes. They must correspond to condcode in MipsInstrInfo.h. // They must be kept in synch. 
def MIPS_FCOND_F : PatLeaf<(i32 0)>; def MIPS_FCOND_UN : PatLeaf<(i32 1)>; -def MIPS_FCOND_EQ : PatLeaf<(i32 2)>; +def MIPS_FCOND_OEQ : PatLeaf<(i32 2)>; def MIPS_FCOND_UEQ : PatLeaf<(i32 3)>; def MIPS_FCOND_OLT : PatLeaf<(i32 4)>; def MIPS_FCOND_ULT : PatLeaf<(i32 5)>; @@ -245,44 +254,90 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; /// Floating Point Compare let hasDelaySlot = 1, Defs=[FCR31] in { def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc), - "c.$cc.s $fs, $ft", - [(set FCR31, (MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc))]>; - + "c.$cc.s $fs, $ft", + [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>; + def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc), - "c.$cc.d $fs, $ft", - [(set FCR31, (MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc))]>, - Requires<[In32BitMode]>; + "c.$cc.d $fs, $ft", + [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>, + Requires<[In32BitMode]>; +} + + +// Conditional moves: +// These instructions are expanded in +// MipsISelLowering::EmitInstrWithCustomInserter if target does not have +// conditional move instructions. 
+// flag:int, data:float +let usesCustomInserter = 1, Constraints = "$F = $dst" in +class CondMovIntFP fmt, bits<6> func, + string instr_asm> : + FFR<0x11, func, fmt, (outs RC:$dst), (ins RC:$T, CPURegs:$cond, RC:$F), + !strconcat(instr_asm, "\t$dst, $T, $cond"), []>; + +def MOVZ_S : CondMovIntFP; +def MOVN_S : CondMovIntFP; + +let Predicates = [In32BitMode] in { + def MOVZ_D : CondMovIntFP; + def MOVN_D : CondMovIntFP; +} + +defm : MovzPats; +defm : MovnPats; + +let Predicates = [In32BitMode] in { + defm : MovzPats; + defm : MovnPats; +} + +let usesCustomInserter = 1, Uses = [FCR31], Constraints = "$F = $dst" in { +// flag:float, data:int +class CondMovFPInt tf, string instr_asm> : + FCMOV; + +// flag:float, data:float +class CondMovFPFP fmt, bits<1> tf, + string instr_asm> : + FFCMOV; +} + +def MOVT : CondMovFPInt; +def MOVF : CondMovFPInt; +def MOVT_S : CondMovFPFP; +def MOVF_S : CondMovFPFP; + +let Predicates = [In32BitMode] in { + def MOVT_D : CondMovFPFP; + def MOVF_D : CondMovFPFP; } //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions //===----------------------------------------------------------------------===// +def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), + "# MOVCCRToCCR", []>; -// For some explanation, see Select_CC at MipsInstrInfo.td. We also embedd a -// condiciton code to enable easy handling by the Custom Inserter. -let usesCustomInserter = 1, Uses=[FCR31] in { - class PseudoFPSelCC : - MipsPseudo<(outs RC:$dst), - (ins CPURegs:$CmpRes, RC:$T, RC:$F, condcode:$cc), asmstr, - [(set RC:$dst, (MipsFPSelectCC CPURegs:$CmpRes, RC:$T, RC:$F, - imm:$cc))]>; -} +// This pseudo instr gets expanded into 2 mtc1 instrs after register +// allocation. 
+def BuildPairF64 : + MipsPseudo<(outs AFGR64:$dst), + (ins CPURegs:$lo, CPURegs:$hi), "", + [(set AFGR64:$dst, (MipsBuildPairF64 CPURegs:$lo, CPURegs:$hi))]>; -// The values to be selected are fp but the condition test is with integers. -def Select_CC_S32 : PseudoSelCC; -def Select_CC_D32 : PseudoSelCC, - Requires<[In32BitMode]>; - -// The values to be selected are int but the condition test is done with fp. -def Select_FCC : PseudoFPSelCC; - -// The values to be selected and the condition test is done with fp. -def Select_FCC_S32 : PseudoFPSelCC; -def Select_FCC_D32 : PseudoFPSelCC, - Requires<[In32BitMode]>; - -def MOVCCRToCCR : MipsPseudo<(outs CCR:$dst), (ins CCR:$src), - "# MOVCCRToCCR", []>; +// This pseudo instr gets expanded into 2 mfc1 instrs after register +// allocation. +// if n is 0, lower part of src is extracted. +// if n is 1, higher part of src is extracted. +def ExtractElementF64 : + MipsPseudo<(outs CPURegs:$dst), + (ins AFGR64:$src, i32imm:$n), "", + [(set CPURegs:$dst, + (MipsExtractElementF64 AFGR64:$src, imm:$n))]>; //===----------------------------------------------------------------------===// // Floating Point Patterns @@ -306,7 +361,7 @@ def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S32 FGR32:$src))>; def : Pat<(i32 (bitconvert FGR32:$src)), (MFC1 FGR32:$src)>; def : Pat<(f32 (bitconvert CPURegs:$src)), (MTC1 CPURegs:$src)>; -let Predicates = [In32BitMode] in { +let Predicates = [In32BitMode] in { def : Pat<(f32 (fround AFGR64:$src)), (CVTS_D32 AFGR64:$src)>; def : Pat<(f64 (fextend FGR32:$src)), (CVTD_S32 FGR32:$src)>; } diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 98ae2fa7da45..9dfcdfbdb255 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -22,8 +22,8 @@ //===----------------------------------------------------------------------===// // Generic Mips Format -class MipsInst pattern, - InstrItinClass itin>: Instruction +class MipsInst 
pattern, + InstrItinClass itin>: Instruction { field bits<32> Inst; @@ -32,8 +32,8 @@ class MipsInst pattern, bits<6> opcode; // Top 5 bits are the 'opcode' field - let Inst{31-26} = opcode; - + let Inst{31-26} = opcode; + dag OutOperandList = outs; dag InOperandList = ins; @@ -52,7 +52,7 @@ class MipsPseudo pattern>: class FR op, bits<6> _funct, dag outs, dag ins, string asmstr, list pattern, InstrItinClass itin>: - MipsInst + MipsInst { bits<5> rd; bits<5> rs; @@ -64,7 +64,7 @@ class FR op, bits<6> _funct, dag outs, dag ins, string asmstr, let funct = _funct; let Inst{25-21} = rs; - let Inst{20-16} = rt; + let Inst{20-16} = rt; let Inst{15-11} = rd; let Inst{10-6} = shamt; let Inst{5-0} = funct; @@ -75,7 +75,7 @@ class FR op, bits<6> _funct, dag outs, dag ins, string asmstr, //===----------------------------------------------------------------------===// class FI op, dag outs, dag ins, string asmstr, list pattern, - InstrItinClass itin>: MipsInst + InstrItinClass itin>: MipsInst { bits<5> rt; bits<5> rs; @@ -84,7 +84,7 @@ class FI op, dag outs, dag ins, string asmstr, list pattern, let opcode = op; let Inst{25-21} = rs; - let Inst{20-16} = rt; + let Inst{20-16} = rt; let Inst{15-0} = imm16; } @@ -93,12 +93,12 @@ class FI op, dag outs, dag ins, string asmstr, list pattern, //===----------------------------------------------------------------------===// class FJ op, dag outs, dag ins, string asmstr, list pattern, - InstrItinClass itin>: MipsInst + InstrItinClass itin>: MipsInst { bits<26> addr; let opcode = op; - + let Inst{25-0} = addr; } @@ -119,9 +119,9 @@ class FJ op, dag outs, dag ins, string asmstr, list pattern, // Format FR instruction class in Mips : <|opcode|fmt|ft|fs|fd|funct|> //===----------------------------------------------------------------------===// -class FFR op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, - string asmstr, list pattern> : - MipsInst +class FFR op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, + string asmstr, list 
pattern> : + MipsInst { bits<5> fd; bits<5> fs; @@ -134,7 +134,7 @@ class FFR op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, let fmt = _fmt; let Inst{25-21} = fmt; - let Inst{20-16} = ft; + let Inst{20-16} = ft; let Inst{15-11} = fs; let Inst{10-6} = fd; let Inst{5-0} = funct; @@ -144,8 +144,8 @@ class FFR op, bits<6> _funct, bits<5> _fmt, dag outs, dag ins, // Format FI instruction class in Mips : <|opcode|base|ft|immediate|> //===----------------------------------------------------------------------===// -class FFI op, dag outs, dag ins, string asmstr, list pattern>: - MipsInst +class FFI op, dag outs, dag ins, string asmstr, list pattern>: + MipsInst { bits<5> ft; bits<5> base; @@ -154,7 +154,7 @@ class FFI op, dag outs, dag ins, string asmstr, list pattern>: let opcode = op; let Inst{25-21} = base; - let Inst{20-16} = ft; + let Inst{20-16} = ft; let Inst{15-0} = imm16; } @@ -162,8 +162,8 @@ class FFI op, dag outs, dag ins, string asmstr, list pattern>: // Compare instruction class in Mips : <|010001|fmt|ft|fs|0000011|condcode|> //===----------------------------------------------------------------------===// -class FCC _fmt, dag outs, dag ins, string asmstr, list pattern> : - MipsInst +class FCC _fmt, dag outs, dag ins, string asmstr, list pattern> : + MipsInst { bits<5> fs; bits<5> ft; @@ -174,9 +174,54 @@ class FCC _fmt, dag outs, dag ins, string asmstr, list pattern> : let fmt = _fmt; let Inst{25-21} = fmt; - let Inst{20-16} = ft; + let Inst{20-16} = ft; let Inst{15-11} = fs; let Inst{10-6} = 0; let Inst{5-4} = 0b11; let Inst{3-0} = cc; } + + +class FCMOV _tf, dag outs, dag ins, string asmstr, + list pattern> : + MipsInst +{ + bits<5> rd; + bits<5> rs; + bits<3> N; + bits<1> tf; + + let opcode = 0; + let tf = _tf; + + let Inst{25-21} = rs; + let Inst{20-18} = N; + let Inst{17} = 0; + let Inst{16} = tf; + let Inst{15-11} = rd; + let Inst{10-6} = 0; + let Inst{5-0} = 1; +} + +class FFCMOV _fmt, bits<1> _tf, dag outs, dag ins, string asmstr, + list 
pattern> : + MipsInst +{ + bits<5> fd; + bits<5> fs; + bits<3> N; + bits<5> fmt; + bits<1> tf; + + let opcode = 17; + let fmt = _fmt; + let tf = _tf; + + let Inst{25-21} = fmt; + let Inst{20-18} = N; + let Inst{17} = 0; + let Inst{16} = tf; + let Inst{15-11} = fs; + let Inst{10-6} = fd; + let Inst{5-0} = 17; +} \ No newline at end of file diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index aaf307b1ce3f..be044fa1f3b3 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -36,7 +36,7 @@ static bool isZeroImm(const MachineOperand &op) { /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than loading from the stack slot. unsigned MipsInstrInfo:: -isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const +isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const { if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) || (MI->getOpcode() == Mips::LDC1)) { @@ -57,7 +57,7 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const /// not, return 0. This predicate must return 0 if the instruction has /// any side effects other than storing to the stack slot. unsigned MipsInstrInfo:: -isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const +isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const { if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) || (MI->getOpcode() == Mips::SDC1)) { @@ -74,7 +74,7 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const /// insertNoop - If data hazard condition is found insert the target nop /// instruction. 
void MipsInstrInfo:: -insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const +insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { DebugLoc DL; BuildMI(MBB, MI, DL, get(Mips::NOP)); @@ -136,7 +136,7 @@ copyPhysReg(MachineBasicBlock &MBB, .addReg(SrcReg, getKillRegState(KillSrc)); return; } - + if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) { BuildMI(MBB, I, DL, get(Mips::FMOV_D32), DestReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -153,13 +153,13 @@ copyPhysReg(MachineBasicBlock &MBB, void MipsInstrInfo:: storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned SrcReg, bool isKill, int FI, + unsigned SrcReg, bool isKill, int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == Mips::CPURegsRegisterClass) + if (RC == Mips::CPURegsRegisterClass) BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); else if (RC == Mips::FGR32RegisterClass) @@ -171,7 +171,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, .addReg(SrcReg, getKillRegState(isKill)) .addImm(0).addFrameIndex(FI); } else { - const TargetRegisterInfo *TRI = + const TargetRegisterInfo *TRI = MBB.getParent()->getTarget().getRegisterInfo(); const unsigned *SubSet = TRI->getSubRegisters(SrcReg); BuildMI(MBB, I, DL, get(Mips::SWC1)) @@ -189,12 +189,12 @@ void MipsInstrInfo:: loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, int FI, const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const + const TargetRegisterInfo *TRI) const { DebugLoc DL; if (I != MBB.end()) DL = I->getDebugLoc(); - if (RC == Mips::CPURegsRegisterClass) + if (RC == Mips::CPURegsRegisterClass) BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI); else if (RC == Mips::FGR32RegisterClass) BuildMI(MBB, I, DL, get(Mips::LWC1), 
DestReg).addImm(0).addFrameIndex(FI); @@ -202,7 +202,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (!TM.getSubtarget().isMips1()) { BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI); } else { - const TargetRegisterInfo *TRI = + const TargetRegisterInfo *TRI = MBB.getParent()->getTarget().getRegisterInfo(); const unsigned *SubSet = TRI->getSubRegisters(DestReg); BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0]) @@ -218,281 +218,202 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, // Branch Analysis //===----------------------------------------------------------------------===// -/// GetCondFromBranchOpc - Return the Mips CC that matches -/// the correspondent Branch instruction opcode. -static Mips::CondCode GetCondFromBranchOpc(unsigned BrOpc) -{ - switch (BrOpc) { - default: return Mips::COND_INVALID; - case Mips::BEQ : return Mips::COND_E; - case Mips::BNE : return Mips::COND_NE; - case Mips::BGTZ : return Mips::COND_GZ; - case Mips::BGEZ : return Mips::COND_GEZ; - case Mips::BLTZ : return Mips::COND_LZ; - case Mips::BLEZ : return Mips::COND_LEZ; +static unsigned GetAnalyzableBrOpc(unsigned Opc) { + return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ || + Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ || + Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::J) ? Opc : 0; +} - // We dont do fp branch analysis yet! - case Mips::BC1T : - case Mips::BC1F : return Mips::COND_INVALID; +/// GetOppositeBranchOpc - Return the inverse of the specified +/// opcode, e.g. turning BEQ to BNE. 
+unsigned Mips::GetOppositeBranchOpc(unsigned Opc) +{ + switch (Opc) { + default: llvm_unreachable("Illegal opcode!"); + case Mips::BEQ : return Mips::BNE; + case Mips::BNE : return Mips::BEQ; + case Mips::BGTZ : return Mips::BLEZ; + case Mips::BGEZ : return Mips::BLTZ; + case Mips::BLTZ : return Mips::BGEZ; + case Mips::BLEZ : return Mips::BGTZ; + case Mips::BC1T : return Mips::BC1F; + case Mips::BC1F : return Mips::BC1T; } } -/// GetCondBranchFromCond - Return the Branch instruction -/// opcode that matches the cc. -unsigned Mips::GetCondBranchFromCond(Mips::CondCode CC) -{ - switch (CC) { - default: llvm_unreachable("Illegal condition code!"); - case Mips::COND_E : return Mips::BEQ; - case Mips::COND_NE : return Mips::BNE; - case Mips::COND_GZ : return Mips::BGTZ; - case Mips::COND_GEZ : return Mips::BGEZ; - case Mips::COND_LZ : return Mips::BLTZ; - case Mips::COND_LEZ : return Mips::BLEZ; +static void AnalyzeCondBr(const MachineInstr* Inst, unsigned Opc, + MachineBasicBlock *&BB, + SmallVectorImpl& Cond) { + assert(GetAnalyzableBrOpc(Opc) && "Not an analyzable branch"); + int NumOp = Inst->getNumExplicitOperands(); + + // for both int and fp branches, the last explicit operand is the + // MBB. 
+ BB = Inst->getOperand(NumOp-1).getMBB(); + Cond.push_back(MachineOperand::CreateImm(Opc)); - case Mips::FCOND_F: - case Mips::FCOND_UN: - case Mips::FCOND_EQ: - case Mips::FCOND_UEQ: - case Mips::FCOND_OLT: - case Mips::FCOND_ULT: - case Mips::FCOND_OLE: - case Mips::FCOND_ULE: - case Mips::FCOND_SF: - case Mips::FCOND_NGLE: - case Mips::FCOND_SEQ: - case Mips::FCOND_NGL: - case Mips::FCOND_LT: - case Mips::FCOND_NGE: - case Mips::FCOND_LE: - case Mips::FCOND_NGT: return Mips::BC1T; - - case Mips::FCOND_T: - case Mips::FCOND_OR: - case Mips::FCOND_NEQ: - case Mips::FCOND_OGL: - case Mips::FCOND_UGE: - case Mips::FCOND_OGE: - case Mips::FCOND_UGT: - case Mips::FCOND_OGT: - case Mips::FCOND_ST: - case Mips::FCOND_GLE: - case Mips::FCOND_SNE: - case Mips::FCOND_GL: - case Mips::FCOND_NLT: - case Mips::FCOND_GE: - case Mips::FCOND_NLE: - case Mips::FCOND_GT: return Mips::BC1F; - } + for (int i=0; i<NumOp-1; i++) + Cond.push_back(Inst->getOperand(i)); } -/// GetOppositeBranchCondition - Return the inverse of the specified -/// condition, e.g. turning COND_E to COND_NE. 
-Mips::CondCode Mips::GetOppositeBranchCondition(Mips::CondCode CC) -{ - switch (CC) { - default: llvm_unreachable("Illegal condition code!"); - case Mips::COND_E : return Mips::COND_NE; - case Mips::COND_NE : return Mips::COND_E; - case Mips::COND_GZ : return Mips::COND_LEZ; - case Mips::COND_GEZ : return Mips::COND_LZ; - case Mips::COND_LZ : return Mips::COND_GEZ; - case Mips::COND_LEZ : return Mips::COND_GZ; - case Mips::FCOND_F : return Mips::FCOND_T; - case Mips::FCOND_UN : return Mips::FCOND_OR; - case Mips::FCOND_EQ : return Mips::FCOND_NEQ; - case Mips::FCOND_UEQ: return Mips::FCOND_OGL; - case Mips::FCOND_OLT: return Mips::FCOND_UGE; - case Mips::FCOND_ULT: return Mips::FCOND_OGE; - case Mips::FCOND_OLE: return Mips::FCOND_UGT; - case Mips::FCOND_ULE: return Mips::FCOND_OGT; - case Mips::FCOND_SF: return Mips::FCOND_ST; - case Mips::FCOND_NGLE:return Mips::FCOND_GLE; - case Mips::FCOND_SEQ: return Mips::FCOND_SNE; - case Mips::FCOND_NGL: return Mips::FCOND_GL; - case Mips::FCOND_LT: return Mips::FCOND_NLT; - case Mips::FCOND_NGE: return Mips::FCOND_GE; - case Mips::FCOND_LE: return Mips::FCOND_NLE; - case Mips::FCOND_NGT: return Mips::FCOND_GT; - } -} - -bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, +bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, - bool AllowModify) const + bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. 
- unsigned LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (!LastInst->getDesc().isBranch()) - return true; + MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); + // Skip all the debug instructions. + while (I != REnd && I->isDebugValue()) + ++I; + + if (I == REnd || !isUnpredicatedTerminator(&*I)) { + // If this block ends with no branches (it just falls through to its succ) + // just return false, leaving TBB/FBB null. + TBB = FBB = NULL; + return false; + } + + MachineInstr *LastInst = &*I; + unsigned LastOpc = LastInst->getOpcode(); + + // Not an analyzable branch (must be an indirect jump). + if (!GetAnalyzableBrOpc(LastOpc)) + return true; + + // Get the second to last instruction in the block. + unsigned SecondLastOpc = 0; + MachineInstr *SecondLastInst = NULL; + + if (++I != REnd) { + SecondLastInst = &*I; + SecondLastOpc = GetAnalyzableBrOpc(SecondLastInst->getOpcode()); + + // Not an analyzable branch (must be an indirect jump). + if (isUnpredicatedTerminator(SecondLastInst) && !SecondLastOpc) + return true; + } + + // If there is only one terminator instruction, process it. + if (!SecondLastOpc) { // Unconditional branch if (LastOpc == Mips::J) { TBB = LastInst->getOperand(0).getMBB(); return false; } - Mips::CondCode BranchCode = GetCondFromBranchOpc(LastInst->getOpcode()); - if (BranchCode == Mips::COND_INVALID) - return true; // Can't handle indirect branch. - // Conditional branch - // Block ends with fall-through condbranch. - if (LastOpc != Mips::COND_INVALID) { - int LastNumOp = LastInst->getNumOperands(); - - TBB = LastInst->getOperand(LastNumOp-1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(BranchCode)); - - for (int i=0; igetOperand(i)); - } - - return false; - } + AnalyzeCondBr(LastInst, LastOpc, TBB, Cond); + return false; } - - // Get the instruction before it if it is a terminator. 
- MachineInstr *SecondLastInst = I; - + + // If we reached here, there are two branches. // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) + if (++I != REnd && isUnpredicatedTerminator(&*I)) return true; - // If the block ends with Mips::J and a Mips::BNE/Mips::BEQ, handle it. - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - Mips::CondCode BranchCode = GetCondFromBranchOpc(SecondLastOpc); + // If second to last instruction is an unconditional branch, + // analyze it and remove the last instruction. + if (SecondLastOpc == Mips::J) { + // Return if the last instruction cannot be removed. + if (!AllowModify) + return true; - if (BranchCode != Mips::COND_INVALID && LastOpc == Mips::J) { - int SecondNumOp = SecondLastInst->getNumOperands(); - - TBB = SecondLastInst->getOperand(SecondNumOp-1).getMBB(); - Cond.push_back(MachineOperand::CreateImm(BranchCode)); - - for (int i=0; igetOperand(i)); - } - - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two unconditional branches, handle it. The last - // one is not executed, so remove it. - if ((SecondLastOpc == Mips::J) && (LastOpc == Mips::J)) { TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); + LastInst->eraseFromParent(); return false; } - // Otherwise, can't handle this. - return true; + // Conditional branch followed by an unconditional branch. + // The last one must be unconditional. 
+ if (LastOpc != Mips::J) + return true; + + AnalyzeCondBr(SecondLastInst, SecondLastOpc, TBB, Cond); + FBB = LastInst->getOperand(0).getMBB(); + + return false; +} + +void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl& Cond) + const { + unsigned Opc = Cond[0].getImm(); + const TargetInstrDesc &TID = get(Opc); + MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID); + + for (unsigned i = 1; i < Cond.size(); ++i) + MIB.addReg(Cond[i].getReg()); + + MIB.addMBB(TBB); } unsigned MipsInstrInfo:: -InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, +InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, DebugLoc DL) const { // Shouldn't be a fall through. assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 3 || Cond.size() == 2 || Cond.size() == 0) && - "Mips branch conditions can have two|three components!"); - if (FBB == 0) { // One way branch. - if (Cond.empty()) { - // Unconditional branch? - BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); - } else { - // Conditional branch. - unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); - const TargetInstrDesc &TID = get(Opc); + // # of condition operands: + // Unconditional branches: 0 + // Floating point branches: 1 (opc) + // Int BranchZero: 2 (opc, reg) + // Int Branch: 3 (opc, reg0, reg1) + assert((Cond.size() <= 3) && + "# of Mips branch conditions must be <= 3!"); - if (TID.getNumOperands() == 3) - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) - .addReg(Cond[2].getReg()) - .addMBB(TBB); - else - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()) - .addMBB(TBB); - - } - return 1; - } - // Two-way Conditional branch. 
- unsigned Opc = GetCondBranchFromCond((Mips::CondCode)Cond[0].getImm()); - const TargetInstrDesc &TID = get(Opc); + if (FBB) { + BuildCondBr(MBB, TBB, DL, Cond); + BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); + return 2; + } - if (TID.getNumOperands() == 3) - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addReg(Cond[2].getReg()) - .addMBB(TBB); - else - BuildMI(&MBB, DL, TID).addReg(Cond[1].getReg()).addMBB(TBB); - - BuildMI(&MBB, DL, get(Mips::J)).addMBB(FBB); - return 2; + // One way branch. + // Unconditional branch. + if (Cond.empty()) + BuildMI(&MBB, DL, get(Mips::J)).addMBB(TBB); + else // Conditional branch. + BuildCondBr(MBB, TBB, DL, Cond); + return 1; } unsigned MipsInstrInfo:: -RemoveBranch(MachineBasicBlock &MBB) const +RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (I->getOpcode() != Mips::J && - GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) return 1; - --I; - if (GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; + MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend(); + MachineBasicBlock::reverse_iterator FirstBr; + unsigned removed; + + // Skip all the debug instructions. + while (I != REnd && I->isDebugValue()) + ++I; + + FirstBr = I; + + // Up to 2 branches are removed. + // Note that indirect branches are not removed. + for(removed = 0; I != REnd && removed < 2; ++I, ++removed) + if (!GetAnalyzableBrOpc(I->getOpcode())) + break; + + MBB.erase(I.base(), FirstBr.base()); + + return removed; } -/// ReverseBranchCondition - Return the inverse opcode of the +/// ReverseBranchCondition - Return the inverse opcode of the /// specified Branch instruction. 
bool MipsInstrInfo:: -ReverseBranchCondition(SmallVectorImpl &Cond) const +ReverseBranchCondition(SmallVectorImpl &Cond) const { - assert( (Cond.size() == 3 || Cond.size() == 2) && + assert( (Cond.size() && Cond.size() <= 3) && "Invalid Mips branch condition!"); - Cond[0].setImm(GetOppositeBranchCondition((Mips::CondCode)Cond[0].getImm())); + Cond[0].setImm(Mips::GetOppositeBranchOpc(Cond[0].getImm())); return false; } diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 52a3d39840ba..5fdbf1f230a0 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -37,7 +37,7 @@ namespace Mips { // To be used with float branch True FCOND_F, FCOND_UN, - FCOND_EQ, + FCOND_OEQ, FCOND_UEQ, FCOND_OLT, FCOND_ULT, @@ -57,8 +57,8 @@ namespace Mips { // above ones, but are used with a branch False; FCOND_T, FCOND_OR, - FCOND_NEQ, - FCOND_OGL, + FCOND_UNE, + FCOND_ONE, FCOND_UGE, FCOND_OGE, FCOND_UGT, @@ -70,27 +70,15 @@ namespace Mips { FCOND_NLT, FCOND_GE, FCOND_NLE, - FCOND_GT, - - // Only integer conditions - COND_E, - COND_GZ, - COND_GEZ, - COND_LZ, - COND_LEZ, - COND_NE, - COND_INVALID + FCOND_GT }; - - // Turn condition code into conditional branch opcode. - unsigned GetCondBranchFromCond(CondCode CC); - /// GetOppositeBranchCondition - Return the inverse of the specified cond, - /// e.g. turning COND_E to COND_NE. - CondCode GetOppositeBranchCondition(Mips::CondCode CC); + /// GetOppositeBranchOpc - Return the inverse of the specified + /// opcode, e.g. turning BEQ to BNE. 
+ unsigned GetOppositeBranchOpc(unsigned Opc); /// MipsCCToString - Map each FP condition code to its string - inline static const char *MipsFCCToString(Mips::CondCode CC) + inline static const char *MipsFCCToString(Mips::CondCode CC) { switch (CC) { default: llvm_unreachable("Unknown condition code"); @@ -98,10 +86,10 @@ namespace Mips { case FCOND_T: return "f"; case FCOND_UN: case FCOND_OR: return "un"; - case FCOND_EQ: - case FCOND_NEQ: return "eq"; + case FCOND_OEQ: + case FCOND_UNE: return "eq"; case FCOND_UEQ: - case FCOND_OGL: return "ueq"; + case FCOND_ONE: return "ueq"; case FCOND_OLT: case FCOND_UGE: return "olt"; case FCOND_ULT: @@ -121,11 +109,11 @@ namespace Mips { case FCOND_LT: case FCOND_NLT: return "lt"; case FCOND_NGE: - case FCOND_GE: return "ge"; + case FCOND_GE: return "nge"; case FCOND_LE: - case FCOND_NLE: return "nle"; + case FCOND_NLE: return "le"; case FCOND_NGT: - case FCOND_GT: return "gt"; + case FCOND_GT: return "ngt"; } } } @@ -138,27 +126,27 @@ namespace MipsII { enum TOF { //===------------------------------------------------------------------===// // Mips Specific MachineOperand flags. - + MO_NO_FLAG, /// MO_GOT - Represents the offset into the global offset table at which /// the address the relocation entry symbol resides during execution. MO_GOT, - /// MO_GOT_CALL - Represents the offset into the global offset table at - /// which the address of a call site relocation entry symbol resides + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides /// during execution. This is different from the above since this flag /// can only be present in call instructions. MO_GOT_CALL, - /// MO_GPREL - Represents the offset from the current gp value to be used + /// MO_GPREL - Represents the offset from the current gp value to be used /// for the relocatable object file being produced. 
MO_GPREL, - /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol - /// address. - MO_ABS_HILO - + /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO }; } @@ -181,7 +169,7 @@ class MipsInstrInfo : public TargetInstrInfoImpl { /// any side effects other than loading from the stack slot. virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; - + /// isStoreToStackSlot - If the specified machine instruction is a direct /// store to a stack slot, return the virtual or physical register number of /// the source reg along with the FrameIndex of the loaded stack slot. If @@ -189,13 +177,19 @@ class MipsInstrInfo : public TargetInstrInfoImpl { /// any side effects other than storing to the stack slot. virtual unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - + /// Branch Analysis virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const; virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + +private: + void BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB, DebugLoc DL, + const SmallVectorImpl& Cond) const; + +public: virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, const SmallVectorImpl &Cond, @@ -220,7 +214,7 @@ class MipsInstrInfo : public TargetInstrInfoImpl { bool ReverseBranchCondition(SmallVectorImpl &Cond) const; /// Insert nop instruction when hazard condition is found - virtual void insertNoop(MachineBasicBlock &MBB, + virtual void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const; /// getGlobalBaseReg - Return a virtual register initialized with the diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index b70266ac3e80..19b9c359ebb0 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ 
-19,18 +19,19 @@ include "MipsInstrFormats.td" def SDT_MipsRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>; def SDT_MipsJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, iPTR>]>; -def SDT_MipsSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 2>, - SDTCisSameAs<2, 3>, SDTCisInt<1>]>; def SDT_MipsCMov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, SDTCisSameAs<3, 4>, - SDTCisInt<4>]>; + SDTCisSameAs<1, 2>, + SDTCisSameAs<3, 4>, + SDTCisInt<4>]>; def SDT_MipsCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>; def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; -def SDT_MipsMAddMSub : SDTypeProfile<0, 4, +def SDT_MipsMAddMSub : SDTypeProfile<0, 4, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, - SDTCisSameAs<1, 2>, + SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3>]>; - +def SDT_MipsDivRem : SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, + SDTCisSameAs<0, 1>]>; // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, @@ -54,9 +55,6 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MipsCallSeqStart, def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MipsCallSeqEnd, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -// Select Condition Code -def MipsSelectCC : SDNode<"MipsISD::SelectCC", SDT_MipsSelectCC>; - // MAdd*/MSub* nodes def MipsMAdd : SDNode<"MipsISD::MAdd", SDT_MipsMAddMSub, [SDNPOptInGlue, SDNPOutGlue]>; @@ -67,6 +65,12 @@ def MipsMSub : SDNode<"MipsISD::MSub", SDT_MipsMAddMSub, def MipsMSubu : SDNode<"MipsISD::MSubu", SDT_MipsMAddMSub, [SDNPOptInGlue, SDNPOutGlue]>; +// DivRem(u) nodes +def MipsDivRem : SDNode<"MipsISD::DivRem", SDT_MipsDivRem, + [SDNPOutGlue]>; +def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem, + [SDNPOutGlue]>; + //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// @@ -165,7 +169,7 @@ class ArithOverflowI op, string instr_asm, SDNode OpNode, let rd = 0, shamt = 0, Defs = [HI, LO], Uses = [HI, LO] in class MArithR func, string instr_asm, SDNode op> : FR<0x1c, func, (outs), (ins CPURegs:$rs, CPURegs:$rt), - !strconcat(instr_asm, "\t$rs, $rt"), + !strconcat(instr_asm, "\t$rs, $rt"), [(op CPURegs:$rs, CPURegs:$rt, LO, HI)], IIImul>; // Logical @@ -185,7 +189,7 @@ class LogicNOR op, bits<6> func, string instr_asm>: [(set CPURegs:$dst, (not (or CPURegs:$b, CPURegs:$c)))], IIAlu>; // Shifts -class LogicR_shift_rotate_imm func, bits<5> _rs, string instr_asm, +class LogicR_shift_rotate_imm func, bits<5> _rs, string instr_asm, SDNode OpNode>: FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$b, shamt:$c), !strconcat(instr_asm, "\t$dst, $b, $c"), @@ -193,7 +197,7 @@ class LogicR_shift_rotate_imm func, bits<5> _rs, string instr_asm, let rs = _rs; } -class LogicR_shift_rotate_reg func, bits<5> _shamt, string instr_asm, +class LogicR_shift_rotate_reg func, bits<5> _shamt, string instr_asm, SDNode OpNode>: FR<0x00, func, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b), !strconcat(instr_asm, "\t$dst, $b, $c"), @@ -283,9 +287,16 @@ let isCall=1, hasDelaySlot=1, } // Mul, Div -class MulDiv func, string instr_asm, InstrItinClass itin>: - FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), - !strconcat(instr_asm, "\t$a, $b"), [], itin>; +let Defs = [HI, LO] in { + class Mul func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), + !strconcat(instr_asm, "\t$a, $b"), [], itin>; + + class Div func, string instr_asm, InstrItinClass itin>: + FR<0x00, func, (outs), (ins CPURegs:$a, CPURegs:$b), + !strconcat(instr_asm, "\t$$zero, $a, $b"), + [(op CPURegs:$a, CPURegs:$b)], itin>; +} // Move from Hi/Lo class MoveFromLOHI func, string instr_asm>: @@ -348,6 +359,11 @@ def REORDER : MipsPseudo<(outs), (ins), ".set\treorder", []>; 
def NOMACRO : MipsPseudo<(outs), (ins), ".set\tnomacro", []>; def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>; +// These macros are inserted to prevent GAS from complaining +// when using the AT register. +def NOAT : MipsPseudo<(outs), (ins), ".set\tnoat", []>; +def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>; + // When handling PIC code the assembler needs .cpload and .cprestore // directives. If the real instructions corresponding these directives // are used, we have the same behavior, but get also a bunch of warnings @@ -355,18 +371,6 @@ def NOREORDER : MipsPseudo<(outs), (ins), ".set\tnoreorder", []>; def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins uimm16:$loc), ".cprestore\t$loc\n", []>; -// The supported Mips ISAs dont have any instruction close to the SELECT_CC -// operation. The solution is to create a Mips pseudo SELECT_CC instruction -// (MipsSelectCC), use LowerSELECT_CC to generate this instruction and finally -// replace it for real supported nodes into EmitInstrWithCustomInserter -let usesCustomInserter = 1 in { - class PseudoSelCC: - MipsPseudo<(outs RC:$dst), (ins CPURegs:$CmpRes, RC:$T, RC:$F), asmstr, - [(set RC:$dst, (MipsSelectCC CPURegs:$CmpRes, RC:$T, RC:$F))]>; -} - -def Select_CC : PseudoSelCC; - //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -447,12 +451,10 @@ let isReturn=1, isTerminator=1, hasDelaySlot=1, "jr\t$target", [(MipsRet CPURegs:$target)], IIBranch>; /// Multiply and Divide Instructions. 
-let Defs = [HI, LO] in { - def MULT : MulDiv<0x18, "mult", IIImul>; - def MULTu : MulDiv<0x19, "multu", IIImul>; - def DIV : MulDiv<0x1a, "div", IIIdiv>; - def DIVu : MulDiv<0x1b, "divu", IIIdiv>; -} +def MULT : Mul<0x18, "mult", IIImul>; +def MULTu : Mul<0x19, "multu", IIImul>; +def SDIV : Div; +def UDIV : Div; let Defs = [HI] in def MTHI : MoveToLOHI<0x11, "mthi">; @@ -489,10 +491,19 @@ let Predicates = [HasSwap] in { def MIPS_CMOV_ZERO : PatLeaf<(i32 0)>; def MIPS_CMOV_NZERO : PatLeaf<(i32 1)>; -let Predicates = [HasCondMov], Constraints = "$F = $dst" in { - def MOVN : CondMov<0x0a, "movn", MIPS_CMOV_NZERO>; - def MOVZ : CondMov<0x0b, "movz", MIPS_CMOV_ZERO>; -} +// Conditional moves: +// These instructions are expanded in +// MipsISelLowering::EmitInstrWithCustomInserter if target does not have +// conditional move instructions. +// flag:int, data:int +let usesCustomInserter = 1, shamt = 0, Constraints = "$F = $dst" in + class CondMovIntInt funct, string instr_asm> : + FR<0, funct, (outs CPURegs:$dst), + (ins CPURegs:$T, CPURegs:$cond, CPURegs:$F), + !strconcat(instr_asm, "\t$dst, $T, $cond"), [], NoItinerary>; + +def MOVZ_I : CondMovIntInt<0x0a, "movz">; +def MOVN_I : CondMovIntInt<0x0b, "movn">; /// No operation let addr=0 in @@ -533,7 +544,7 @@ def : Pat<(subc CPURegs:$lhs, CPURegs:$rhs), (SUBu CPURegs:$lhs, CPURegs:$rhs)>; def : Pat<(addc CPURegs:$lhs, CPURegs:$rhs), (ADDu CPURegs:$lhs, CPURegs:$rhs)>; -def : Pat<(addc CPURegs:$src, imm:$imm), +def : Pat<(addc CPURegs:$src, immSExt16:$imm), (ADDiu CPURegs:$src, imm:$imm)>; // Call @@ -546,8 +557,11 @@ def : Pat<(MipsJmpLink (i32 texternalsym:$dst)), // hi/lo relocs def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>; +def : Pat<(MipsHi tblockaddress:$in), (LUi tblockaddress:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tglobaladdr:$lo)), (ADDiu CPURegs:$hi, tglobaladdr:$lo)>; +def : Pat<(add CPURegs:$hi, (MipsLo tblockaddress:$lo)), + (ADDiu CPURegs:$hi, tblockaddress:$lo)>; def : Pat<(MipsHi 
tjumptable:$in), (LUi tjumptable:$in)>; def : Pat<(add CPURegs:$hi, (MipsLo tjumptable:$lo)), @@ -599,33 +613,43 @@ def : Pat<(brcond CPURegs:$cond, bb:$dst), (BNE CPURegs:$cond, ZERO, bb:$dst)>; // select patterns -def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLT CPURegs:$lhs, CPURegs:$rhs))>; -def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs))>; -def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs))>; -def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLTiu CPURegs:$lh, immSExt16:$rh))>; +multiclass MovzPats { + def : Pat<(select (setge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLT CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select (setuge CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTu CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select (setge CPURegs:$lhs, immSExt16:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTi CPURegs:$lhs, immSExt16:$rhs), RC:$F)>; + def : Pat<(select (setuge CPURegs:$lh, immSExt16:$rh), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTiu CPURegs:$lh, immSExt16:$rh), RC:$F)>; + def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLT CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; + def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs), RC:$F)>; + def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVZInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select (seteq CPURegs:$lhs, 0), RC:$T, RC:$F), + (MOVZInst RC:$T, CPURegs:$lhs, RC:$F)>; +} -def : Pat<(select (setle CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, 
(SLT CPURegs:$rhs, CPURegs:$lhs))>; -def : Pat<(select (setule CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (SLTu CPURegs:$rhs, CPURegs:$lhs))>; +multiclass MovnPats { + def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), RC:$T, RC:$F), + (MOVNInst RC:$T, (XOR CPURegs:$lhs, CPURegs:$rhs), RC:$F)>; + def : Pat<(select CPURegs:$cond, RC:$T, RC:$F), + (MOVNInst RC:$T, CPURegs:$cond, RC:$F)>; + def : Pat<(select (setne CPURegs:$lhs, 0), RC:$T, RC:$F), + (MOVNInst RC:$T, CPURegs:$lhs, RC:$F)>; +} -def : Pat<(select (seteq CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVZ CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>; -def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), CPURegs:$T, CPURegs:$F), - (MOVN CPURegs:$F, CPURegs:$T, (XOR CPURegs:$lhs, CPURegs:$rhs))>; - -def : Pat<(select CPURegs:$cond, CPURegs:$T, CPURegs:$F), - (MOVN CPURegs:$F, CPURegs:$T, CPURegs:$cond)>; +defm : MovzPats; +defm : MovnPats; // select patterns with got access -def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), - (i32 tglobaladdr:$T), CPURegs:$F), - (MOVN CPURegs:$F, (ADDiu GP, tglobaladdr:$T), - (XOR CPURegs:$lhs, CPURegs:$rhs))>; +let AddedComplexity = 10 in + def : Pat<(select (setne CPURegs:$lhs, CPURegs:$rhs), + (i32 tglobaladdr:$T), CPURegs:$F), + (MOVN_I CPURegs:$F, (ADDiu GP, tglobaladdr:$T), + (XOR CPURegs:$lhs, CPURegs:$rhs))>; // setcc patterns def : Pat<(seteq CPURegs:$lhs, CPURegs:$rhs), diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h index 15a867ead53e..41b719207b7b 100644 --- a/lib/Target/Mips/MipsMCAsmInfo.h +++ b/lib/Target/Mips/MipsMCAsmInfo.h @@ -19,7 +19,7 @@ namespace llvm { class Target; - + class MipsMCAsmInfo : public MCAsmInfo { public: explicit MipsMCAsmInfo(const Target &T, StringRef TT); diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 3719e580425f..c09b129f6750 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ 
b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -38,7 +38,7 @@ using namespace llvm; -MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, +MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, const TargetInstrInfo &tii) : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP), Subtarget(ST), TII(tii) {} @@ -46,7 +46,7 @@ MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST, /// getRegisterNumbering - Given the enum value for some register, e.g. /// Mips::RA, return the number that it corresponds to (e.g. 31). unsigned MipsRegisterInfo:: -getRegisterNumbering(unsigned RegEnum) +getRegisterNumbering(unsigned RegEnum) { switch (RegEnum) { case Mips::ZERO : case Mips::F0 : case Mips::D0 : return 0; @@ -82,30 +82,30 @@ getRegisterNumbering(unsigned RegEnum) case Mips::FP : case Mips::F30: case Mips::D15: return 30; case Mips::RA : case Mips::F31: return 31; default: llvm_unreachable("Unknown register number!"); - } + } return 0; // Not reached } unsigned MipsRegisterInfo::getPICCallReg() { return Mips::T9; } //===----------------------------------------------------------------------===// -// Callee Saved Registers methods +// Callee Saved Registers methods //===----------------------------------------------------------------------===// /// Mips Callee Saved Registers const unsigned* MipsRegisterInfo:: -getCalleeSavedRegs(const MachineFunction *MF) const +getCalleeSavedRegs(const MachineFunction *MF) const { // Mips callee-save register range is $16-$23, $f20-$f30 static const unsigned SingleFloatOnlyCalleeSavedRegs[] = { - Mips::S0, Mips::S1, Mips::S2, Mips::S3, + Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, - Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25, + Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30, 0 }; static const unsigned BitMode32CalleeSavedRegs[] = { - Mips::S0, Mips::S1, Mips::S2, Mips::S3, + Mips::S0, 
Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7, Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, 0 }; @@ -132,11 +132,11 @@ getReservedRegs(const MachineFunction &MF) const { if (!Subtarget.isSingleFloat()) for (unsigned FReg=(Mips::F0)+1; FReg < Mips::F30; FReg+=2) Reserved.set(FReg); - + return Reserved; } -// This function eliminate ADJCALLSTACKDOWN, +// This function eliminate ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void MipsRegisterInfo:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, @@ -157,7 +157,7 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned i = 0; while (!MI.getOperand(i).isFI()) { ++i; - assert(i < MI.getNumOperands() && + assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); } @@ -179,8 +179,43 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n"); - MI.getOperand(i-1).ChangeToImmediate(Offset); - MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false); + unsigned NewReg = 0; + int NewImm = 0; + MachineBasicBlock &MBB = *MI.getParent(); + bool ATUsed; + unsigned OrigReg = getFrameRegister(MF); + int OrigImm = Offset; + +// OrigImm fits in the 16-bit field + if (OrigImm < 0x8000 && OrigImm >= -0x8000) { + NewReg = OrigReg; + NewImm = OrigImm; + ATUsed = false; + } + else { + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); + DebugLoc DL = II->getDebugLoc(); + int ImmLo = OrigImm & 0xffff; + int ImmHi = (((unsigned)OrigImm & 0xffff0000) >> 16) + + ((OrigImm & 0x8000) != 0); + + // FIXME: change this when mips goes MC". + BuildMI(MBB, II, DL, TII->get(Mips::NOAT)); + BuildMI(MBB, II, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi); + BuildMI(MBB, II, DL, TII->get(Mips::ADDu), Mips::AT).addReg(OrigReg) + .addReg(Mips::AT); + NewReg = Mips::AT; + NewImm = ImmLo; + + ATUsed = true; + } + + // FIXME: change this when mips goes MC". 
+ if (ATUsed) + BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO)); + + MI.getOperand(i).ChangeToRegister(NewReg, false); + MI.getOperand(i-1).ChangeToImmediate(NewImm); } void MipsRegisterInfo:: diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index a7f4bf987ae9..767359fd6ed4 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -26,7 +26,7 @@ class Type; struct MipsRegisterInfo : public MipsGenRegisterInfo { const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; - + MipsRegisterInfo(const MipsSubtarget &Subtarget, const TargetInstrInfo &tii); /// getRegisterNumbering - Given the enum value for some register, e.g. diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 60efe31fbaf8..9f9cae7d11f7 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -17,7 +17,7 @@ class MipsReg : Register { let Namespace = "Mips"; } -class MipsRegWithSubRegs subregs> +class MipsRegWithSubRegs subregs> : RegisterWithSubRegs { field bits<5> Num; let Namespace = "Mips"; @@ -83,7 +83,7 @@ let Namespace = "Mips" in { def SP : MipsGPRReg< 29, "SP">, DwarfRegNum<[29]>; def FP : MipsGPRReg< 30, "FP">, DwarfRegNum<[30]>; def RA : MipsGPRReg< 31, "RA">, DwarfRegNum<[31]>; - + /// Mips Single point precision FPU Registers def F0 : FPR< 0, "F0">, DwarfRegNum<[32]>; def F1 : FPR< 1, "F1">, DwarfRegNum<[33]>; @@ -117,7 +117,7 @@ let Namespace = "Mips" in { def F29 : FPR<29, "F29">, DwarfRegNum<[61]>; def F30 : FPR<30, "F30">, DwarfRegNum<[62]>; def F31 : FPR<31, "F31">, DwarfRegNum<[63]>; - + /// Mips Double point precision FPU Registers (aliased /// with the single precision to hold 64 bit values) def D0 : AFPR< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>; @@ -149,11 +149,11 @@ let Namespace = "Mips" in { // Register Classes //===----------------------------------------------------------------------===// -def CPURegs : 
RegisterClass<"Mips", [i32], 32, +def CPURegs : RegisterClass<"Mips", [i32], 32, // Return Values and Arguments [V0, V1, A0, A1, A2, A3, // Not preserved across procedure calls - T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, // Callee save S0, S1, S2, S3, S4, S5, S6, S7, // Reserved @@ -173,16 +173,16 @@ def CPURegs : RegisterClass<"Mips", [i32], 32, // 64bit fp: // * FGR64 - 32 64-bit registers -// * AFGR64 - 16 32-bit even registers (32-bit FP Mode) +// * AFGR64 - 16 32-bit even registers (32-bit FP Mode) // // 32bit fp: // * FGR32 - 16 32-bit even registers // * FGR32 - 32 32-bit registers (single float only mode) -def FGR32 : RegisterClass<"Mips", [f32], 32, +def FGR32 : RegisterClass<"Mips", [f32], 32, // Return Values and Arguments [F0, F1, F2, F3, F12, F13, F14, F15, // Not preserved across procedure calls - F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19, + F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19, // Callee save F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, // Reserved @@ -195,17 +195,17 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, let MethodBodies = [{ static const unsigned MIPS_FGR32[] = { - Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13, - Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7, - Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17, - Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23, - Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, + Mips::F0, Mips::F1, Mips::F2, Mips::F3, Mips::F12, Mips::F13, + Mips::F14, Mips::F15, Mips::F4, Mips::F5, Mips::F6, Mips::F7, + Mips::F8, Mips::F9, Mips::F10, Mips::F11, Mips::F16, Mips::F17, + Mips::F18, Mips::F19, Mips::F20, Mips::F21, Mips::F22, Mips::F23, + Mips::F24, Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29, Mips::F30 }; static const unsigned MIPS_SVR4_FGR32[] = { - Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4, - Mips::F6, Mips::F8, Mips::F10, Mips::F16, 
Mips::F18, + Mips::F0, Mips::F2, Mips::F12, Mips::F14, Mips::F4, + Mips::F6, Mips::F8, Mips::F10, Mips::F16, Mips::F18, Mips::F20, Mips::F22, Mips::F24, Mips::F26, Mips::F28, Mips::F30, }; @@ -217,7 +217,7 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, if (Subtarget.isSingleFloat()) return MIPS_FGR32; else - return MIPS_SVR4_FGR32; + return MIPS_SVR4_FGR32; } FGR32Class::iterator @@ -233,11 +233,11 @@ def FGR32 : RegisterClass<"Mips", [f32], 32, }]; } -def AFGR64 : RegisterClass<"Mips", [f64], 64, +def AFGR64 : RegisterClass<"Mips", [f64], 64, // Return Values and Arguments [D0, D1, D6, D7, // Not preserved across procedure calls - D2, D3, D4, D5, D8, D9, + D2, D3, D4, D5, D8, D9, // Callee save D10, D11, D12, D13, D14, // Reserved diff --git a/lib/Target/Mips/MipsSchedule.td b/lib/Target/Mips/MipsSchedule.td index 49ca5d19c9cf..00be8ee94431 100644 --- a/lib/Target/Mips/MipsSchedule.td +++ b/lib/Target/Mips/MipsSchedule.td @@ -14,7 +14,7 @@ def ALU : FuncUnit; def IMULDIV : FuncUnit; //===----------------------------------------------------------------------===// -// Instruction Itinerary classes used for Mips +// Instruction Itinerary classes used for Mips //===----------------------------------------------------------------------===// def IIAlu : InstrItinClass; def IILoad : InstrItinClass; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index db114da00d73..70747f5da137 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -17,7 +17,7 @@ using namespace llvm; MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS, - bool little) : + bool little) : MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false), IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true), HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false), @@ -33,7 +33,7 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS, if 
(TT.find("linux") == std::string::npos) IsLinux = false; - // When only the target triple is specified and is + // When only the target triple is specified and is // a allegrex target, set the features. We also match // big and little endian allegrex cores (dont really // know if a big one exists) diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index e4f4b334e13a..096bbed7b047 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -26,7 +26,7 @@ class MipsSubtarget : public TargetSubtarget { public: enum MipsABIEnum { O32, O64, N32, N64, EABI - }; + }; protected: @@ -34,10 +34,10 @@ class MipsSubtarget : public TargetSubtarget { Mips1, Mips2, Mips3, Mips4, Mips32, Mips32r2 }; - // Mips architecture version + // Mips architecture version MipsArchEnum MipsArchVersion; - // Mips supported ABIs + // Mips supported ABIs MipsABIEnum MipsABI; // IsLittle - The target is Little Endian @@ -61,14 +61,14 @@ class MipsSubtarget : public TargetSubtarget { bool IsLinux; /// Features related to the presence of specific instructions. - + // HasSEInReg - SEB and SEH (signext in register) instructions. bool HasSEInReg; // HasCondMov - Conditional mov (MOVZ, MOVN) instructions. bool HasCondMov; - // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu) + // HasMulDivAdd - Multiply add and sub (MADD, MADDu, MSUB, MSUBu) // instructions. bool HasMulDivAdd; @@ -93,14 +93,14 @@ class MipsSubtarget : public TargetSubtarget { /// This constructor initializes the data members to match that /// of the specified triple. MipsSubtarget(const std::string &TT, const std::string &FS, bool little); - - /// ParseSubtargetFeatures - Parses features string setting specified + + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); bool isMips1() const { return MipsArchVersion == Mips1; } - bool isMips32() const { return MipsArchVersion >= Mips32; } + bool isMips32() const { return MipsArchVersion >= Mips32; } bool isMips32r2() const { return MipsArchVersion == Mips32r2; } bool isLittle() const { return IsLittle; } diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 7a2dd1f651d2..53190b460041 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -75,3 +75,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) PM.add(createMipsDelaySlotFillerPass(*this)); return true; } + +bool MipsTargetMachine:: +addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { + PM.add(createMipsExpandPseudoPass(*this)); + return true; +} diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index 43ab7984520e..badb652922b6 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -63,6 +63,7 @@ namespace llvm { CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPostRegAlloc(PassManagerBase &, CodeGenOpt::Level); }; /// MipselTargetMachine - Mipsel target machine. diff --git a/lib/Target/Mips/MipsTargetObjectFile.h b/lib/Target/Mips/MipsTargetObjectFile.h index 237b1602cf3d..c394a9dc02e4 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.h +++ b/lib/Target/Mips/MipsTargetObjectFile.h @@ -18,22 +18,22 @@ namespace llvm { const MCSection *SmallDataSection; const MCSection *SmallBSSSection; public: - + void Initialize(MCContext &Ctx, const TargetMachine &TM); - + /// IsGlobalInSmallSection - Return true if this global address should be /// placed into small data/bss section. 
bool IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind)const; bool IsGlobalInSmallSection(const GlobalValue *GV, - const TargetMachine &TM) const; - + const TargetMachine &TM) const; + const MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const; - + // TODO: Classify globals as mips wishes. }; } // end namespace llvm diff --git a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp index cc3d61e4e71d..a8d6fe94b1ad 100644 --- a/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp +++ b/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp @@ -14,7 +14,7 @@ using namespace llvm; Target llvm::TheMipsTarget, llvm::TheMipselTarget; -extern "C" void LLVMInitializeMipsTargetInfo() { +extern "C" void LLVMInitializeMipsTargetInfo() { RegisterTarget X(TheMipsTarget, "mips", "Mips"); RegisterTarget Y(TheMipselTarget, "mipsel", "Mipsel"); diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h index 19385ba1ff8c..ec2be9291a04 100644 --- a/lib/Target/PTX/PTX.h +++ b/lib/Target/PTX/PTX.h @@ -29,6 +29,11 @@ namespace llvm { PARAMETER = 3, SHARED = 4 }; + + enum Predicate { + PRED_NORMAL = 0, + PRED_NEGATE = 1 + }; } // namespace PTX FunctionPass *createPTXISelDag(PTXTargetMachine &TM, @@ -37,7 +42,8 @@ namespace llvm { FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel); - extern Target ThePTXTarget; + extern Target ThePTX32Target; + extern Target ThePTX64Target; } // namespace llvm; // Defines symbolic names for PTX registers. diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td index 8b1a1b18da54..ae8326e3199c 100644 --- a/lib/Target/PTX/PTX.td +++ b/lib/Target/PTX/PTX.td @@ -19,8 +19,35 @@ include "llvm/Target/Target.td" // Subtarget Features. 
//===----------------------------------------------------------------------===// -def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true", - "Enable sm_20 target architecture">; +//===- Architectural Features ---------------------------------------------===// + +def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true", + "Do not demote .f64 to .f32">; + +//===- PTX Version --------------------------------------------------------===// + +def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0", + "Use PTX Language Version 2.0", + []>; + +def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1", + "Use PTX Language Version 2.1", + [FeaturePTX20]>; + +def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2", + "Use PTX Language Version 2.2", + [FeaturePTX21]>; + +//===- PTX Shader Model ---------------------------------------------------===// + +def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0", + "Enable Shader Model 1.0 compliance">; +def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3", + "Enable Shader Model 1.3 compliance", + [FeatureSM10, FeatureDouble]>; +def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0", + "Enable Shader Model 2.0 compliance", + [FeatureSM13]>; //===----------------------------------------------------------------------===// // PTX supported processors. 
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index a6059974ab3d..29c4781de654 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" @@ -37,13 +38,6 @@ using namespace llvm; -static cl::opt -OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4")); - -static cl::opt -OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"), - cl::init("sm_10")); - namespace { class PTXAsmPrinter : public AsmPrinter { public: @@ -68,6 +62,7 @@ class PTXAsmPrinter : public AsmPrinter { const char *Modifier = 0); void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS, const char *Modifier = 0); + void printPredicateOperand(const MachineInstr *MI, raw_ostream &O); // autogen'd. 
void printInstruction(const MachineInstr *MI, raw_ostream &OS); @@ -82,27 +77,20 @@ class PTXAsmPrinter : public AsmPrinter { static const char PARAM_PREFIX[] = "__param_"; static const char *getRegisterTypeName(unsigned RegNo) { -#define TEST_REGCLS(cls, clsstr) \ +#define TEST_REGCLS(cls, clsstr) \ if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr; - TEST_REGCLS(RRegs32, s32); TEST_REGCLS(Preds, pred); + TEST_REGCLS(RRegu16, u16); + TEST_REGCLS(RRegu32, u32); + TEST_REGCLS(RRegu64, u64); + TEST_REGCLS(RRegf32, f32); + TEST_REGCLS(RRegf64, f64); #undef TEST_REGCLS llvm_unreachable("Not in any register class!"); return NULL; } -static const char *getInstructionTypeName(const MachineInstr *MI) { - for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (MO.getType() == MachineOperand::MO_Register) - return getRegisterTypeName(MO.getReg()); - } - - llvm_unreachable("No reg operand found in instruction!"); - return NULL; -} - static const char *getStateSpaceName(unsigned addressSpace) { switch (addressSpace) { default: llvm_unreachable("Unknown state space"); @@ -115,6 +103,28 @@ static const char *getStateSpaceName(unsigned addressSpace) { return NULL; } +static const char *getTypeName(const Type* type) { + while (true) { + switch (type->getTypeID()) { + default: llvm_unreachable("Unknown type"); + case Type::FloatTyID: return ".f32"; + case Type::DoubleTyID: return ".f64"; + case Type::IntegerTyID: + switch (type->getPrimitiveSizeInBits()) { + default: llvm_unreachable("Unknown integer bit-width"); + case 16: return ".u16"; + case 32: return ".u32"; + case 64: return ".u64"; + } + case Type::ArrayTyID: + case Type::PointerTyID: + type = dyn_cast(type)->getElementType(); + break; + } + } + return NULL; +} + bool PTXAsmPrinter::doFinalization(Module &M) { // XXX Temproarily remove global variables so that doFinalization() will not // emit them again (global variables are emitted at beginning). 
@@ -146,8 +156,12 @@ bool PTXAsmPrinter::doFinalization(Module &M) { void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) { - OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion)); - OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget)); + const PTXSubtarget& ST = TM.getSubtarget(); + + OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); + OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + + (ST.supportsDouble() ? "" + : ", map_f64_to_f32"))); OutStreamer.AddBlankLine(); // declare global variables @@ -186,17 +200,16 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { std::string str; str.reserve(64); - // Write instruction to str raw_string_ostream OS(str); + + // Emit predicate + printPredicateOperand(MI, OS); + + // Write instruction to str printInstruction(MI, OS); OS << ';'; OS.flush(); - // Replace "%type" if found - size_t pos; - if ((pos = str.find("%type")) != std::string::npos) - str.replace(pos, /*strlen("%type")==*/5, getInstructionTypeName(MI)); - StringRef strref = StringRef(str); OutStreamer.EmitRawText(strref); } @@ -213,11 +226,36 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum, OS << *Mang->getSymbol(MO.getGlobal()); break; case MachineOperand::MO_Immediate: - OS << (int) MO.getImm(); + OS << (long) MO.getImm(); + break; + case MachineOperand::MO_MachineBasicBlock: + OS << *MO.getMBB()->getSymbol(); break; case MachineOperand::MO_Register: OS << getRegisterName(MO.getReg()); break; + case MachineOperand::MO_FPImmediate: + APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt(); + bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID; + // Emit 0F for 32-bit floats and 0D for 64-bit doubles. + if (isFloat) { + OS << "0F"; + } + else { + OS << "0D"; + } + // Emit the encoded floating-point value. 
+ if (constFP.getZExtValue() > 0) { + OS << constFP.toString(16, false); + } + else { + OS << "00000000"; + // If We have a double-precision zero, pad to 8-bytes. + if (!isFloat) { + OS << "00000000"; + } + } + break; } } @@ -265,13 +303,77 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { decl += " "; } - // TODO: add types - decl += ".s32 "; - decl += gvsym->getName(); + if (PointerType::classof(gv->getType())) { + const PointerType* pointerTy = dyn_cast(gv->getType()); + const Type* elementTy = pointerTy->getElementType(); - if (ArrayType::classof(gv->getType()) || PointerType::classof(gv->getType())) - decl += "[]"; + decl += ".b8 "; + decl += gvsym->getName(); + decl += "["; + + if (elementTy->isArrayTy()) + { + assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); + + const ArrayType* arrayTy = dyn_cast(elementTy); + elementTy = arrayTy->getElementType(); + + unsigned numElements = arrayTy->getNumElements(); + + while (elementTy->isArrayTy()) { + + arrayTy = dyn_cast(elementTy); + elementTy = arrayTy->getElementType(); + + numElements *= arrayTy->getNumElements(); + } + + // FIXME: isPrimitiveType() == false for i16? + assert(elementTy->isSingleValueType() && + "Non-primitive types are not handled"); + + // Compute the size of the array, in bytes. + uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3) + * numElements; + + decl += utostr(arraySize); + } + + decl += "]"; + + // handle string constants (assume ConstantArray means string) + + if (gv->hasInitializer()) + { + Constant *C = gv->getInitializer(); + if (const ConstantArray *CA = dyn_cast(C)) + { + decl += " = {"; + + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) + { + if (i > 0) decl += ","; + + decl += "0x" + utohexstr(cast(CA->getOperand(i))->getZExtValue()); + } + + decl += "}"; + } + } + } + else { + // Note: this is currently the fall-through case and most likely generates + // incorrect code. 
+ decl += getTypeName(gv->getType()); + decl += " "; + + decl += gvsym->getName(); + + if (ArrayType::classof(gv->getType()) || + PointerType::classof(gv->getType())) + decl += "[]"; + } decl += ";"; @@ -313,16 +415,24 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { if (!MFI->argRegEmpty()) { decl += " ("; if (isKernel) { - for (int i = 0, e = MFI->getNumArg(); i != e; ++i) { - if (i != 0) + unsigned cnt = 0; + for(PTXMachineFunctionInfo::reg_iterator + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { + reg = *i; + assert(reg != PTX::NoRegister && "Not a valid register!"); + if (i != b) decl += ", "; - decl += ".param .s32 "; // TODO: add types + decl += ".param ."; + decl += getRegisterTypeName(reg); + decl += " "; decl += PARAM_PREFIX; - decl += utostr(i + 1); + decl += utostr(++cnt); } } else { for (PTXMachineFunctionInfo::reg_iterator - i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) { + i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; + i != e; ++i) { reg = *i; assert(reg != PTX::NoRegister && "Not a valid register!"); if (i != b) @@ -339,9 +449,29 @@ void PTXAsmPrinter::EmitFunctionDeclaration() { OutStreamer.EmitRawText(Twine(decl)); } +void PTXAsmPrinter:: +printPredicateOperand(const MachineInstr *MI, raw_ostream &O) { + int i = MI->findFirstPredOperandIdx(); + if (i == -1) + llvm_unreachable("missing predicate operand"); + + unsigned reg = MI->getOperand(i).getReg(); + int predOp = MI->getOperand(i+1).getImm(); + + DEBUG(dbgs() << "predicate: (" << reg << ", " << predOp << ")\n"); + + if (reg != PTX::NoRegister) { + O << '@'; + if (predOp == PTX::PRED_NEGATE) + O << '!'; + O << getRegisterName(reg); + } +} + #include "PTXGenAsmWriter.inc" // Force static initialization. 
extern "C" void LLVMInitializePTXAsmPrinter() { - RegisterAsmPrinter X(ThePTXTarget); + RegisterAsmPrinter X(ThePTX32Target); + RegisterAsmPrinter Y(ThePTX64Target); } diff --git a/lib/Target/PTX/PTXFrameLowering.h b/lib/Target/PTX/PTXFrameLowering.h index 574ae7a19dc2..9320676150df 100644 --- a/lib/Target/PTX/PTXFrameLowering.h +++ b/lib/Target/PTX/PTXFrameLowering.h @@ -27,7 +27,8 @@ class PTXFrameLowering : public TargetFrameLowering { public: explicit PTXFrameLowering(const PTXSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), STI(sti) { + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2), + STI(sti) { } /// emitProlog/emitEpilog - These methods insert prolog and epilog code into diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp index efb0e8b1af77..b3c85da7b446 100644 --- a/lib/Target/PTX/PTXISelDAGToDAG.cpp +++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp @@ -15,6 +15,7 @@ #include "PTXTargetMachine.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/DerivedTypes.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -42,8 +43,14 @@ class PTXDAGToDAGISel : public SelectionDAGISel { private: SDNode *SelectREAD_PARAM(SDNode *Node); + // We need this only because we can't match instruction BRAdp + // pattern (PTXbrcond bb:$d, ...) 
in PTXInstrInfo.td + SDNode *SelectBRCOND(SDNode *Node); + bool isImm(const SDValue &operand); bool SelectImm(const SDValue &operand, SDValue &imm); + + const PTXSubtarget& getSubtarget() const; }; // class PTXDAGToDAGISel } // namespace @@ -59,21 +66,62 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM, : SelectionDAGISel(TM, OptLevel) {} SDNode *PTXDAGToDAGISel::Select(SDNode *Node) { - if (Node->getOpcode() == PTXISD::READ_PARAM) - return SelectREAD_PARAM(Node); - else - return SelectCode(Node); + switch (Node->getOpcode()) { + case PTXISD::READ_PARAM: + return SelectREAD_PARAM(Node); + case ISD::BRCOND: + return SelectBRCOND(Node); + default: + return SelectCode(Node); + } } SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) { - SDValue index = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); + SDValue index = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + unsigned opcode; if (index.getOpcode() != ISD::TargetConstant) llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant"); + if (Node->getValueType(0) == MVT::i16) { + opcode = PTX::LDpiU16; + } + else if (Node->getValueType(0) == MVT::i32) { + opcode = PTX::LDpiU32; + } + else if (Node->getValueType(0) == MVT::i64) { + opcode = PTX::LDpiU64; + } + else if (Node->getValueType(0) == MVT::f32) { + opcode = PTX::LDpiF32; + } + else if (Node->getValueType(0) == MVT::f64) { + opcode = PTX::LDpiF64; + } + else { + llvm_unreachable("Unknown parameter type for ld.param"); + } + return PTXInstrInfo:: - GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index); + GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index); +} + +SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) { + assert(Node->getNumOperands() >= 3); + + SDValue Chain = Node->getOperand(0); + SDValue Pred = Node->getOperand(1); + SDValue Target = Node->getOperand(2); // branch target + SDValue PredOp = CurDAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + DebugLoc dl = Node->getDebugLoc(); + + 
assert(Target.getOpcode() == ISD::BasicBlock); + assert(Pred.getValueType() == MVT::i1); + + // Emit BRAdp + SDValue Ops[] = { Target, Pred, PredOp, Chain }; + return CurDAG->getMachineNode(PTX::BRAdp, dl, MVT::Other, Ops, 4); } // Match memory operand of the form [reg+reg] @@ -82,8 +130,11 @@ bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) { isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1))) return false; + assert(Addr.getValueType().isSimple() && "Type must be simple"); + R1 = Addr; - R2 = CurDAG->getTargetConstant(0, MVT::i32); + R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + return true; } @@ -95,8 +146,12 @@ bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base, if (isImm(Addr)) return false; // it is [reg] + + assert(Addr.getValueType().isSimple() && "Type must be simple"); + Base = Addr; - Offset = CurDAG->getTargetConstant(0, MVT::i32); + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + return true; } @@ -129,7 +184,10 @@ bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base, // is [imm]? 
if (SelectImm(Addr, Base)) { - Offset = CurDAG->getTargetConstant(0, MVT::i32); + assert(Addr.getValueType().isSimple() && "Type must be simple"); + + Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT()); + return true; } @@ -146,6 +204,13 @@ bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) { return false; ConstantSDNode *CN = cast(node); - imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), MVT::i32); + imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(), + operand.getValueType()); return true; } + +const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const +{ + return TM.getSubtarget(); +} + diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index e6d44907ed37..23b93daa433c 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,21 +28,60 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. 
addRegisterClass(MVT::i1, PTX::PredsRegisterClass); - addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass); + addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass); + addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass); + addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass); + addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass); + addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass); + setBooleanContents(ZeroOrOneBooleanContent); + setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + // Turn i16 (z)extload into load + (z)extend + setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand); + + // Turn f32 extload into load + fextend + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + + // Turn f64 truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + // Customize translation of memory addresses setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); + // Expand BR_CC into BRCOND + setOperationAction(ISD::BR_CC, MVT::Other, Expand); + + // Expand SELECT_CC into SETCC + setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + // need to lower SETCC of Preds into bitwise logic + setOperationAction(ISD::SETCC, MVT::i1, Custom); + // Compute derived properties from the register classes computeRegisterProperties(); } +MVT::SimpleValueType PTXTargetLowering::getSetCCResultType(EVT VT) const { + return MVT::i1; +} + SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { - default: llvm_unreachable("Unimplemented operand"); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); + default: + llvm_unreachable("Unimplemented operand"); + case 
ISD::SETCC: + return LowerSETCC(Op, DAG); + case ISD::GlobalAddress: + return LowerGlobalAddress(Op, DAG); } } @@ -49,6 +89,8 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: llvm_unreachable("Unknown opcode"); + case PTXISD::COPY_ADDRESS: + return "PTXISD::COPY_ADDRESS"; case PTXISD::READ_PARAM: return "PTXISD::READ_PARAM"; case PTXISD::EXIT: @@ -62,12 +104,43 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const { // Custom Lower Operation //===----------------------------------------------------------------------===// +SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer"); + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + SDValue Op2 = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + + // Look for X == 0, X == 1, X != 0, or X != 1 + // We can simplify these to bitwise logic + + if (Op1.getOpcode() == ISD::Constant && + (cast(Op1)->getZExtValue() == 1 || + cast(Op1)->isNullValue()) && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + + return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1); + } + + return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2); +} + SDValue PTXTargetLowering:: LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); const GlobalValue *GV = cast(Op)->getGlobal(); - return DAG.getTargetGlobalAddress(GV, dl, PtrVT); + + assert(PtrVT.isSimple() && "Pointer must be to primitive type."); + + SDValue targetGlobal = DAG.getTargetGlobalAddress(GV, dl, PtrVT); + SDValue movInstr = DAG.getNode(PTXISD::COPY_ADDRESS, + dl, + PtrVT.getSimpleVT(), + targetGlobal); + + return movInstr; } //===----------------------------------------------------------------------===// @@ -87,9 +160,13 @@ struct argmap_entry { bool operator==(MVT::SimpleValueType 
_VT) const { return VT == _VT; } } argmap[] = { argmap_entry(MVT::i1, PTX::PredsRegisterClass), - argmap_entry(MVT::i32, PTX::RRegs32RegisterClass) + argmap_entry(MVT::i16, PTX::RRegu16RegisterClass), + argmap_entry(MVT::i32, PTX::RRegu32RegisterClass), + argmap_entry(MVT::i64, PTX::RRegu64RegisterClass), + argmap_entry(MVT::f32, PTX::RRegf32RegisterClass), + argmap_entry(MVT::f64, PTX::RRegf64RegisterClass) }; -} // end anonymous namespace +} // end anonymous namespace SDValue PTXTargetLowering:: LowerFormalArguments(SDValue Chain, @@ -185,10 +262,25 @@ SDValue PTXTargetLowering:: if (Outs.size() == 0) return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain); - assert(Outs[0].VT == MVT::i32 && "Can return only basic types"); - SDValue Flag; - unsigned reg = PTX::R0; + unsigned reg; + + if (Outs[0].VT == MVT::i16) { + reg = PTX::RH0; + } + else if (Outs[0].VT == MVT::i32) { + reg = PTX::R0; + } + else if (Outs[0].VT == MVT::i64) { + reg = PTX::RD0; + } + else if (Outs[0].VT == MVT::f32) { + reg = PTX::F0; + } + else { + assert(Outs[0].VT == MVT::f64 && "Can return only basic types"); + reg = PTX::FD0; + } MachineFunction &MF = DAG.getMachineFunction(); PTXMachineFunctionInfo *MFI = MF.getInfo(); diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h index b03a9f66630f..6a7e3e6611bd 100644 --- a/lib/Target/PTX/PTXISelLowering.h +++ b/lib/Target/PTX/PTXISelLowering.h @@ -26,7 +26,8 @@ namespace PTXISD { FIRST_NUMBER = ISD::BUILTIN_OP_END, READ_PARAM, EXIT, - RET + RET, + COPY_ADDRESS }; } // namespace PTXISD @@ -41,6 +42,8 @@ class PTXTargetLowering : public TargetLowering { virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; + virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const; + virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, @@ -58,7 +61,9 @@ class PTXTargetLowering : public TargetLowering { const SmallVectorImpl &OutVals, DebugLoc dl, SelectionDAG &DAG) const; - + + virtual 
MVT::SimpleValueType getSetCCResultType(EVT VT) const; + private: SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; }; // class PTXTargetLowering diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp index 805759bcab1e..a12a6d01afa7 100644 --- a/lib/Target/PTX/PTXInstrInfo.cpp +++ b/lib/Target/PTX/PTXInstrInfo.cpp @@ -11,9 +11,15 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "ptx-instrinfo" + #include "PTX.h" #include "PTXInstrInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -27,20 +33,27 @@ static const struct map_entry { const TargetRegisterClass *cls; const int opcode; } map[] = { - { &PTX::RRegs32RegClass, PTX::MOVrr }, - { &PTX::PredsRegClass, PTX::MOVpp } + { &PTX::RRegu16RegClass, PTX::MOVU16rr }, + { &PTX::RRegu32RegClass, PTX::MOVU32rr }, + { &PTX::RRegu64RegClass, PTX::MOVU64rr }, + { &PTX::RRegf32RegClass, PTX::MOVF32rr }, + { &PTX::RRegf64RegClass, PTX::MOVF64rr }, + { &PTX::PredsRegClass, PTX::MOVPREDrr } }; void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DstReg, unsigned SrcReg, bool KillSrc) const { - for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) - if (PTX::RRegs32RegClass.contains(DstReg, SrcReg)) { - BuildMI(MBB, I, DL, - get(PTX::MOVrr), DstReg).addReg(SrcReg, getKillRegState(KillSrc)); + for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) { + if (map[i].cls->contains(DstReg, SrcReg)) { + const TargetInstrDesc &TID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg). 
+ addReg(SrcReg, getKillRegState(KillSrc)); + AddDefaultPredicate(MI); return; } + } llvm_unreachable("Impossible reg-to-reg copy"); } @@ -56,12 +69,9 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB, for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) if (DstRC == map[i].cls) { - MachineInstr *MI = BuildMI(MBB, I, DL, get(map[i].opcode), - DstReg).addReg(SrcReg); - if (MI->findFirstPredOperandIdx() == -1) { - MI->addOperand(MachineOperand::CreateReg(0, false)); - MI->addOperand(MachineOperand::CreateImm(/*IsInv=*/0)); - } + const TargetInstrDesc &TID = get(map[i].opcode); + MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg); + AddDefaultPredicate(MI); return true; } @@ -74,8 +84,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, switch (MI.getOpcode()) { default: return false; - case PTX::MOVpp: - case PTX::MOVrr: + case PTX::MOVU16rr: + case PTX::MOVU32rr: + case PTX::MOVU64rr: + case PTX::MOVF32rr: + case PTX::MOVF64rr: + case PTX::MOVPREDrr: assert(MI.getNumOperands() >= 2 && MI.getOperand(0).isReg() && MI.getOperand(1).isReg() && "Invalid register-register move instruction"); @@ -85,3 +99,239 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI, return true; } } + +// predicate support + +bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const { + int i = MI->findFirstPredOperandIdx(); + return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister; +} + +bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + return !isPredicated(MI) && get(MI->getOpcode()).isTerminator(); +} + +bool PTXInstrInfo:: +PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Pred) const { + if (Pred.size() < 2) + llvm_unreachable("lesser than 2 predicate operands are provided"); + + int i = MI->findFirstPredOperandIdx(); + if (i == -1) + llvm_unreachable("missing predicate operand"); + + MI->getOperand(i).setReg(Pred[0].getReg()); + MI->getOperand(i+1).setImm(Pred[1].getImm()); + + return true; 
+} + +bool PTXInstrInfo:: +SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) const { + const MachineOperand &PredReg1 = Pred1[0]; + const MachineOperand &PredReg2 = Pred2[0]; + if (PredReg1.getReg() != PredReg2.getReg()) + return false; + + const MachineOperand &PredOp1 = Pred1[1]; + const MachineOperand &PredOp2 = Pred2[1]; + if (PredOp1.getImm() != PredOp2.getImm()) + return false; + + return true; +} + +bool PTXInstrInfo:: +DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const { + // If an instruction sets a predicate register, it defines a predicate. + + // TODO support 5-operand format of setp instruction + + if (MI->getNumOperands() < 1) + return false; + + const MachineOperand &MO = MI->getOperand(0); + + if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass) + return false; + + Pred.push_back(MO); + Pred.push_back(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + return true; +} + +// branch support + +bool PTXInstrInfo:: +AnalyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify) const { + // TODO implement cases when AllowModify is true + + if (MBB.empty()) + return true; + + MachineBasicBlock::const_iterator iter = MBB.end(); + const MachineInstr& instLast1 = *--iter; + const TargetInstrDesc &desc1 = instLast1.getDesc(); + // for special case that MBB has only 1 instruction + const bool IsSizeOne = MBB.size() == 1; + // if IsSizeOne is true, *--iter and instLast2 are invalid + // we put a dummy value in instLast2 and desc2 since they are used + const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter; + const TargetInstrDesc &desc2 = IsSizeOne ? 
desc1 : instLast2.getDesc(); + + DEBUG(dbgs() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n"); + DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n"); + + // this block ends with no branches + if (!IsAnyKindOfBranch(instLast1)) { + DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n"); + return false; + } + + // this block ends with only an unconditional branch + if (desc1.isUnconditionalBranch() && + // when IsSizeOne is true, it "absorbs" the evaluation of instLast2 + (IsSizeOne || !IsAnyKindOfBranch(instLast2))) { + DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n"); + TBB = GetBranchTarget(instLast1); + return false; + } + + // this block ends with a conditional branch and + // it falls through to a successor block + if (desc1.isConditionalBranch() && + IsAnySuccessorAlsoLayoutSuccessor(MBB)) { + DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n"); + TBB = GetBranchTarget(instLast1); + int i = instLast1.findFirstPredOperandIdx(); + Cond.push_back(instLast1.getOperand(i)); + Cond.push_back(instLast1.getOperand(i+1)); + return false; + } + + // when IsSizeOne is true, we are done + if (IsSizeOne) + return true; + + // this block ends with a conditional branch + // followed by an unconditional branch + if (desc2.isConditionalBranch() && + desc1.isUnconditionalBranch()) { + DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n"); + TBB = GetBranchTarget(instLast2); + FBB = GetBranchTarget(instLast1); + int i = instLast2.findFirstPredOperandIdx(); + Cond.push_back(instLast2.getOperand(i)); + Cond.push_back(instLast2.getOperand(i+1)); + return false; + } + + // branch cannot be understood + DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n"); + return true; +} + +unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { + unsigned 
count = 0; + while (!MBB.empty()) + if (IsAnyKindOfBranch(MBB.back())) { + MBB.pop_back(); + ++count; + } else + break; + DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(dbgs() << "RemoveBranch: remove " << count << " branch inst\n"); + return count; +} + +unsigned PTXInstrInfo:: +InsertBranch(MachineBasicBlock &MBB, + MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const { + DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n"); + DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str() + << "\n"; + else dbgs() << "InsertBranch: TBB: (NULL)\n"); + DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str() + << "\n"; + else dbgs() << "InsertBranch: FBB: (NULL)\n"); + DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n"); + + assert(TBB && "TBB is NULL"); + + if (FBB) { + BuildMI(&MBB, DL, get(PTX::BRAdp)) + .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); + BuildMI(&MBB, DL, get(PTX::BRAd)) + .addMBB(FBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + return 2; + } else if (Cond.size()) { + BuildMI(&MBB, DL, get(PTX::BRAdp)) + .addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm()); + return 1; + } else { + BuildMI(&MBB, DL, get(PTX::BRAd)) + .addMBB(TBB).addReg(PTX::NoRegister).addImm(PTX::PRED_NORMAL); + return 1; + } +} + +// static helper routines + +MachineSDNode *PTXInstrInfo:: +GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, SDValue Op1) { + SDValue predReg = DAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue ops[] = { Op1, predReg, predOp }; + return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); +} + +MachineSDNode *PTXInstrInfo:: +GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) { + SDValue predReg = 
DAG->getRegister(PTX::NoRegister, MVT::i1); + SDValue predOp = DAG->getTargetConstant(PTX::PRED_NORMAL, MVT::i32); + SDValue ops[] = { Op1, Op2, predReg, predOp }; + return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); +} + +void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) { + if (MI->findFirstPredOperandIdx() == -1) { + MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false)); + MI->addOperand(MachineOperand::CreateImm(PTX::PRED_NORMAL)); + } +} + +bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) { + const TargetInstrDesc &desc = inst.getDesc(); + return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch(); +} + +bool PTXInstrInfo:: +IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) { + for (MachineBasicBlock::const_succ_iterator + i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i) + if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i)) + return true; + return false; +} + +MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) { + // FIXME So far all branch instructions put destination in 1st operand + const MachineOperand& target = inst.getOperand(0); + assert(target.isMBB() && "FIXME: detect branch target operand"); + return target.getMBB(); +} diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h index e7f00f09c2f1..a04be7728f88 100644 --- a/lib/Target/PTX/PTXInstrInfo.h +++ b/lib/Target/PTX/PTXInstrInfo.h @@ -15,61 +15,93 @@ #define PTX_INSTR_INFO_H #include "PTXRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Target/TargetInstrInfo.h" namespace llvm { class PTXTargetMachine; +class MachineSDNode; +class SDValue; +class SelectionDAG; + class PTXInstrInfo : public TargetInstrInfoImpl { - private: - const PTXRegisterInfo RI; - PTXTargetMachine &TM; +private: + const PTXRegisterInfo RI; + PTXTargetMachine &TM; - public: - explicit PTXInstrInfo(PTXTargetMachine 
&_TM); +public: + explicit PTXInstrInfo(PTXTargetMachine &_TM); - virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; } + virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; } - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DstReg, unsigned SrcReg, - bool KillSrc) const; + virtual void copyPhysReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, DebugLoc DL, + unsigned DstReg, unsigned SrcReg, + bool KillSrc) const; - virtual bool copyRegToReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DstReg, unsigned SrcReg, - const TargetRegisterClass *DstRC, - const TargetRegisterClass *SrcRC, - DebugLoc DL) const; + virtual bool copyRegToReg(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DstReg, unsigned SrcReg, + const TargetRegisterClass *DstRC, + const TargetRegisterClass *SrcRC, + DebugLoc DL) const; - virtual bool isMoveInstr(const MachineInstr& MI, - unsigned &SrcReg, unsigned &DstReg, - unsigned &SrcSubIdx, unsigned &DstSubIdx) const; + virtual bool isMoveInstr(const MachineInstr& MI, + unsigned &SrcReg, unsigned &DstReg, + unsigned &SrcSubIdx, unsigned &DstSubIdx) const; - // static helper routines + // predicate support - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1) { - SDValue pred_reg = DAG->getRegister(0, MVT::i1); - SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32); - SDValue ops[] = { Op1, pred_reg, pred_imm }; - return DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); - } + virtual bool isPredicated(const MachineInstr *MI) const; - static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, - DebugLoc dl, EVT VT, - SDValue Op1, - SDValue Op2) { - SDValue pred_reg = DAG->getRegister(0, MVT::i1); - SDValue pred_imm = DAG->getTargetConstant(0, MVT::i32); - SDValue ops[] = { Op1, Op2, pred_reg, pred_imm }; - return 
DAG->getMachineNode(Opcode, dl, VT, ops, array_lengthof(ops)); - } + virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; - }; // class PTXInstrInfo + virtual + bool PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Pred) const; + + virtual + bool SubsumesPredicate(const SmallVectorImpl &Pred1, + const SmallVectorImpl &Pred2) const; + + virtual bool DefinesPredicate(MachineInstr *MI, + std::vector &Pred) const; + + // PTX is fully-predicable + virtual bool isPredicable(MachineInstr *MI) const { return true; } + + // branch support + + virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + SmallVectorImpl &Cond, + bool AllowModify = false) const; + + virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; + + virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, + MachineBasicBlock *FBB, + const SmallVectorImpl &Cond, + DebugLoc DL) const; + + // static helper routines + + static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, + SDValue Op1); + + static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode, + DebugLoc dl, EVT VT, + SDValue Op1, SDValue Op2); + + static void AddDefaultPredicate(MachineInstr *MI); + + static bool IsAnyKindOfBranch(const MachineInstr& inst); + + static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB); + + static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst); +}; // class PTXInstrInfo } // namespace llvm #endif // PTX_INSTR_INFO_H diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index 9a747788f6a1..1ac9d3f3dc81 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -17,6 +17,26 @@ include "PTXInstrFormats.td" +//===----------------------------------------------------------------------===// +// Code Generation Predicates 
+//===----------------------------------------------------------------------===// + +// Addressing +def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">; +def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">; + +// Shader Model Support +def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">; +def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">; +def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">; +def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">; + +// PTX Version Support +def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">; +def DoesNotSupportPTX21 : Predicate<"!getSubtarget().supportsPTX21()">; +def SupportsPTX22 : Predicate<"getSubtarget().supportsPTX22()">; +def DoesNotSupportPTX22 : Predicate<"!getSubtarget().supportsPTX22()">; + //===----------------------------------------------------------------------===// // Instruction Pattern Stuff //===----------------------------------------------------------------------===// @@ -107,24 +127,41 @@ def store_shared }]>; // Addressing modes. 
-def ADDRrr : ComplexPattern; -def ADDRri : ComplexPattern; -def ADDRii : ComplexPattern; +def ADDRrr32 : ComplexPattern; +def ADDRrr64 : ComplexPattern; +def ADDRri32 : ComplexPattern; +def ADDRri64 : ComplexPattern; +def ADDRii32 : ComplexPattern; +def ADDRii64 : ComplexPattern; // Address operands -def MEMri : Operand { +def MEMri32 : Operand { let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops RRegs32, i32imm); + let MIOperandInfo = (ops RRegu32, i32imm); } -def MEMii : Operand { +def MEMri64 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops RRegu64, i64imm); +} +def MEMii32 : Operand { let PrintMethod = "printMemOperand"; let MIOperandInfo = (ops i32imm, i32imm); } +def MEMii64 : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops i64imm, i64imm); +} +// The operand here does not correspond to an actual address, so we +// can use i32 in 64-bit address modes. def MEMpi : Operand { let PrintMethod = "printParamOperand"; let MIOperandInfo = (ops i32imm); } +// Branch & call targets have OtherVT type. 
+def brtarget : Operand; +def calltarget : Operand; + //===----------------------------------------------------------------------===// // PTX Specific Node Definitions //===----------------------------------------------------------------------===// @@ -138,66 +175,389 @@ def PTXexit : SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>; def PTXret : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>; +def PTXcopyaddress + : SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>; //===----------------------------------------------------------------------===// // Instruction Class Templates //===----------------------------------------------------------------------===// +//===- Floating-Point Instructions - 2 Operand Form -----------------------===// +multiclass PTX_FLOAT_2OP { + def rr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + !strconcat(opcstr, ".f32\t$d, $a"), + [(set RRegf32:$d, (opnode RRegf32:$a))]>; + def ri32 : InstPTX<(outs RRegf32:$d), + (ins f32imm:$a), + !strconcat(opcstr, ".f32\t$d, $a"), + [(set RRegf32:$d, (opnode fpimm:$a))]>; + def rr64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + !strconcat(opcstr, ".f64\t$d, $a"), + [(set RRegf64:$d, (opnode RRegf64:$a))]>; + def ri64 : InstPTX<(outs RRegf64:$d), + (ins f64imm:$a), + !strconcat(opcstr, ".f64\t$d, $a"), + [(set RRegf64:$d, (opnode fpimm:$a))]>; +} + +//===- Floating-Point Instructions - 3 Operand Form -----------------------===// +multiclass PTX_FLOAT_3OP { + def rr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>; + def ri32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, f32imm:$b), + !strconcat(opcstr, ".f32\t$d, $a, $b"), + [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>; + def rr64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>; + def ri64 : InstPTX<(outs 
RRegf64:$d), + (ins RRegf64:$a, f64imm:$b), + !strconcat(opcstr, ".f64\t$d, $a, $b"), + [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>; +} + +//===- Floating-Point Instructions - 4 Operand Form -----------------------===// +multiclass PTX_FLOAT_4OP { + def rrr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b, RRegf32:$c), + !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), + [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, + RRegf32:$b), + RRegf32:$c))]>; + def rri32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b, f32imm:$c), + !strconcat(opcstr, ".f32\t$d, $a, $b, $c"), + [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a, + RRegf32:$b), + fpimm:$c))]>; + def rrr64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b, RRegf64:$c), + !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), + [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, + RRegf64:$b), + RRegf64:$c))]>; + def rri64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b, f64imm:$c), + !strconcat(opcstr, ".f64\t$d, $a, $b, $c"), + [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a, + RRegf64:$b), + fpimm:$c))]>; +} + multiclass INT3 { - def rr : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, RRegs32:$b), - !strconcat(opcstr, ".%type\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>; - def ri : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, i32imm:$b), - !strconcat(opcstr, ".%type\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>; + def rr16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, RRegu16:$b), + !strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; + def ri16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, i16imm:$b), + !strconcat(opcstr, ".u16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def ri32 : InstPTX<(outs 
RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), + !strconcat(opcstr, ".u32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, RRegu64:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; + def ri64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, i64imm:$b), + !strconcat(opcstr, ".u64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; +} + +multiclass PTX_LOGIC { + def ripreds : InstPTX<(outs Preds:$d), + (ins Preds:$a, i1imm:$b), + !strconcat(opcstr, ".pred\t$d, $a, $b"), + [(set Preds:$d, (opnode Preds:$a, imm:$b))]>; + def rrpreds : InstPTX<(outs Preds:$d), + (ins Preds:$a, Preds:$b), + !strconcat(opcstr, ".pred\t$d, $a, $b"), + [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>; + def rr16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, RRegu16:$b), + !strconcat(opcstr, ".b16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; + def ri16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, i16imm:$b), + !strconcat(opcstr, ".b16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; + def rr32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), + !strconcat(opcstr, ".b32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def ri32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), + !strconcat(opcstr, ".b32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def rr64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, RRegu64:$b), + !strconcat(opcstr, ".b64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; + def ri64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, i64imm:$b), + !strconcat(opcstr, ".b64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; } -// no %type directive, non-communtable multiclass INT3ntnc { - def rr : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, RRegs32:$b), - !strconcat(opcstr, "\t$d, $a, 
$b"), - [(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>; - def ri : InstPTX<(outs RRegs32:$d), - (ins RRegs32:$a, i32imm:$b), - !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>; - def ir : InstPTX<(outs RRegs32:$d), - (ins i32imm:$a, RRegs32:$b), - !strconcat(opcstr, "\t$d, $a, $b"), - [(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>; + def rr16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, RRegu16:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>; + def rr32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, RRegu32:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>; + def rr64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, RRegu64:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>; + def ri16 : InstPTX<(outs RRegu16:$d), + (ins RRegu16:$a, i16imm:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>; + def ri32 : InstPTX<(outs RRegu32:$d), + (ins RRegu32:$a, i32imm:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>; + def ri64 : InstPTX<(outs RRegu64:$d), + (ins RRegu64:$a, i64imm:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>; + def ir16 : InstPTX<(outs RRegu16:$d), + (ins i16imm:$a, RRegu16:$b), + !strconcat(opcstr, "16\t$d, $a, $b"), + [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>; + def ir32 : InstPTX<(outs RRegu32:$d), + (ins i32imm:$a, RRegu32:$b), + !strconcat(opcstr, "32\t$d, $a, $b"), + [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>; + def ir64 : InstPTX<(outs RRegu64:$d), + (ins i64imm:$a, RRegu64:$b), + !strconcat(opcstr, "64\t$d, $a, $b"), + [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>; } -multiclass PTX_LD { - def rr : InstPTX<(outs RC:$d), - (ins MEMri:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), - [(set RC:$d, 
(pat_load ADDRrr:$a))]>; - def ri : InstPTX<(outs RC:$d), - (ins MEMri:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), - [(set RC:$d, (pat_load ADDRri:$a))]>; - def ii : InstPTX<(outs RC:$d), - (ins MEMii:$a), - !strconcat(opstr, ".%type\t$d, [$a]"), - [(set RC:$d, (pat_load ADDRii:$a))]>; +multiclass PTX_SETP_I { + // TODO support 5-operand format: p|q, a, b, c + + def rr + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>; + def ri + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>; + + def rr_and_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + def ri_and_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + def rr_or_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + def ri_or_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + def rr_xor_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>; + def ri_xor_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>; + + def 
rr_and_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + def ri_and_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + def rr_or_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + def ri_or_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; + def rr_xor_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>; + def ri_xor_not_r + : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>; } -multiclass PTX_ST { - def rr : InstPTX<(outs), - (ins RC:$d, MEMri:$a), - !strconcat(opstr, ".%type\t[$a], $d"), - [(pat_store RC:$d, ADDRrr:$a)]>; - def ri : InstPTX<(outs), - (ins RC:$d, MEMri:$a), - !strconcat(opstr, ".%type\t[$a], $d"), - [(pat_store RC:$d, ADDRri:$a)]>; - def ii : InstPTX<(outs), - (ins RC:$d, MEMii:$a), - !strconcat(opstr, ".%type\t[$a], $d"), - [(pat_store RC:$d, ADDRii:$a)]>; +multiclass PTX_SETP_FP { + // TODO support 5-operand format: p|q, a, b, c + + def rr_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>; + def rr_o + 
: InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b), + !strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"), + [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>; + + def rr_and_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_and_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_or_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_or_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_xor_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>; + def rr_xor_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>; + + def rr_and_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_and_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_or_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", 
cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_or_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; + + def rr_xor_not_r_u + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>; + def rr_xor_not_r_o + : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c), + !strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"), + [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>; +} + +multiclass PTX_SELP { + def rr + : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c), + !strconcat("selp.", regclsname, "\t$r, $b, $c, $a"), + [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>; +} + +multiclass PTX_LD { + def rr32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr32:$a))]>, Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRrr64:$a))]>, Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs RC:$d), + (ins MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri32:$a))]>, Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs RC:$d), + (ins MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRri64:$a))]>, Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs RC:$d), + (ins MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii32:$a))]>, Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs RC:$d), + (ins MEMii64:$a), + 
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")), + [(set RC:$d, (pat_load ADDRii64:$a))]>, Requires<[Use64BitAddresses]>; +} + +multiclass PTX_LD_ALL { + defm u16 : PTX_LD; + defm u32 : PTX_LD; + defm u64 : PTX_LD; + defm f32 : PTX_LD; + defm f64 : PTX_LD; +} + +multiclass PTX_ST { + def rr32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr32:$a)]>, Requires<[Use32BitAddresses]>; + def rr64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRrr64:$a)]>, Requires<[Use64BitAddresses]>; + def ri32 : InstPTX<(outs), + (ins RC:$d, MEMri32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri32:$a)]>, Requires<[Use32BitAddresses]>; + def ri64 : InstPTX<(outs), + (ins RC:$d, MEMri64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRri64:$a)]>, Requires<[Use64BitAddresses]>; + def ii32 : InstPTX<(outs), + (ins RC:$d, MEMii32:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii32:$a)]>, Requires<[Use32BitAddresses]>; + def ii64 : InstPTX<(outs), + (ins RC:$d, MEMii64:$a), + !strconcat(opstr, !strconcat(typestr, "\t[$a], $d")), + [(pat_store RC:$d, ADDRii64:$a)]>, Requires<[Use64BitAddresses]>; +} + +multiclass PTX_ST_ALL { + defm u16 : PTX_ST; + defm u32 : PTX_ST; + defm u64 : PTX_ST; + defm f32 : PTX_ST; + defm f64 : PTX_ST; } //===----------------------------------------------------------------------===// @@ -208,50 +568,392 @@ multiclass PTX_ST { defm ADD : INT3<"add", add>; defm SUB : INT3<"sub", sub>; +defm MUL : INT3<"mul.lo", mul>; // FIXME: Allow 32x32 -> 64 multiplies +defm DIV : INT3<"div", udiv>; +defm REM : INT3<"rem", urem>; + +///===- Floating-Point Arithmetic Instructions ----------------------------===// + +// Standard Unary Operations +defm FNEG : PTX_FLOAT_2OP<"neg", fneg>; + +// Standard Binary 
Operations +defm FADD : PTX_FLOAT_3OP<"add", fadd>; +defm FSUB : PTX_FLOAT_3OP<"sub", fsub>; +defm FMUL : PTX_FLOAT_3OP<"mul", fmul>; + +// TODO: Allow user selection of rounding modes for fdiv. +// For division, we need to have f32 and f64 differently. +// For f32, we just always use .approx since it is supported on all hardware +// for PTX 1.4+, which is our minimum target. +def FDIVrr32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, RRegf32:$b), + "div.approx.f32\t$d, $a, $b", + [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>; +def FDIVri32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a, f32imm:$b), + "div.approx.f32\t$d, $a, $b", + [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>; + +// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0. +def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b), + "div.rn.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, + Requires<[SupportsSM13]>; +def FDIVri64SM13 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, f64imm:$b), + "div.rn.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, + Requires<[SupportsSM13]>; +def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, RRegf64:$b), + "div.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>, + Requires<[DoesNotSupportSM13]>; +def FDIVri64SM10 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a, f64imm:$b), + "div.f64\t$d, $a, $b", + [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>, + Requires<[DoesNotSupportSM13]>; + + + +// Multi-operation hybrid instructions + +// The selection of mad/fma is tricky. In some cases, they are the *same* +// instruction, but in other cases we may prefer one or the other. Also, +// different PTX versions differ on whether rounding mode flags are required. +// In the short term, mad is supported on all PTX versions and we use a +// default rounding mode no matter what shader model or PTX version. 
+// TODO: Allow the rounding mode to be selectable through llc. +defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13]>; +defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13]>; + +///===- Floating-Point Intrinsic Instructions -----------------------------===// + +def FSQRT32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + "sqrt.rn.f32\t$d, $a", + [(set RRegf32:$d, (fsqrt RRegf32:$a))]>; + +def FSQRT64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + "sqrt.rn.f64\t$d, $a", + [(set RRegf64:$d, (fsqrt RRegf64:$a))]>; + +def FSIN32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + "sin.approx.f32\t$d, $a", + [(set RRegf32:$d, (fsin RRegf32:$a))]>; + +def FSIN64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + "sin.approx.f64\t$d, $a", + [(set RRegf64:$d, (fsin RRegf64:$a))]>; + +def FCOS32 : InstPTX<(outs RRegf32:$d), + (ins RRegf32:$a), + "cos.approx.f32\t$d, $a", + [(set RRegf32:$d, (fcos RRegf32:$a))]>; + +def FCOS64 : InstPTX<(outs RRegf64:$d), + (ins RRegf64:$a), + "cos.approx.f64\t$d, $a", + [(set RRegf64:$d, (fcos RRegf64:$a))]>; + + +///===- Comparison and Selection Instructions -----------------------------===// + +// Compare u16 + +defm SETPEQu16 : PTX_SETP_I; +defm SETPNEu16 : PTX_SETP_I; +defm SETPLTu16 : PTX_SETP_I; +defm SETPLEu16 : PTX_SETP_I; +defm SETPGTu16 : PTX_SETP_I; +defm SETPGEu16 : PTX_SETP_I; + +// Compare u32 + +defm SETPEQu32 : PTX_SETP_I; +defm SETPNEu32 : PTX_SETP_I; +defm SETPLTu32 : PTX_SETP_I; +defm SETPLEu32 : PTX_SETP_I; +defm SETPGTu32 : PTX_SETP_I; +defm SETPGEu32 : PTX_SETP_I; + +// Compare u64 + +defm SETPEQu64 : PTX_SETP_I; +defm SETPNEu64 : PTX_SETP_I; +defm SETPLTu64 : PTX_SETP_I; +defm SETPLEu64 : PTX_SETP_I; +defm SETPGTu64 : PTX_SETP_I; +defm SETPGEu64 : PTX_SETP_I; + +// Compare f32 + +defm SETPEQf32 : PTX_SETP_FP; +defm SETPNEf32 : PTX_SETP_FP; +defm SETPLTf32 : PTX_SETP_FP; +defm SETPLEf32 : PTX_SETP_FP; +defm SETPGTf32 : PTX_SETP_FP; +defm SETPGEf32 : PTX_SETP_FP; + 
+// Compare f64 + +defm SETPEQf64 : PTX_SETP_FP; +defm SETPNEf64 : PTX_SETP_FP; +defm SETPLTf64 : PTX_SETP_FP; +defm SETPLEf64 : PTX_SETP_FP; +defm SETPGTf64 : PTX_SETP_FP; +defm SETPGEf64 : PTX_SETP_FP; + +// .selp + +defm PTX_SELPu16 : PTX_SELP; +defm PTX_SELPu32 : PTX_SELP; +defm PTX_SELPu64 : PTX_SELP; +defm PTX_SELPf32 : PTX_SELP; +defm PTX_SELPf64 : PTX_SELP; ///===- Logic and Shift Instructions --------------------------------------===// -defm SHL : INT3ntnc<"shl.b32", PTXshl>; -defm SRL : INT3ntnc<"shr.u32", PTXsrl>; -defm SRA : INT3ntnc<"shr.s32", PTXsra>; +defm SHL : INT3ntnc<"shl.b", PTXshl>; +defm SRL : INT3ntnc<"shr.u", PTXsrl>; +defm SRA : INT3ntnc<"shr.s", PTXsra>; + +defm AND : PTX_LOGIC<"and", and>; +defm OR : PTX_LOGIC<"or", or>; +defm XOR : PTX_LOGIC<"xor", xor>; ///===- Data Movement and Conversion Instructions -------------------------===// let neverHasSideEffects = 1 in { - // rely on isMoveInstr to separate MOVpp, MOVrr, etc. - def MOVpp + def MOVPREDrr : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>; - def MOVrr - : InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>; + def MOVU16rr + : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>; + def MOVU32rr + : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>; + def MOVU64rr + : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>; + def MOVF32rr + : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>; + def MOVF64rr + : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>; } let isReMaterializable = 1, isAsCheapAsAMove = 1 in { - def MOVpi + def MOVPREDri : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a", [(set Preds:$d, imm:$a)]>; - def MOVri - : InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a", - [(set RRegs32:$d, imm:$a)]>; + def MOVU16ri + : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a", + [(set RRegu16:$d, imm:$a)]>; + def MOVU32ri + 
: InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RRegu32:$d, imm:$a)]>; + def MOVU64ri + : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RRegu64:$d, imm:$a)]>; + def MOVF32ri + : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a", + [(set RRegf32:$d, fpimm:$a)]>; + def MOVF64ri + : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a", + [(set RRegf64:$d, fpimm:$a)]>; } -defm LDg : PTX_LD<"ld.global", RRegs32, load_global>; -defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>; -defm LDl : PTX_LD<"ld.local", RRegs32, load_local>; -defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>; -defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>; +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { + def MOVaddr32 + : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a", + [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>; + def MOVaddr64 + : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", + [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>; +} -def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a), - "ld.param.%type\t$d, [$a]", []>; +// Loads +defm LDg : PTX_LD_ALL<"ld.global", load_global>; +defm LDc : PTX_LD_ALL<"ld.const", load_constant>; +defm LDl : PTX_LD_ALL<"ld.local", load_local>; +defm LDs : PTX_LD_ALL<"ld.shared", load_shared>; -defm STg : PTX_ST<"st.global", RRegs32, store_global>; -defm STl : PTX_ST<"st.local", RRegs32, store_local>; -// Store to parameter state space requires PTX 2.0 or higher? 
-// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>; -defm STs : PTX_ST<"st.shared", RRegs32, store_shared>; +// This is a special instruction that is manually inserted for kernel parameters +def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a), + "ld.param.u16\t$d, [$a]", []>; +def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a), + "ld.param.u32\t$d, [$a]", []>; +def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a), + "ld.param.u64\t$d, [$a]", []>; +def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a), + "ld.param.f32\t$d, [$a]", []>; +def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a), + "ld.param.f64\t$d, [$a]", []>; + +// Stores +defm STg : PTX_ST_ALL<"st.global", store_global>; +defm STl : PTX_ST_ALL<"st.local", store_local>; +defm STs : PTX_ST_ALL<"st.shared", store_shared>; + +// defm STp : PTX_ST_ALL<"st.param", store_parameter>; +// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>; +// TODO: Do something with st.param if/when it is needed. + +// Conversion to pred + +def CVT_pred_u16 + : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a", + [(set Preds:$d, (trunc RRegu16:$a))]>; + +def CVT_pred_u32 + : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a", + [(set Preds:$d, (trunc RRegu32:$a))]>; + +def CVT_pred_u64 + : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a", + [(set Preds:$d, (trunc RRegu64:$a))]>; + +def CVT_pred_f32 + : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_pred_f64 + : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a", + [(set Preds:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u16 + +def CVT_u16_pred + : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a", + [(set RRegu16:$d, (zext Preds:$a))]>; + +def CVT_u16_u32 + : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a", + [(set RRegu16:$d, (trunc RRegu32:$a))]>; + +def 
CVT_u16_u64 + : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a", + [(set RRegu16:$d, (trunc RRegu64:$a))]>; + +def CVT_u16_f32 + : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u16_f64 + : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a", + [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u32 + +def CVT_u32_pred + : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a", + [(set RRegu32:$d, (zext Preds:$a))]>; + +def CVT_u32_u16 + : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a", + [(set RRegu32:$d, (zext RRegu16:$a))]>; + +def CVT_u32_u64 + : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a", + [(set RRegu32:$d, (trunc RRegu64:$a))]>; + +def CVT_u32_f32 + : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u32_f64 + : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a", + [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to u64 + +def CVT_u64_pred + : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a", + [(set RRegu64:$d, (zext Preds:$a))]>; + +def CVT_u64_u16 + : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a", + [(set RRegu64:$d, (zext RRegu16:$a))]>; + +def CVT_u64_u32 + : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a", + [(set RRegu64:$d, (zext RRegu32:$a))]>; + +def CVT_u64_f32 + : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>; + +def CVT_u64_f64 + : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a", + [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>; + +// Conversion to f32 + +def CVT_f32_pred + : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a", + [(set RRegf32:$d, (uint_to_fp Preds:$a))]>; + +def 
CVT_f32_u16 + : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f32_u32 + : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f32_u64 + : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a", + [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f32_f64 + : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a", + [(set RRegf32:$d, (fround RRegf64:$a))]>; + +// Conversion to f64 + +def CVT_f64_pred + : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a", + [(set RRegf64:$d, (uint_to_fp Preds:$a))]>; + +def CVT_f64_u16 + : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>; + +def CVT_f64_u32 + : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>; + +def CVT_f64_u64 + : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a", + [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>; + +def CVT_f64_f32 + : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a", + [(set RRegf64:$d, (fextend RRegf32:$a))]>; ///===- Control Flow Instructions -----------------------------------------===// +let isBranch = 1, isTerminator = 1, isBarrier = 1 in { + def BRAd + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", [(br bb:$d)]>; +} + +let isBranch = 1, isTerminator = 1 in { + // FIXME: The pattern part is blank because I cannot (or do not yet know + // how to) use the first operand of PredicateOperand (a Preds register) here + def BRAdp + : InstPTX<(outs), (ins brtarget:$d), "bra\t$d", + [/*(brcond pred:$_p, bb:$d)*/]>; +} + let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def EXIT : InstPTX<(outs), (ins), "exit", [(PTXexit)]>; def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>; } + +///===- Intrinsic Instructions 
--------------------------------------------===// + +include "PTXIntrinsicInstrInfo.td" diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td new file mode 100644 index 000000000000..320934a2228c --- /dev/null +++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td @@ -0,0 +1,84 @@ +//===- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics ---*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines all of the PTX-specific intrinsic instructions. +// +//===----------------------------------------------------------------------===// + +// PTX Special Purpose Register Accessor Intrinsics + +class PTX_READ_SPECIAL_REGISTER_R64 + : InstPTX<(outs RRegu64:$d), (ins), + !strconcat("mov.u64\t$d, %", regname), + [(set RRegu64:$d, (intop))]>; + +class PTX_READ_SPECIAL_REGISTER_R32 + : InstPTX<(outs RRegu32:$d), (ins), + !strconcat("mov.u32\t$d, %", regname), + [(set RRegu32:$d, (intop))]>; + +// TODO Add read vector-version of special registers + +//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid", int_ptx_read_tid_r64>; +def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>; +def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>; +def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>; +def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>; + +//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid", int_ptx_read_ntid_r64>; +def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>; +def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>; +def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>; +def PTX_READ_NTID_W : 
PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>; + +def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>; +def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>; +def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>; + +//def PTX_READ_CTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>; +def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>; +def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>; +def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>; +def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>; + +//def PTX_READ_NCTAID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>; +def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>; +def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>; +def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>; +def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>; + +def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>; +def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>; +def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>; + +def PTX_READ_LANEMASK_EQ + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>; +def PTX_READ_LANEMASK_LE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>; +def PTX_READ_LANEMASK_LT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>; +def PTX_READ_LANEMASK_GE + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>; +def PTX_READ_LANEMASK_GT + : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>; + +def 
PTX_READ_CLOCK + : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>; +def PTX_READ_CLOCK64 + : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>; + +def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>; +def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>; +def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>; +def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>; + +// PTX Parallel Synchronization and Communication Intrinsics + +def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i", + [(int_ptx_bar_sync imm:$i)]>; diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp index 0886ba8008f3..1574670b6e9b 100644 --- a/lib/Target/PTX/PTXMCAsmStreamer.cpp +++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp @@ -143,9 +143,9 @@ class PTXMCAsmStreamer : public MCStreamer { virtual void EmitBytes(StringRef Data, unsigned AddrSpace); virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace); - virtual void EmitULEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); - virtual void EmitSLEB128Value(const MCExpr *Value, unsigned AddrSpace = 0); + unsigned AddrSpace); + virtual void EmitULEB128Value(const MCExpr *Value); + virtual void EmitSLEB128Value(const MCExpr *Value); virtual void EmitGPRel32Value(const MCExpr *Value); @@ -233,7 +233,7 @@ void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) { void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection() && "Cannot emit before setting section!"); + //assert(getCurrentSection() && "Cannot emit before setting section!"); OS << *Symbol << MAI.getLabelSuffix(); EmitEOL(); @@ -352,9 +352,8 @@ void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { } void 
PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - bool isPCRel, unsigned AddrSpace) { + unsigned AddrSpace) { assert(getCurrentSection() && "Cannot emit contents before setting section!"); - assert(!isPCRel && "Cannot emit pc relative relocations!"); const char *Directive = 0; switch (Size) { default: break; @@ -383,15 +382,13 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, EmitEOL(); } -void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) { assert(MAI.hasLEB128() && "Cannot print a .uleb"); OS << ".uleb128 " << *Value; EmitEOL(); } -void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value, - unsigned AddrSpace) { +void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) { assert(MAI.hasLEB128() && "Cannot print a .sleb"); OS << ".sleb128 " << *Value; EmitEOL(); @@ -423,7 +420,8 @@ void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace); } -void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, +void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, + int64_t Value, unsigned ValueSize, unsigned MaxBytesToEmit) { // Some assemblers don't support non-power of two alignments, so we always @@ -532,7 +530,7 @@ void PTXMCAsmStreamer::Finish() {} namespace llvm { MCStreamer *createPTXAsmStreamer(MCContext &Context, formatted_raw_ostream &OS, - bool isVerboseAsm, bool useLoc, + bool isVerboseAsm, bool useLoc, bool useCFI, MCInstPrinter *IP, MCCodeEmitter *CE, TargetAsmBackend *TAB, bool ShowInst) { diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp index b37c740006f9..c5e191007239 100644 --- a/lib/Target/PTX/PTXMFInfoExtract.cpp +++ b/lib/Target/PTX/PTXMFInfoExtract.cpp @@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) { DEBUG(for 
(PTXMachineFunctionInfo::reg_iterator i = MFI->argRegBegin(), e = MFI->argRegEnd(); - i != e; ++i) + i != e; ++i) dbgs() << "Arg Reg: " << *i << "\n";); DEBUG(for (PTXMachineFunctionInfo::reg_iterator i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd(); - i != e; ++i) + i != e; ++i) dbgs() << "Local Var Reg: " << *i << "\n";); return false; diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h index 56d044b5fc0d..81df1c236cb2 100644 --- a/lib/Target/PTX/PTXMachineFunctionInfo.h +++ b/lib/Target/PTX/PTXMachineFunctionInfo.h @@ -42,36 +42,37 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo { void setRetReg(unsigned reg) { reg_ret = reg; } void doneAddArg(void) { - std::sort(reg_arg.begin(), reg_arg.end()); _isDoneAddArg = true; } - void doneAddLocalVar(void) { - std::sort(reg_local_var.begin(), reg_local_var.end()); - } + void doneAddLocalVar(void) {} bool isDoneAddArg(void) { return _isDoneAddArg; } bool isKernel() const { return is_kernel; } - typedef std::vector::const_iterator reg_iterator; + typedef std::vector::const_iterator reg_iterator; + typedef std::vector::const_reverse_iterator reg_reverse_iterator; - bool argRegEmpty() const { return reg_arg.empty(); } - int getNumArg() const { return reg_arg.size(); } + bool argRegEmpty() const { return reg_arg.empty(); } + int getNumArg() const { return reg_arg.size(); } reg_iterator argRegBegin() const { return reg_arg.begin(); } reg_iterator argRegEnd() const { return reg_arg.end(); } + reg_reverse_iterator argRegReverseBegin() const { return reg_arg.rbegin(); } + reg_reverse_iterator argRegReverseEnd() const { return reg_arg.rend(); } - bool localVarRegEmpty() const { return reg_local_var.empty(); } + bool localVarRegEmpty() const { return reg_local_var.empty(); } reg_iterator localVarRegBegin() const { return reg_local_var.begin(); } reg_iterator localVarRegEnd() const { return reg_local_var.end(); } unsigned retReg() const { return reg_ret; } bool 
isArgReg(unsigned reg) const { - return std::binary_search(reg_arg.begin(), reg_arg.end(), reg); + return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end(); } bool isLocalVarReg(unsigned reg) const { - return std::binary_search(reg_local_var.begin(), reg_local_var.end(), reg); + return std::find(reg_local_var.begin(), reg_local_var.end(), reg) + != reg_local_var.end(); } }; // class PTXMachineFunctionInfo } // namespace llvm diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td index 22e2b343a0e5..f6161419fec1 100644 --- a/lib/Target/PTX/PTXRegisterInfo.td +++ b/lib/Target/PTX/PTXRegisterInfo.td @@ -19,6 +19,8 @@ class PTXReg : Register { // Registers //===----------------------------------------------------------------------===// +///===- Predicate Registers -----------------------------------------------===// + def P0 : PTXReg<"p0">; def P1 : PTXReg<"p1">; def P2 : PTXReg<"p2">; @@ -51,6 +53,108 @@ def P28 : PTXReg<"p28">; def P29 : PTXReg<"p29">; def P30 : PTXReg<"p30">; def P31 : PTXReg<"p31">; +def P32 : PTXReg<"p32">; +def P33 : PTXReg<"p33">; +def P34 : PTXReg<"p34">; +def P35 : PTXReg<"p35">; +def P36 : PTXReg<"p36">; +def P37 : PTXReg<"p37">; +def P38 : PTXReg<"p38">; +def P39 : PTXReg<"p39">; +def P40 : PTXReg<"p40">; +def P41 : PTXReg<"p41">; +def P42 : PTXReg<"p42">; +def P43 : PTXReg<"p43">; +def P44 : PTXReg<"p44">; +def P45 : PTXReg<"p45">; +def P46 : PTXReg<"p46">; +def P47 : PTXReg<"p47">; +def P48 : PTXReg<"p48">; +def P49 : PTXReg<"p49">; +def P50 : PTXReg<"p50">; +def P51 : PTXReg<"p51">; +def P52 : PTXReg<"p52">; +def P53 : PTXReg<"p53">; +def P54 : PTXReg<"p54">; +def P55 : PTXReg<"p55">; +def P56 : PTXReg<"p56">; +def P57 : PTXReg<"p57">; +def P58 : PTXReg<"p58">; +def P59 : PTXReg<"p59">; +def P60 : PTXReg<"p60">; +def P61 : PTXReg<"p61">; +def P62 : PTXReg<"p62">; +def P63 : PTXReg<"p63">; + +///===- 16-bit Integer Registers ------------------------------------------===// + +def RH0 : 
PTXReg<"rh0">; +def RH1 : PTXReg<"rh1">; +def RH2 : PTXReg<"rh2">; +def RH3 : PTXReg<"rh3">; +def RH4 : PTXReg<"rh4">; +def RH5 : PTXReg<"rh5">; +def RH6 : PTXReg<"rh6">; +def RH7 : PTXReg<"rh7">; +def RH8 : PTXReg<"rh8">; +def RH9 : PTXReg<"rh9">; +def RH10 : PTXReg<"rh10">; +def RH11 : PTXReg<"rh11">; +def RH12 : PTXReg<"rh12">; +def RH13 : PTXReg<"rh13">; +def RH14 : PTXReg<"rh14">; +def RH15 : PTXReg<"rh15">; +def RH16 : PTXReg<"rh16">; +def RH17 : PTXReg<"rh17">; +def RH18 : PTXReg<"rh18">; +def RH19 : PTXReg<"rh19">; +def RH20 : PTXReg<"rh20">; +def RH21 : PTXReg<"rh21">; +def RH22 : PTXReg<"rh22">; +def RH23 : PTXReg<"rh23">; +def RH24 : PTXReg<"rh24">; +def RH25 : PTXReg<"rh25">; +def RH26 : PTXReg<"rh26">; +def RH27 : PTXReg<"rh27">; +def RH28 : PTXReg<"rh28">; +def RH29 : PTXReg<"rh29">; +def RH30 : PTXReg<"rh30">; +def RH31 : PTXReg<"rh31">; +def RH32 : PTXReg<"rh32">; +def RH33 : PTXReg<"rh33">; +def RH34 : PTXReg<"rh34">; +def RH35 : PTXReg<"rh35">; +def RH36 : PTXReg<"rh36">; +def RH37 : PTXReg<"rh37">; +def RH38 : PTXReg<"rh38">; +def RH39 : PTXReg<"rh39">; +def RH40 : PTXReg<"rh40">; +def RH41 : PTXReg<"rh41">; +def RH42 : PTXReg<"rh42">; +def RH43 : PTXReg<"rh43">; +def RH44 : PTXReg<"rh44">; +def RH45 : PTXReg<"rh45">; +def RH46 : PTXReg<"rh46">; +def RH47 : PTXReg<"rh47">; +def RH48 : PTXReg<"rh48">; +def RH49 : PTXReg<"rh49">; +def RH50 : PTXReg<"rh50">; +def RH51 : PTXReg<"rh51">; +def RH52 : PTXReg<"rh52">; +def RH53 : PTXReg<"rh53">; +def RH54 : PTXReg<"rh54">; +def RH55 : PTXReg<"rh55">; +def RH56 : PTXReg<"rh56">; +def RH57 : PTXReg<"rh57">; +def RH58 : PTXReg<"rh58">; +def RH59 : PTXReg<"rh59">; +def RH60 : PTXReg<"rh60">; +def RH61 : PTXReg<"rh61">; +def RH62 : PTXReg<"rh62">; +def RH63 : PTXReg<"rh63">; + + +///===- 32-bit Integer Registers ------------------------------------------===// def R0 : PTXReg<"r0">; def R1 : PTXReg<"r1">; @@ -84,6 +188,243 @@ def R28 : PTXReg<"r28">; def R29 : PTXReg<"r29">; def R30 : PTXReg<"r30">; def R31 : 
PTXReg<"r31">; +def R32 : PTXReg<"r32">; +def R33 : PTXReg<"r33">; +def R34 : PTXReg<"r34">; +def R35 : PTXReg<"r35">; +def R36 : PTXReg<"r36">; +def R37 : PTXReg<"r37">; +def R38 : PTXReg<"r38">; +def R39 : PTXReg<"r39">; +def R40 : PTXReg<"r40">; +def R41 : PTXReg<"r41">; +def R42 : PTXReg<"r42">; +def R43 : PTXReg<"r43">; +def R44 : PTXReg<"r44">; +def R45 : PTXReg<"r45">; +def R46 : PTXReg<"r46">; +def R47 : PTXReg<"r47">; +def R48 : PTXReg<"r48">; +def R49 : PTXReg<"r49">; +def R50 : PTXReg<"r50">; +def R51 : PTXReg<"r51">; +def R52 : PTXReg<"r52">; +def R53 : PTXReg<"r53">; +def R54 : PTXReg<"r54">; +def R55 : PTXReg<"r55">; +def R56 : PTXReg<"r56">; +def R57 : PTXReg<"r57">; +def R58 : PTXReg<"r58">; +def R59 : PTXReg<"r59">; +def R60 : PTXReg<"r60">; +def R61 : PTXReg<"r61">; +def R62 : PTXReg<"r62">; +def R63 : PTXReg<"r63">; + + +///===- 64-bit Integer Registers ------------------------------------------===// + +def RD0 : PTXReg<"rd0">; +def RD1 : PTXReg<"rd1">; +def RD2 : PTXReg<"rd2">; +def RD3 : PTXReg<"rd3">; +def RD4 : PTXReg<"rd4">; +def RD5 : PTXReg<"rd5">; +def RD6 : PTXReg<"rd6">; +def RD7 : PTXReg<"rd7">; +def RD8 : PTXReg<"rd8">; +def RD9 : PTXReg<"rd9">; +def RD10 : PTXReg<"rd10">; +def RD11 : PTXReg<"rd11">; +def RD12 : PTXReg<"rd12">; +def RD13 : PTXReg<"rd13">; +def RD14 : PTXReg<"rd14">; +def RD15 : PTXReg<"rd15">; +def RD16 : PTXReg<"rd16">; +def RD17 : PTXReg<"rd17">; +def RD18 : PTXReg<"rd18">; +def RD19 : PTXReg<"rd19">; +def RD20 : PTXReg<"rd20">; +def RD21 : PTXReg<"rd21">; +def RD22 : PTXReg<"rd22">; +def RD23 : PTXReg<"rd23">; +def RD24 : PTXReg<"rd24">; +def RD25 : PTXReg<"rd25">; +def RD26 : PTXReg<"rd26">; +def RD27 : PTXReg<"rd27">; +def RD28 : PTXReg<"rd28">; +def RD29 : PTXReg<"rd29">; +def RD30 : PTXReg<"rd30">; +def RD31 : PTXReg<"rd31">; +def RD32 : PTXReg<"rd32">; +def RD33 : PTXReg<"rd33">; +def RD34 : PTXReg<"rd34">; +def RD35 : PTXReg<"rd35">; +def RD36 : PTXReg<"rd36">; +def RD37 : PTXReg<"rd37">; +def RD38 : 
PTXReg<"rd38">; +def RD39 : PTXReg<"rd39">; +def RD40 : PTXReg<"rd40">; +def RD41 : PTXReg<"rd41">; +def RD42 : PTXReg<"rd42">; +def RD43 : PTXReg<"rd43">; +def RD44 : PTXReg<"rd44">; +def RD45 : PTXReg<"rd45">; +def RD46 : PTXReg<"rd46">; +def RD47 : PTXReg<"rd47">; +def RD48 : PTXReg<"rd48">; +def RD49 : PTXReg<"rd49">; +def RD50 : PTXReg<"rd50">; +def RD51 : PTXReg<"rd51">; +def RD52 : PTXReg<"rd52">; +def RD53 : PTXReg<"rd53">; +def RD54 : PTXReg<"rd54">; +def RD55 : PTXReg<"rd55">; +def RD56 : PTXReg<"rd56">; +def RD57 : PTXReg<"rd57">; +def RD58 : PTXReg<"rd58">; +def RD59 : PTXReg<"rd59">; +def RD60 : PTXReg<"rd60">; +def RD61 : PTXReg<"rd61">; +def RD62 : PTXReg<"rd62">; +def RD63 : PTXReg<"rd63">; + + +///===- 32-bit Floating-Point Registers -----------------------------------===// + +def F0 : PTXReg<"f0">; +def F1 : PTXReg<"f1">; +def F2 : PTXReg<"f2">; +def F3 : PTXReg<"f3">; +def F4 : PTXReg<"f4">; +def F5 : PTXReg<"f5">; +def F6 : PTXReg<"f6">; +def F7 : PTXReg<"f7">; +def F8 : PTXReg<"f8">; +def F9 : PTXReg<"f9">; +def F10 : PTXReg<"f10">; +def F11 : PTXReg<"f11">; +def F12 : PTXReg<"f12">; +def F13 : PTXReg<"f13">; +def F14 : PTXReg<"f14">; +def F15 : PTXReg<"f15">; +def F16 : PTXReg<"f16">; +def F17 : PTXReg<"f17">; +def F18 : PTXReg<"f18">; +def F19 : PTXReg<"f19">; +def F20 : PTXReg<"f20">; +def F21 : PTXReg<"f21">; +def F22 : PTXReg<"f22">; +def F23 : PTXReg<"f23">; +def F24 : PTXReg<"f24">; +def F25 : PTXReg<"f25">; +def F26 : PTXReg<"f26">; +def F27 : PTXReg<"f27">; +def F28 : PTXReg<"f28">; +def F29 : PTXReg<"f29">; +def F30 : PTXReg<"f30">; +def F31 : PTXReg<"f31">; +def F32 : PTXReg<"f32">; +def F33 : PTXReg<"f33">; +def F34 : PTXReg<"f34">; +def F35 : PTXReg<"f35">; +def F36 : PTXReg<"f36">; +def F37 : PTXReg<"f37">; +def F38 : PTXReg<"f38">; +def F39 : PTXReg<"f39">; +def F40 : PTXReg<"f40">; +def F41 : PTXReg<"f41">; +def F42 : PTXReg<"f42">; +def F43 : PTXReg<"f43">; +def F44 : PTXReg<"f44">; +def F45 : PTXReg<"f45">; +def F46 : 
PTXReg<"f46">; +def F47 : PTXReg<"f47">; +def F48 : PTXReg<"f48">; +def F49 : PTXReg<"f49">; +def F50 : PTXReg<"f50">; +def F51 : PTXReg<"f51">; +def F52 : PTXReg<"f52">; +def F53 : PTXReg<"f53">; +def F54 : PTXReg<"f54">; +def F55 : PTXReg<"f55">; +def F56 : PTXReg<"f56">; +def F57 : PTXReg<"f57">; +def F58 : PTXReg<"f58">; +def F59 : PTXReg<"f59">; +def F60 : PTXReg<"f60">; +def F61 : PTXReg<"f61">; +def F62 : PTXReg<"f62">; +def F63 : PTXReg<"f63">; + + +///===- 64-bit Floating-Point Registers -----------------------------------===// + +def FD0 : PTXReg<"fd0">; +def FD1 : PTXReg<"fd1">; +def FD2 : PTXReg<"fd2">; +def FD3 : PTXReg<"fd3">; +def FD4 : PTXReg<"fd4">; +def FD5 : PTXReg<"fd5">; +def FD6 : PTXReg<"fd6">; +def FD7 : PTXReg<"fd7">; +def FD8 : PTXReg<"fd8">; +def FD9 : PTXReg<"fd9">; +def FD10 : PTXReg<"fd10">; +def FD11 : PTXReg<"fd11">; +def FD12 : PTXReg<"fd12">; +def FD13 : PTXReg<"fd13">; +def FD14 : PTXReg<"fd14">; +def FD15 : PTXReg<"fd15">; +def FD16 : PTXReg<"fd16">; +def FD17 : PTXReg<"fd17">; +def FD18 : PTXReg<"fd18">; +def FD19 : PTXReg<"fd19">; +def FD20 : PTXReg<"fd20">; +def FD21 : PTXReg<"fd21">; +def FD22 : PTXReg<"fd22">; +def FD23 : PTXReg<"fd23">; +def FD24 : PTXReg<"fd24">; +def FD25 : PTXReg<"fd25">; +def FD26 : PTXReg<"fd26">; +def FD27 : PTXReg<"fd27">; +def FD28 : PTXReg<"fd28">; +def FD29 : PTXReg<"fd29">; +def FD30 : PTXReg<"fd30">; +def FD31 : PTXReg<"fd31">; +def FD32 : PTXReg<"fd32">; +def FD33 : PTXReg<"fd33">; +def FD34 : PTXReg<"fd34">; +def FD35 : PTXReg<"fd35">; +def FD36 : PTXReg<"fd36">; +def FD37 : PTXReg<"fd37">; +def FD38 : PTXReg<"fd38">; +def FD39 : PTXReg<"fd39">; +def FD40 : PTXReg<"fd40">; +def FD41 : PTXReg<"fd41">; +def FD42 : PTXReg<"fd42">; +def FD43 : PTXReg<"fd43">; +def FD44 : PTXReg<"fd44">; +def FD45 : PTXReg<"fd45">; +def FD46 : PTXReg<"fd46">; +def FD47 : PTXReg<"fd47">; +def FD48 : PTXReg<"fd48">; +def FD49 : PTXReg<"fd49">; +def FD50 : PTXReg<"fd50">; +def FD51 : PTXReg<"fd51">; +def FD52 : 
PTXReg<"fd52">; +def FD53 : PTXReg<"fd53">; +def FD54 : PTXReg<"fd54">; +def FD55 : PTXReg<"fd55">; +def FD56 : PTXReg<"fd56">; +def FD57 : PTXReg<"fd57">; +def FD58 : PTXReg<"fd58">; +def FD59 : PTXReg<"fd59">; +def FD60 : PTXReg<"fd60">; +def FD61 : PTXReg<"fd61">; +def FD62 : PTXReg<"fd62">; +def FD63 : PTXReg<"fd63">; + //===----------------------------------------------------------------------===// // Register classes @@ -93,10 +434,58 @@ def Preds : RegisterClass<"PTX", [i1], 8, [P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11, P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, - P24, P25, P26, P27, P28, P29, P30, P31]>; + P24, P25, P26, P27, P28, P29, P30, P31, + P32, P33, P34, P35, P36, P37, P38, P39, + P40, P41, P42, P43, P44, P45, P46, P47, + P48, P49, P50, P51, P52, P53, P54, P55, + P56, P57, P58, P59, P60, P61, P62, P63]>; -def RRegs32 : RegisterClass<"PTX", [i32], 32, +def RRegu16 : RegisterClass<"PTX", [i16], 16, + [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, + RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15, + RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, + RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, + RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, + RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, + RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, + RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>; + +def RRegu32 : RegisterClass<"PTX", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, - R24, R25, R26, R27, R28, R29, R30, R31]>; + R24, R25, R26, R27, R28, R29, R30, R31, + R32, R33, R34, R35, R36, R37, R38, R39, + R40, R41, R42, R43, R44, R45, R46, R47, + R48, R49, R50, R51, R52, R53, R54, R55, + R56, R57, R58, R59, R60, R61, R62, R63]>; + +def RRegu64 : RegisterClass<"PTX", [i64], 64, + [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, + RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15, + RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, + RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, 
+ RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, + RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, + RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, + RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>; + +def RRegf32 : RegisterClass<"PTX", [f32], 32, + [F0, F1, F2, F3, F4, F5, F6, F7, + F8, F9, F10, F11, F12, F13, F14, F15, + F16, F17, F18, F19, F20, F21, F22, F23, + F24, F25, F26, F27, F28, F29, F30, F31, + F32, F33, F34, F35, F36, F37, F38, F39, + F40, F41, F42, F43, F44, F45, F46, F47, + F48, F49, F50, F51, F52, F53, F54, F55, + F56, F57, F58, F59, F60, F61, F62, F63]>; + +def RRegf64 : RegisterClass<"PTX", [f64], 64, + [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7, + FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15, + FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23, + FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31, + FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39, + FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47, + FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55, + FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>; diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp index 00e2c882a5ca..a224f2b8be1a 100644 --- a/lib/Target/PTX/PTXSubtarget.cpp +++ b/lib/Target/PTX/PTXSubtarget.cpp @@ -12,12 +12,36 @@ //===----------------------------------------------------------------------===// #include "PTXSubtarget.h" +#include "llvm/Support/ErrorHandling.h" using namespace llvm; -PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) { - std::string TARGET = "sm_20"; - // TODO: call ParseSubtargetFeatures(FS, TARGET); +PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS, + bool is64Bit) + : PTXShaderModel(PTX_SM_1_0), + PTXVersion(PTX_VERSION_2_0), + SupportsDouble(false), + Is64Bit(is64Bit) { + std::string TARGET = "generic"; + ParseSubtargetFeatures(FS, TARGET); +} + +std::string PTXSubtarget::getTargetString() const { + switch(PTXShaderModel) { + default: llvm_unreachable("Unknown shader model"); + case PTX_SM_1_0: 
return "sm_10"; + case PTX_SM_1_3: return "sm_13"; + case PTX_SM_2_0: return "sm_20"; + } +} + +std::string PTXSubtarget::getPTXVersionString() const { + switch(PTXVersion) { + default: llvm_unreachable("Unknown PTX version"); + case PTX_VERSION_2_0: return "2.0"; + case PTX_VERSION_2_1: return "2.1"; + case PTX_VERSION_2_2: return "2.2"; + } } #include "PTXGenSubtarget.inc" diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h index 7fd85f873ae4..47d98424065b 100644 --- a/lib/Target/PTX/PTXSubtarget.h +++ b/lib/Target/PTX/PTXSubtarget.h @@ -19,10 +19,57 @@ namespace llvm { class PTXSubtarget : public TargetSubtarget { private: - bool is_sm20; + + /** + * Enumeration of Shader Models supported by the back-end. + */ + enum PTXShaderModelEnum { + PTX_SM_1_0, /*< Shader Model 1.0 */ + PTX_SM_1_3, /*< Shader Model 1.3 */ + PTX_SM_2_0 /*< Shader Model 2.0 */ + }; + + /** + * Enumeration of PTX versions supported by the back-end. + * + * Currently, PTX 2.0 is the minimum supported version. + */ + enum PTXVersionEnum { + PTX_VERSION_2_0, /*< PTX Version 2.0 */ + PTX_VERSION_2_1, /*< PTX Version 2.1 */ + PTX_VERSION_2_2 /*< PTX Version 2.2 */ + }; + + /// Shader Model supported on the target GPU. + PTXShaderModelEnum PTXShaderModel; + + /// PTX Language Version. + PTXVersionEnum PTXVersion; + + // The native .f64 type is supported on the hardware. + bool SupportsDouble; + + // Use .u64 instead of .u32 for addresses. 
+ bool Is64Bit; public: - PTXSubtarget(const std::string &TT, const std::string &FS); + PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit); + + std::string getTargetString() const; + + std::string getPTXVersionString() const; + + bool supportsDouble() const { return SupportsDouble; } + + bool is64Bit() const { return Is64Bit; } + + bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; } + + bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; } + + bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; } + + bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; } std::string ParseSubtargetFeatures(const std::string &FS, const std::string &CPU); diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp index b263813cb4e7..1b737c9d8634 100644 --- a/lib/Target/PTX/PTXTargetMachine.cpp +++ b/lib/Target/PTX/PTXTargetMachine.cpp @@ -16,12 +16,14 @@ #include "PTXTargetMachine.h" #include "llvm/PassManager.h" #include "llvm/Target/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; namespace llvm { MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, bool isVerboseAsm, bool useLoc, + bool useCFI, MCInstPrinter *InstPrint, MCCodeEmitter *CE, TargetAsmBackend *TAB, @@ -29,21 +31,47 @@ namespace llvm { } extern "C" void LLVMInitializePTXTarget() { - RegisterTargetMachine X(ThePTXTarget); - RegisterAsmInfo Y(ThePTXTarget); - TargetRegistry::RegisterAsmStreamer(ThePTXTarget, createPTXAsmStreamer); + + RegisterTargetMachine X(ThePTX32Target); + RegisterTargetMachine Y(ThePTX64Target); + + RegisterAsmInfo Z(ThePTX32Target); + RegisterAsmInfo W(ThePTX64Target); + + TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); + TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); +} + +namespace { + const char* DataLayout32 = + "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; + const 
char* DataLayout64 = + "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; } // DataLayout and FrameLowering are filled with dummy data PTXTargetMachine::PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &FS) + const std::string &FS, + bool is64Bit) : LLVMTargetMachine(T, TT), - DataLayout("e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"), + DataLayout(is64Bit ? DataLayout64 : DataLayout32), + Subtarget(TT, FS, is64Bit), FrameLowering(Subtarget), InstrInfo(*this), - TLInfo(*this), - Subtarget(TT, FS) { + TLInfo(*this) { +} + +PTX32TargetMachine::PTX32TargetMachine(const Target &T, + const std::string& TT, + const std::string& FS) + : PTXTargetMachine(T, TT, FS, false) { +} + +PTX64TargetMachine::PTX64TargetMachine(const Target &T, + const std::string& TT, + const std::string& FS) + : PTXTargetMachine(T, TT, FS, true) { } bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h index 728e36f56f01..149be8e3b7e9 100644 --- a/lib/Target/PTX/PTXTargetMachine.h +++ b/lib/Target/PTX/PTXTargetMachine.h @@ -25,15 +25,15 @@ namespace llvm { class PTXTargetMachine : public LLVMTargetMachine { private: - const TargetData DataLayout; - PTXFrameLowering FrameLowering; - PTXInstrInfo InstrInfo; + const TargetData DataLayout; + PTXSubtarget Subtarget; // has to be initialized before FrameLowering + PTXFrameLowering FrameLowering; + PTXInstrInfo InstrInfo; PTXTargetLowering TLInfo; - PTXSubtarget Subtarget; public: PTXTargetMachine(const Target &T, const std::string &TT, - const std::string &FS); + const std::string &FS, bool is64Bit); virtual const TargetData *getTargetData() const { return &DataLayout; } @@ -55,6 +55,22 @@ class PTXTargetMachine : public LLVMTargetMachine { virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); }; // class PTXTargetMachine + + +class PTX32TargetMachine : public PTXTargetMachine { +public: + 
+ PTX32TargetMachine(const Target &T, const std::string &TT, + const std::string& FS); +}; // class PTX32TargetMachine + +class PTX64TargetMachine : public PTXTargetMachine { +public: + + PTX64TargetMachine(const Target &T, const std::string &TT, + const std::string& FS); +}; // class PTX64TargetMachine + } // namespace llvm #endif // PTX_TARGET_MACHINE_H diff --git a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp index a577d7755af5..9df6c7567bd1 100644 --- a/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp +++ b/lib/Target/PTX/TargetInfo/PTXTargetInfo.cpp @@ -13,9 +13,13 @@ using namespace llvm; -Target llvm::ThePTXTarget; +Target llvm::ThePTX32Target; +Target llvm::ThePTX64Target; extern "C" void LLVMInitializePTXTargetInfo() { // see llvm/ADT/Triple.h - RegisterTarget X(ThePTXTarget, "ptx", "PTX"); + RegisterTarget X32(ThePTX32Target, "ptx32", + "PTX (32-bit) [Experimental]"); + RegisterTarget X64(ThePTX64Target, "ptx64", + "PTX (64-bit) [Experimental]"); } diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h index ebc10daa5f16..9cf9db9c26b7 100644 --- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h +++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h @@ -17,13 +17,16 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; + +class MCOperand; +class TargetMachine; class PPCInstPrinter : public MCInstPrinter { // 0 -> AIX, 1 -> Darwin. 
unsigned SyntaxVariant; public: - PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant) + PPCInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI, + unsigned syntaxVariant) : MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {} bool isDarwinSyntax() const { diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp index c4d4ac9b3eb9..f562a3f4f9e8 100644 --- a/lib/Target/PowerPC/PPCAsmBackend.cpp +++ b/lib/Target/PowerPC/PPCAsmBackend.cpp @@ -110,10 +110,8 @@ namespace { TargetAsmBackend *llvm::createPPCAsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + if (Triple(TT).isOSDarwin()) return new DarwinPPCAsmBackend(T); - default: - return 0; - } + + return 0; } diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index 8ed5d7f0ee71..09a9be998247 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -680,9 +680,10 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm, } static MCInstPrinter *createPPCMCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { - return new PPCInstPrinter(MAI, SyntaxVariant); + return new PPCInstPrinter(TM, MAI, SyntaxVariant); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 70d00e4b5cc5..128522c88431 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -899,7 +899,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? 
PPC::X0 : PPC::R0, + CN->getValueType(0)); return true; } @@ -947,7 +948,8 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPC::R0, N.getValueType()); + Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::X0 : PPC::R0, + N.getValueType()); Index = N; return true; } @@ -2153,7 +2155,7 @@ CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, } /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be -/// adjusted to accomodate the arguments for the tailcall. +/// adjusted to accommodate the arguments for the tailcall. static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, unsigned ParamSize) { @@ -2394,7 +2396,7 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, // Emit a sequence of copyto/copyfrom virtual registers for arguments that // might overwrite each other in case of tail call optimization. SmallVector MemOpChains2; - // Do not flag preceeding copytoreg stuff together with the following stuff. + // Do not flag preceding copytoreg stuff together with the following stuff. 
InFlag = SDValue(); StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, MemOpChains2, dl); @@ -2442,7 +2444,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (!DAG.getTarget().getSubtarget().isJITCodeModel()) { unsigned OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - PPCSubTarget.getDarwinVers() < 9 && + (!PPCSubTarget.getTargetTriple().isMacOSX() || + PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && (G->getGlobal()->isDeclaration() || G->getGlobal()->isWeakForLinker())) { // PC-relative references to external symbols should go through $stub, @@ -2465,7 +2468,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned char OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - PPCSubTarget.getDarwinVers() < 9) { + (!PPCSubTarget.getTargetTriple().isMacOSX() || + PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -4571,6 +4575,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. bool is64bit = PPCSubTarget.isPPC64(); + unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; const BasicBlock *LLVM_BB = BB->getBasicBlock(); MachineFunction *F = BB->getParent(); @@ -4634,8 +4639,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // bne- loopMBB // fallthrough --> exitMBB // srw dest, tmpDest, shift - - if (ptrA!=PPC::R0) { + if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); @@ -4665,7 +4669,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, BB = loopMBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); if (BinOpcode) BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) .addReg(Incr2Reg).addReg(TmpDestReg); @@ -4676,7 +4680,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) .addReg(Tmp3Reg).addReg(Tmp2Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)) - .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg); + .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); BB->addSuccessor(loopMBB); @@ -4685,7 +4689,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // exitMBB: // ... BB = exitMBB; - BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg); + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) + .addReg(ShiftReg); return BB; } @@ -4933,6 +4938,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); unsigned Ptr1Reg; unsigned TmpReg = RegInfo.createVirtualRegister(RC); + unsigned ZeroReg = is64bit ? PPC::X0 : PPC::R0; // thisMBB: // ... // fallthrough --> loopMBB @@ -4965,7 +4971,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // stwcx. tmpDest, ptr // exitBB: // srw dest, tmpDest, shift - if (ptrA!=PPC::R0) { + if (ptrA != ZeroReg) { Ptr1Reg = RegInfo.createVirtualRegister(RC); BuildMI(BB, dl, TII->get(is64bit ? 
PPC::ADD8 : PPC::ADD4), Ptr1Reg) .addReg(ptrA).addReg(ptrB); @@ -5002,7 +5008,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = loop1MBB; BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) .addReg(TmpDestReg).addReg(MaskReg); BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) @@ -5018,7 +5024,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) .addReg(Tmp2Reg).addReg(NewVal3Reg); BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); BuildMI(BB, dl, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); @@ -5027,13 +5033,14 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = midMBB; BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) - .addReg(PPC::R0).addReg(PtrReg); + .addReg(ZeroReg).addReg(PtrReg); BB->addSuccessor(exitMBB); // exitMBB: // ... 
BB = exitMBB; - BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg); + BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) + .addReg(ShiftReg); } else { llvm_unreachable("Unexpected instr type to insert"); } diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 6636b6927191..9f0fae53ec08 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -130,7 +130,7 @@ def : Pat<(PPCnop), // Atomic operations let usesCustomInserter = 1 in { - let Uses = [CR0] in { + let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I64 : Pseudo< (outs G8RC:$dst), (ins memrr:$ptr, G8RC:$incr), "", [(set G8RC:$dst, (atomic_load_add_64 xoaddr:$ptr, G8RC:$incr))]>; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 82aadeb47ad1..24071b79ab06 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -550,7 +550,7 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), // Atomic operations let usesCustomInserter = 1 in { - let Uses = [CR0] in { + let Defs = [CR0] in { def ATOMIC_LOAD_ADD_I8 : Pseudo< (outs GPRC:$dst), (ins memrr:$ptr, GPRC:$incr), "", [(set GPRC:$dst, (atomic_load_add_8 xoaddr:$ptr, GPRC:$incr))]>; diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/PPCMCAsmInfo.cpp index d1178dd7e1ff..9e508cc9babb 100644 --- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp +++ b/lib/Target/PowerPC/PPCMCAsmInfo.cpp @@ -17,7 +17,7 @@ using namespace llvm; PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) { PCSymbol = "."; CommentString = ";"; - ExceptionsType = ExceptionHandling::DwarfTable; + ExceptionsType = ExceptionHandling::DwarfCFI; if (!is64Bit) Data64bitsDirective = 0; // We can't emit a 64-bit unit in PPC32 mode. 
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index 72a1deeced44..5f3aa2328f9e 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -70,7 +70,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, , HasSTFIWX(false) , HasLazyResolverStubs(false) , IsJITCodeModel(false) - , DarwinVers(0) { + , TargetTriple(TT) { // Determine default and user specified characteristics std::string CPU = "generic"; @@ -92,19 +92,6 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS, // support it, ignore. if (use64BitRegs() && !has64BitSupport()) Use64BitRegs = false; - - // Set the boolean corresponding to the current target triple, or the default - // if one cannot be determined, to true. - if (TT.length() > 7) { - // Determine which version of darwin this is. - size_t DarwinPos = TT.find("-darwin"); - if (DarwinPos != std::string::npos) { - if (isdigit(TT[DarwinPos+7])) - DarwinVers = atoi(&TT[DarwinPos+7]); - else - DarwinVers = 8; // Minimum supported darwin is Tiger. - } - } // Set up darwin-specific properties. if (isDarwin()) diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 00ec7474c9e3..8fd1a447692d 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -14,6 +14,7 @@ #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H +#include "llvm/ADT/Triple.h" #include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetSubtarget.h" @@ -65,9 +66,9 @@ class PPCSubtarget : public TargetSubtarget { bool HasLazyResolverStubs; bool IsJITCodeModel; - /// DarwinVers - Nonzero if this is a darwin platform. Otherwise, the numeric - /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc. - unsigned char DarwinVers; // Is any darwin-ppc platform. + /// TargetTriple - What processor and OS we're targeting. 
+ Triple TargetTriple; + public: /// This constructor initializes the data members to match that /// of the specified triple. @@ -134,13 +135,10 @@ class PPCSubtarget : public TargetSubtarget { bool hasAltivec() const { return HasAltivec; } bool isGigaProcessor() const { return IsGigaProcessor; } - /// isDarwin - True if this is any darwin platform. - bool isDarwin() const { return DarwinVers != 0; } - /// isDarwin - True if this is darwin9 (leopard, 10.5) or above. - bool isDarwin9() const { return DarwinVers >= 9; } + const Triple &getTargetTriple() const { return TargetTriple; } - /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. - unsigned getDarwinVers() const { return DarwinVers; } + /// isDarwin - True if this is any darwin platform. + bool isDarwin() const { return TargetTriple.isMacOSX(); } bool isDarwinABI() const { return isDarwin(); } bool isSVR4ABI() const { return !isDarwin(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 212b450e7db9..d27e54e56699 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -24,7 +24,7 @@ using namespace llvm; static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; - if (TheTriple.getOS() == Triple::Darwin) + if (TheTriple.isOSDarwin()) return new PPCMCAsmInfoDarwin(isPPC64); return new PPCLinuxMCAsmInfo(isPPC64); @@ -37,12 +37,10 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, MCCodeEmitter *Emitter, bool RelaxAll, bool NoExecStack) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + if (Triple(TT).isOSDarwin()) return createMachOStreamer(Ctx, TAB, OS, Emitter, RelaxAll); - default: - return NULL; - } + + return NULL; } extern "C" void LLVMInitializePowerPCTarget() { diff --git a/lib/Target/README.txt b/lib/Target/README.txt index f85914b61d9d..ffe3fa477b9e 100644 --- 
a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -392,34 +392,6 @@ PHI Slicing could be extended to do this. //===---------------------------------------------------------------------===// -LSR should know what GPR types a target has from TargetData. This code: - -volatile short X, Y; // globals - -void foo(int N) { - int i; - for (i = 0; i < N; i++) { X = i; Y = i*4; } -} - -produces two near identical IV's (after promotion) on PPC/ARM: - -LBB1_2: - ldr r3, LCPI1_0 - ldr r3, [r3] - strh r2, [r3] - ldr r3, LCPI1_1 - ldr r3, [r3] - strh r1, [r3] - add r1, r1, #4 - add r2, r2, #1 <- [0,+,1] - sub r0, r0, #1 <- [0,-,1] - cmp r0, #0 - bne LBB1_2 - -LSR should reuse the "+" IV for the exit test. - -//===---------------------------------------------------------------------===// - Tail call elim should be more aggressive, checking to see if the call is followed by an uncond branch to an exit block. @@ -1325,6 +1297,21 @@ codegen. //===---------------------------------------------------------------------===// +simplifylibcalls should turn these snprintf idioms into memcpy (GCC PR47917) + +char buf1[6], buf2[6], buf3[4], buf4[4]; +int i; + +int foo (void) { + int ret = snprintf (buf1, sizeof buf1, "abcde"); + ret += snprintf (buf2, sizeof buf2, "abcdef") * 16; + ret += snprintf (buf3, sizeof buf3, "%s", i++ < 6 ? "abc" : "def") * 256; + ret += snprintf (buf4, sizeof buf4, "%s", i++ > 10 ? "abcde" : "defgh")*4096; + return ret; +} + +//===---------------------------------------------------------------------===// + "gas" uses this idiom: else if (strchr ("+-/*%|&^:[]()~", *intel_parser.op_string)) .. @@ -1780,43 +1767,6 @@ case it choses instead to keep the max operation obvious. 
//===---------------------------------------------------------------------===// -Take the following testcase on x86-64 (similar testcases exist for all targets -with addc/adde): - -define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, -i64 %c) nounwind { -entry: - %0 = zext i64 %a to i128 ; [#uses=1] - %1 = zext i64 %b to i128 ; [#uses=1] - %2 = add i128 %1, %0 ; [#uses=2] - %3 = zext i64 %c to i128 ; [#uses=1] - %4 = shl i128 %3, 64 ; [#uses=1] - %5 = add i128 %4, %2 ; [#uses=1] - %6 = lshr i128 %5, 64 ; [#uses=1] - %7 = trunc i128 %6 to i64 ; [#uses=1] - store i64 %7, i64* %s, align 8 - %8 = trunc i128 %2 to i64 ; [#uses=1] - store i64 %8, i64* %t, align 8 - ret void -} - -Generated code: - addq %rcx, %rdx - sbbq %rax, %rax - subq %rax, %r8 - movq %r8, (%rdi) - movq %rdx, (%rsi) - ret - -Expected code: - addq %rcx, %rdx - adcq $0, %r8 - movq %r8, (%rdi) - movq %rdx, (%rsi) - ret - -//===---------------------------------------------------------------------===// - Switch lowering generates less than ideal code for the following switch: define void @a(i32 %x) nounwind { entry: @@ -2124,11 +2074,12 @@ for.end: ; preds = %entry } This shouldn't need the ((zext (%n - 1)) + 1) game, and it should ideally fold -the two memset's together. The issue with %n seems to stem from poor handling -of the original loop. +the two memset's together. -To simplify this, we need SCEV to know that "n != 0" because of the dominating -conditional. That would turn the second memset into a simple memset of 'n'. +The issue with the addition only occurs in 64-bit mode, and appears to be at +least partially caused by Scalar Evolution not keeping its cache updated: it +returns the "wrong" result immediately after indvars runs, but figures out the +expected result if it is run from scratch on IR resulting from running indvars. 
//===---------------------------------------------------------------------===// @@ -2287,4 +2238,71 @@ missed cases: //===---------------------------------------------------------------------===// +define i1 @test1(i32 %x) nounwind { + %and = and i32 %x, 3 + %cmp = icmp ult i32 %and, 2 + ret i1 %cmp +} + +Can be folded to (x & 2) == 0. + +define i1 @test2(i32 %x) nounwind { + %and = and i32 %x, 3 + %cmp = icmp ugt i32 %and, 1 + ret i1 %cmp +} + +Can be folded to (x & 2) != 0. + +SimplifyDemandedBits shrinks the "and" constant to 2 but instcombine misses the +icmp transform. + +//===---------------------------------------------------------------------===// + +This code: + +typedef struct { +int f1:1; +int f2:1; +int f3:1; +int f4:29; +} t1; + +typedef struct { +int f1:1; +int f2:1; +int f3:30; +} t2; + +t1 s1; +t2 s2; + +void func1(void) +{ +s1.f1 = s2.f1; +s1.f2 = s2.f2; +} + +Compiles into this IR (on x86-64 at least): + +%struct.t1 = type { i8, [3 x i8] } +@s2 = global %struct.t1 zeroinitializer, align 4 +@s1 = global %struct.t1 zeroinitializer, align 4 +define void @func1() nounwind ssp noredzone { +entry: + %0 = load i32* bitcast (%struct.t1* @s2 to i32*), align 4 + %bf.val.sext5 = and i32 %0, 1 + %1 = load i32* bitcast (%struct.t1* @s1 to i32*), align 4 + %2 = and i32 %1, -4 + %3 = or i32 %2, %bf.val.sext5 + %bf.val.sext26 = and i32 %0, 2 + %4 = or i32 %3, %bf.val.sext26 + store i32 %4, i32* bitcast (%struct.t1* @s1 to i32*), align 4 + ret void +} + +The two or/and's should be merged into one each. 
+ +//===---------------------------------------------------------------------===// + diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 70574c370f35..edb62fa0c625 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -544,7 +544,7 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 3cf95b57c5dc..e0a9de82983f 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -211,7 +211,7 @@ const std::string & SubtargetFeatures::getCPU() const { /// feature, set it. /// static -void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, +void SetImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry, const SubtargetFeatureKV *FeatureTable, size_t FeatureTableSize) { for (size_t i = 0; i < FeatureTableSize; ++i) { @@ -230,7 +230,7 @@ void SetImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, /// feature, clear it. /// static -void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, +void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry, const SubtargetFeatureKV *FeatureTable, size_t FeatureTableSize) { for (size_t i = 0; i < FeatureTableSize; ++i) { @@ -247,7 +247,7 @@ void ClearImpliedBits(uint32_t &Bits, const SubtargetFeatureKV *FeatureEntry, /// getBits - Get feature bits. 
/// -uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, +uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize, const SubtargetFeatureKV *FeatureTable, size_t FeatureTableSize) { @@ -263,7 +263,7 @@ uint32_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable, "CPU features table is not sorted"); } #endif - uint32_t Bits = 0; // Resulting bits + uint64_t Bits = 0; // Resulting bits // Check if help is needed if (Features[0] == "help") diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 90939c312065..d331614400e4 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -451,7 +451,7 @@ SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token chain and // flag operands which copy the outgoing args into registers. The InFlag in - // necessary since all emited instructions must be stuck together. + // necessary since all emitted instructions must be stuck together. 
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index c628df04e710..1990bc7b929c 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -617,10 +617,14 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, unsigned TargetData::getPreferredAlignment(const GlobalVariable *GV) const { const Type *ElemType = GV->getType()->getElementType(); unsigned Alignment = getPrefTypeAlignment(ElemType); - if (GV->getAlignment() > Alignment) - Alignment = GV->getAlignment(); + unsigned GVAlignment = GV->getAlignment(); + if (GVAlignment >= Alignment) { + Alignment = GVAlignment; + } else if (GVAlignment != 0) { + Alignment = std::max(GVAlignment, getABITypeAlignment(ElemType)); + } - if (GV->hasInitializer()) { + if (GV->hasInitializer() && GVAlignment == 0) { if (Alignment < 16) { // If the global is not external, see if it is large. If so, give it a // larger alignment. diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 97f3bf6e57ad..d4b76972e49a 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -149,10 +149,10 @@ bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { /// Measure the specified inline asm to determine an approximation of its /// length. -/// Comments (which run till the next SeparatorChar or newline) do not +/// Comments (which run till the next SeparatorString or newline) do not /// count as an instruction. /// Any other non-whitespace text is considered an instruction, with -/// multiple instructions separated by SeparatorChar or newlines. +/// multiple instructions separated by SeparatorString or newlines. /// Variable-length instructions are not handled here; this function /// may be overloaded in the target code to do that. 
unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, @@ -163,7 +163,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, bool atInsnStart = true; unsigned Length = 0; for (; *Str; ++Str) { - if (*Str == '\n' || *Str == MAI.getSeparatorChar()) + if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0) atInsnStart = true; if (atInsnStart && !std::isspace(*Str)) { Length += MAI.getMaxInstLength(); diff --git a/lib/Target/TargetLibraryInfo.cpp b/lib/Target/TargetLibraryInfo.cpp index c8bed18ffabe..e336b09291a4 100644 --- a/lib/Target/TargetLibraryInfo.cpp +++ b/lib/Target/TargetLibraryInfo.cpp @@ -28,9 +28,22 @@ static void initialize(TargetLibraryInfo &TLI, const Triple &T) { // memset_pattern16 is only available on iOS 3.0 and Mac OS/X 10.5 and later. - if (T.getOS() != Triple::Darwin || T.getDarwinMajorNumber() < 9) + if (T.isMacOSX()) { + if (T.isMacOSXVersionLT(10, 5)) + TLI.setUnavailable(LibFunc::memset_pattern16); + } else if (T.getOS() == Triple::IOS) { + if (T.isOSVersionLT(3, 0)) + TLI.setUnavailable(LibFunc::memset_pattern16); + } else { TLI.setUnavailable(LibFunc::memset_pattern16); - + } + + // iprintf and friends are only available on XCore. 
+ if (T.getArch() != Triple::xcore) { + TLI.setUnavailable(LibFunc::iprintf); + TLI.setUnavailable(LibFunc::siprintf); + TLI.setUnavailable(LibFunc::fiprintf); + } } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index 5d34c7d7fa3d..717ad4122013 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -120,6 +120,18 @@ static bool IsNullTerminatedString(const Constant *C) { return false; } +MCSymbol *TargetLoweringObjectFile:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + return Mang->getSymbol(GV); +} + +void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer, + const TargetMachine &TM, + const MCSymbol *Sym) const { +} + + /// getKindForGlobal - This is a top-level target-independent classifier for /// a global variable. Given an global variable and information from TM, it /// classifies the global in a variety of ways that make various target @@ -305,16 +317,15 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const { const MCSymbol *Sym = Mang->getSymbol(GV); - return getExprForDwarfReference(Sym, Mang, MMI, Encoding, Streamer); + return getExprForDwarfReference(Sym, Encoding, Streamer); } const MCExpr *TargetLoweringObjectFile:: -getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, +getExprForDwarfReference(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const { const MCExpr *Res = MCSymbolRefExpr::Create(Sym, getContext()); - switch (Encoding & 0xF0) { + switch (Encoding & 0x70) { default: report_fatal_error("We do not support this DWARF encoding yet!"); case dwarf::DW_EH_PE_absptr: @@ -339,7 +350,7 @@ unsigned TargetLoweringObjectFile::getLSDAEncoding() const { return dwarf::DW_EH_PE_absptr; } -unsigned TargetLoweringObjectFile::getFDEEncoding() 
const { +unsigned TargetLoweringObjectFile::getFDEEncoding(bool CFI) const { return dwarf::DW_EH_PE_absptr; } diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index d579d95a99c4..76ccc09195a0 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -48,6 +48,7 @@ namespace llvm { bool RealignStack; bool DisableJumpTables; bool StrongPHIElim; + bool HasDivModLibcall; bool AsmVerbosityDefault(false); } @@ -205,6 +206,10 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", cl::desc("Use strong PHI elimination."), cl::location(StrongPHIElim), cl::init(false)); +static cl::opt +TrapFuncName("trap-func", cl::Hidden, + cl::desc("Emit a call to trap function rather than a trap instruction"), + cl::init("")); static cl::opt DataSections("fdata-sections", cl::desc("Emit data into separate sections"), @@ -221,7 +226,9 @@ TargetMachine::TargetMachine(const Target &T) : TheTarget(T), AsmInfo(0), MCRelaxAll(false), MCNoExecStack(false), - MCUseLoc(true) { + MCSaveTempLabels(false), + MCUseLoc(true), + MCUseCFI(true) { // Typically it will be subtargets that will adjust FloatABIType from Default // to Soft or Hard. if (UseSoftFloat) @@ -303,4 +310,11 @@ namespace llvm { bool HonorSignDependentRoundingFPMath() { return !UnsafeFPMath && HonorSignDependentRoundingFPMathOption; } + + /// getTrapFunctionName - If this returns a non-empty string, this means isel + /// should lower Intrinsic::trap to a call to the specified function name + /// instead of an ISD::TRAP node. 
+ StringRef getTrapFunctionName() { + return TrapFuncName; + } } diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 8fe549ba3126..c352bfcd8cce 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -53,6 +53,14 @@ class X86ATTAsmParser : public TargetAsmParser { SmallVectorImpl &Operands, MCStreamer &Out); + /// isSrcOp - Returns true if operand is either (%rsi) or %ds:%(rsi) + /// in 64bit mode or (%edi) or %es:(%edi) in 32bit mode. + bool isSrcOp(X86Operand &Op); + + /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode + /// or %es:(%edi) in 32bit mode. + bool isDstOp(X86Operand &Op); + /// @name Auto-generated Matcher Functions /// { @@ -356,6 +364,24 @@ struct X86Operand : public MCParsedAsmOperand { } // end anonymous namespace. +bool X86ATTAsmParser::isSrcOp(X86Operand &Op) { + unsigned basereg = Is64Bit ? X86::RSI : X86::ESI; + + return (Op.isMem() && + (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) && + isa(Op.Mem.Disp) && + cast(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0); +} + +bool X86ATTAsmParser::isDstOp(X86Operand &Op) { + unsigned basereg = Is64Bit ? 
X86::RDI : X86::EDI; + + return Op.isMem() && Op.Mem.SegReg == X86::ES && + isa(Op.Mem.Disp) && + cast(Op.Mem.Disp)->getValue() == 0 && + Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0; +} bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { @@ -788,7 +814,106 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, delete &Op; } } - + // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]" + if (Name.startswith("ins") && Operands.size() == 3 && + (Name == "insb" || Name == "insw" || Name == "insl")) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + + // Transform "outs[bwl] %ds:(%esi), %dx" into "out[bwl]" + if (Name.startswith("outs") && Operands.size() == 3 && + (Name == "outsb" || Name == "outsw" || Name == "outsl")) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + + // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]" + if (Name.startswith("movs") && Operands.size() == 3 && + (Name == "movsb" || Name == "movsw" || Name == "movsl" || + (Is64Bit && Name == "movsq"))) { + X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + if (isSrcOp(Op) && isDstOp(Op2)) { + Operands.pop_back(); + Operands.pop_back(); + delete &Op; + delete &Op2; + } + } + // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]" + if (Name.startswith("lods") && Operands.size() == 3 && + (Name == "lods" || Name == "lodsb" || Name == "lodsw" || + Name == "lodsl" || (Is64Bit && Name == "lodsq"))) { + X86Operand *Op1 = static_cast(Operands[1]); + X86Operand *Op2 = 
static_cast(Operands[2]); + if (isSrcOp(*Op1) && Op2->isReg()) { + const char *ins; + unsigned reg = Op2->getReg(); + bool isLods = Name == "lods"; + if (reg == X86::AL && (isLods || Name == "lodsb")) + ins = "lodsb"; + else if (reg == X86::AX && (isLods || Name == "lodsw")) + ins = "lodsw"; + else if (reg == X86::EAX && (isLods || Name == "lodsl")) + ins = "lodsl"; + else if (reg == X86::RAX && (isLods || Name == "lodsq")) + ins = "lodsq"; + else + ins = NULL; + if (ins != NULL) { + Operands.pop_back(); + Operands.pop_back(); + delete Op1; + delete Op2; + if (Name != ins) + static_cast(Operands[0])->setTokenValue(ins); + } + } + } + // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]" + if (Name.startswith("stos") && Operands.size() == 3 && + (Name == "stos" || Name == "stosb" || Name == "stosw" || + Name == "stosl" || (Is64Bit && Name == "stosq"))) { + X86Operand *Op1 = static_cast(Operands[1]); + X86Operand *Op2 = static_cast(Operands[2]); + if (isDstOp(*Op2) && Op1->isReg()) { + const char *ins; + unsigned reg = Op1->getReg(); + bool isStos = Name == "stos"; + if (reg == X86::AL && (isStos || Name == "stosb")) + ins = "stosb"; + else if (reg == X86::AX && (isStos || Name == "stosw")) + ins = "stosw"; + else if (reg == X86::EAX && (isStos || Name == "stosl")) + ins = "stosl"; + else if (reg == X86::RAX && (isStos || Name == "stosq")) + ins = "stosq"; + else + ins = NULL; + if (ins != NULL) { + Operands.pop_back(); + Operands.pop_back(); + delete Op1; + delete Op2; + if (Name != ins) + static_cast(Operands[0])->setTokenValue(ins); + } + } + } + // FIXME: Hack to handle recognize s{hr,ar,hl} $1, . Canonicalize to // "shift ". if ((Name.startswith("shr") || Name.startswith("sar") || @@ -803,6 +928,18 @@ ParseInstruction(StringRef Name, SMLoc NameLoc, Operands.erase(Operands.begin() + 1); } } + + // Transforms "int $3" into "int3" as a size optimization. We can't write an + // instalias with an immediate operand yet. 
+ if (Name == "int" && Operands.size() == 2) { + X86Operand *Op1 = static_cast(Operands[1]); + if (Op1->isImm() && isa(Op1->getImm()) && + cast(Op1->getImm())->getValue() == 3) { + delete Operands[1]; + Operands.erase(Operands.begin() + 1); + static_cast(Operands[0])->setTokenValue("int3"); + } + } return false; } diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index f7777561b6a7..d8a105e7e9d2 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -409,6 +409,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_XMM32: case TYPE_XMM64: case TYPE_XMM128: + case TYPE_XMM256: case TYPE_DEBUGREG: case TYPE_CONTROLREG: return translateRMRegister(mcInst, insn); @@ -418,6 +419,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand, case TYPE_M32: case TYPE_M64: case TYPE_M128: + case TYPE_M256: case TYPE_M512: case TYPE_Mv: case TYPE_M32FP: @@ -500,6 +502,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand, case ENCODING_Rv: translateRegister(mcInst, insn.opcodeRegister); return false; + case ENCODING_VVVV: + translateRegister(mcInst, insn.vvvv); + return false; case ENCODING_DUP: return translateOperand(mcInst, insn.spec->operands[operand.type - TYPE_DUP0], diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index b6546fc9e86c..de1610ba3d66 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -75,6 +75,12 @@ static int modRMRequired(OpcodeType type, case THREEBYTE_3A: decision = &THREEBYTE3A_SYM; break; + case THREEBYTE_A6: + decision = &THREEBYTEA6_SYM; + break; + case THREEBYTE_A7: + decision = &THREEBYTEA7_SYM; + break; } return decision->opcodeDecisions[insnContext].modRMDecisions[opcode]. 
@@ -115,6 +121,12 @@ static InstrUID decode(OpcodeType type, case THREEBYTE_3A: dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; break; + case THREEBYTE_A6: + dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; + case THREEBYTE_A7: + dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode]; + break; } switch (dec->modrm_type) { @@ -368,29 +380,109 @@ static int readPrefixes(struct InternalInstruction* insn) { if (isPrefix) dbgprintf(insn, "Found prefix 0x%hhx", byte); } + + insn->vexSize = 0; - if (insn->mode == MODE_64BIT) { - if ((byte & 0xf0) == 0x40) { - uint8_t opcodeByte; + if (byte == 0xc4) { + uint8_t byte1; - if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { - dbgprintf(insn, "Redundant REX prefix"); - return -1; - } - - insn->rexPrefix = byte; - insn->necessaryPrefixLocation = insn->readerCursor - 2; - - dbgprintf(insn, "Found REX prefix 0x%hhx", byte); - } else { + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } + + if (insn->mode == MODE_64BIT || byte1 & 0x8) { + insn->vexSize = 3; + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + else { unconsumeByte(insn); insn->necessaryPrefixLocation = insn->readerCursor - 1; } - } else { - unconsumeByte(insn); - insn->necessaryPrefixLocation = insn->readerCursor - 1; + + if (insn->vexSize == 3) { + insn->vexPrefix[0] = byte; + consumeByte(insn, &insn->vexPrefix[1]); + consumeByte(insn, &insn->vexPrefix[2]); + + /* We simulate the REX prefix for simplicity's sake */ + + insn->rexPrefix = 0x40 + | (wFromVEX3of3(insn->vexPrefix[2]) << 3) + | (rFromVEX2of3(insn->vexPrefix[1]) << 2) + | (xFromVEX2of3(insn->vexPrefix[1]) << 1) + | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + + switch (ppFromVEX3of3(insn->vexPrefix[2])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = TRUE; + break; + } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", 
insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]); + } } - + else if (byte == 0xc5) { + uint8_t byte1; + + if (lookAtByte(insn, &byte1)) { + dbgprintf(insn, "Couldn't read second byte of VEX"); + return -1; + } + + if (insn->mode == MODE_64BIT || byte1 & 0x8) { + insn->vexSize = 2; + } + else { + unconsumeByte(insn); + } + + if (insn->vexSize == 2) { + insn->vexPrefix[0] = byte; + consumeByte(insn, &insn->vexPrefix[1]); + + insn->rexPrefix = 0x40 + | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + + switch (ppFromVEX2of2(insn->vexPrefix[1])) + { + default: + break; + case VEX_PREFIX_66: + hasOpSize = TRUE; + break; + } + + dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]); + } + } + else { + if (insn->mode == MODE_64BIT) { + if ((byte & 0xf0) == 0x40) { + uint8_t opcodeByte; + + if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) { + dbgprintf(insn, "Redundant REX prefix"); + return -1; + } + + insn->rexPrefix = byte; + insn->necessaryPrefixLocation = insn->readerCursor - 2; + + dbgprintf(insn, "Found REX prefix 0x%hhx", byte); + } else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + } else { + unconsumeByte(insn); + insn->necessaryPrefixLocation = insn->readerCursor - 1; + } + } + if (insn->mode == MODE_16BIT) { insn->registerSize = (hasOpSize ? 4 : 2); insn->addressSize = (hasAdSize ? 
4 : 2); @@ -438,6 +530,39 @@ static int readOpcode(struct InternalInstruction* insn) { dbgprintf(insn, "readOpcode()"); insn->opcodeType = ONEBYTE; + + if (insn->vexSize == 3) + { + switch (mmmmmFromVEX2of3(insn->vexPrefix[1])) + { + default: + dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1])); + return -1; + case 0: + break; + case VEX_LOB_0F: + insn->twoByteEscape = 0x0f; + insn->opcodeType = TWOBYTE; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F38: + insn->twoByteEscape = 0x0f; + insn->threeByteEscape = 0x38; + insn->opcodeType = THREEBYTE_38; + return consumeByte(insn, &insn->opcode); + case VEX_LOB_0F3A: + insn->twoByteEscape = 0x0f; + insn->threeByteEscape = 0x3a; + insn->opcodeType = THREEBYTE_3A; + return consumeByte(insn, &insn->opcode); + } + } + else if (insn->vexSize == 2) + { + insn->twoByteEscape = 0x0f; + insn->opcodeType = TWOBYTE; + return consumeByte(insn, &insn->opcode); + } + if (consumeByte(insn, &current)) return -1; @@ -467,6 +592,24 @@ static int readOpcode(struct InternalInstruction* insn) { return -1; insn->opcodeType = THREEBYTE_3A; + } else if (current == 0xa6) { + dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); + + insn->threeByteEscape = current; + + if (consumeByte(insn, &current)) + return -1; + + insn->opcodeType = THREEBYTE_A6; + } else if (current == 0xa7) { + dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current); + + insn->threeByteEscape = current; + + if (consumeByte(insn, &current)) + return -1; + + insn->opcodeType = THREEBYTE_A7; } else { dbgprintf(insn, "Didn't find a three-byte escape prefix"); @@ -600,20 +743,64 @@ static int getID(struct InternalInstruction* insn) { dbgprintf(insn, "getID()"); attrMask = ATTR_NONE; - + if (insn->mode == MODE_64BIT) attrMask |= ATTR_64BIT; + + if (insn->vexSize) { + attrMask |= ATTR_VEX; + + if (insn->vexSize == 3) { + switch (ppFromVEX3of3(insn->vexPrefix[2])) { + case VEX_PREFIX_66: + attrMask |= 
ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (wFromVEX3of3(insn->vexPrefix[2])) + attrMask |= ATTR_REXW; + if (lFromVEX3of3(insn->vexPrefix[2])) + attrMask |= ATTR_VEXL; + } + else if (insn->vexSize == 2) { + switch (ppFromVEX2of2(insn->vexPrefix[1])) { + case VEX_PREFIX_66: + attrMask |= ATTR_OPSIZE; + break; + case VEX_PREFIX_F3: + attrMask |= ATTR_XS; + break; + case VEX_PREFIX_F2: + attrMask |= ATTR_XD; + break; + } + + if (lFromVEX2of2(insn->vexPrefix[1])) + attrMask |= ATTR_VEXL; + } + else { + return -1; + } + } + else { + if (insn->rexPrefix & 0x08) + attrMask |= ATTR_REXW; - if (insn->rexPrefix & 0x08) - attrMask |= ATTR_REXW; - - if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) - attrMask |= ATTR_OPSIZE; - else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XS; - else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) - attrMask |= ATTR_XD; - + if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) + attrMask |= ATTR_OPSIZE; + else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XS; + else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) + attrMask |= ATTR_XD; + + } + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; @@ -749,7 +936,7 @@ static int readSIB(struct InternalInstruction* insn) { insn->sibIndex = SIB_INDEX_NONE; break; default: - insn->sibIndex = (EABase)(sibIndexBase + index); + insn->sibIndex = (SIBIndex)(sibIndexBase + index); if (insn->sibIndex == SIB_INDEX_sib || insn->sibIndex == SIB_INDEX_sib64) insn->sibIndex = SIB_INDEX_NONE; @@ -796,7 +983,7 @@ static int readSIB(struct InternalInstruction* insn) { } break; default: - insn->sibBase = (EABase)(sibBaseBase + base); + insn->sibBase = (SIBBase)(sibBaseBase + base); break; } @@ -1012,6 +1199,8 @@ static int readModRM(struct 
InternalInstruction* insn) { return prefix##_EAX + index; \ case TYPE_R64: \ return prefix##_RAX + index; \ + case TYPE_XMM256: \ + return prefix##_YMM0 + index; \ case TYPE_XMM128: \ case TYPE_XMM64: \ case TYPE_XMM32: \ @@ -1073,6 +1262,14 @@ static int fixupReg(struct InternalInstruction *insn, default: debug("Expected a REG or R/M encoding in fixupReg"); return -1; + case ENCODING_VVVV: + insn->vvvv = (Reg)fixupRegValue(insn, + (OperandType)op->type, + insn->vvvv, + &valid); + if (!valid) + return -1; + break; case ENCODING_REG: insn->reg = (Reg)fixupRegValue(insn, (OperandType)op->type, @@ -1236,6 +1433,27 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { return 0; } +/* + * readVVVV - Reads the register number encoded in the VEX.vvvv field of the + * instruction's 2- or 3-byte VEX prefix and stores it in insn->vvvv. + * + * @param insn - The instruction whose VEX.vvvv field is to be read. + * @return - 0 if the field was successfully read; nonzero if the + * instruction has no 2- or 3-byte VEX prefix. + */ +static int readVVVV(struct InternalInstruction* insn) { + dbgprintf(insn, "readVVVV()"); + + if (insn->vexSize == 3) + insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]); + else if (insn->vexSize == 2) + insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]); + else + return -1; + + return 0; +} + /* * readOperands - Consults the specifier for an instruction and consumes all * operands for that instruction, interpreting them as it goes. 
@@ -1317,6 +1535,13 @@ static int readOperands(struct InternalInstruction* insn) { case ENCODING_I: if (readOpcodeModifier(insn)) return -1; + break; + case ENCODING_VVVV: + if (readVVVV(insn)) + return -1; + if (fixupReg(insn, &insn->spec->operands[index])) + return -1; + break; case ENCODING_DUP: break; default: diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h index d0dc8b56aea5..a9c90f8f9bda 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h @@ -34,16 +34,30 @@ extern "C" { /* * Accessor functions for various fields of an Intel instruction */ -#define modFromModRM(modRM) ((modRM & 0xc0) >> 6) -#define regFromModRM(modRM) ((modRM & 0x38) >> 3) -#define rmFromModRM(modRM) (modRM & 0x7) -#define scaleFromSIB(sib) ((sib & 0xc0) >> 6) -#define indexFromSIB(sib) ((sib & 0x38) >> 3) -#define baseFromSIB(sib) (sib & 0x7) -#define wFromREX(rex) ((rex & 0x8) >> 3) -#define rFromREX(rex) ((rex & 0x4) >> 2) -#define xFromREX(rex) ((rex & 0x2) >> 1) -#define bFromREX(rex) (rex & 0x1) +#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6) +#define regFromModRM(modRM) (((modRM) & 0x38) >> 3) +#define rmFromModRM(modRM) ((modRM) & 0x7) +#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6) +#define indexFromSIB(sib) (((sib) & 0x38) >> 3) +#define baseFromSIB(sib) ((sib) & 0x7) +#define wFromREX(rex) (((rex) & 0x8) >> 3) +#define rFromREX(rex) (((rex) & 0x4) >> 2) +#define xFromREX(rex) (((rex) & 0x2) >> 1) +#define bFromREX(rex) ((rex) & 0x1) + +#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7) +#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6) +#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5) +#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f) +#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7) +#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3) +#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2) +#define ppFromVEX3of3(vex) ((vex) & 0x3) + 
+#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7) +#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3) +#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2) +#define ppFromVEX2of2(vex) ((vex) & 0x3) /* * These enums represent Intel registers for use by the decoder. @@ -206,7 +220,25 @@ extern "C" { ENTRY(XMM13) \ ENTRY(XMM14) \ ENTRY(XMM15) - + +#define REGS_YMM \ + ENTRY(YMM0) \ + ENTRY(YMM1) \ + ENTRY(YMM2) \ + ENTRY(YMM3) \ + ENTRY(YMM4) \ + ENTRY(YMM5) \ + ENTRY(YMM6) \ + ENTRY(YMM7) \ + ENTRY(YMM8) \ + ENTRY(YMM9) \ + ENTRY(YMM10) \ + ENTRY(YMM11) \ + ENTRY(YMM12) \ + ENTRY(YMM13) \ + ENTRY(YMM14) \ + ENTRY(YMM15) + #define REGS_SEGMENT \ ENTRY(ES) \ ENTRY(CS) \ @@ -252,6 +284,7 @@ extern "C" { REGS_64BIT \ REGS_MMX \ REGS_XMM \ + REGS_YMM \ REGS_SEGMENT \ REGS_DEBUG \ REGS_CONTROL \ @@ -332,6 +365,27 @@ typedef enum { SEG_OVERRIDE_GS, SEG_OVERRIDE_max } SegmentOverride; + +/* + * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field + */ + +typedef enum { + VEX_LOB_0F = 0x1, + VEX_LOB_0F38 = 0x2, + VEX_LOB_0F3A = 0x3 +} VEXLeadingOpcodeByte; + +/* + * VEXPrefixCode - Possible values for the VEX.pp field + */ + +typedef enum { + VEX_PREFIX_NONE = 0x0, + VEX_PREFIX_66 = 0x1, + VEX_PREFIX_F3 = 0x2, + VEX_PREFIX_F2 = 0x3 +} VEXPrefixCode; typedef uint8_t BOOL; @@ -389,10 +443,12 @@ struct InternalInstruction { uint8_t prefixPresent[0x100]; /* contains the location (for use with the reader) of the prefix byte */ uint64_t prefixLocations[0x100]; + /* The value of the VEX prefix, if present */ + uint8_t vexPrefix[3]; + /* The length of the VEX prefix (0 if not present) */ + uint8_t vexSize; /* The value of the REX prefix, if present */ uint8_t rexPrefix; - /* The location of the REX prefix */ - uint64_t rexLocation; /* The location where a mandatory prefix would have to be (i.e., right before the opcode, or right before the REX prefix if one is present) */ uint64_t necessaryPrefixLocation; @@ -428,6 +484,10 @@ struct InternalInstruction { /* state for 
additional bytes, consumed during operand decode. Pattern: consumed___ indicates that the byte was already consumed and does not need to be consumed again */ + + /* The VEX.vvvv field, which contains a third register operand for some AVX + instructions */ + Reg vvvv; /* The ModR/M byte, which contains most register operands and some portion of all memory operands */ diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 1425b86ba53f..70315ed572b4 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -30,6 +30,8 @@ #define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes #define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes #define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes +#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes +#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes #define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers" #define CONTEXTS_STR "x86DisassemblerContexts" @@ -37,6 +39,8 @@ #define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes" #define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes" #define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes" +#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes" +#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes" /* * Attributes of an instruction that must be known before the opcode can be @@ -49,7 +53,9 @@ ENUM_ENTRY(ATTR_XS, 0x02) \ ENUM_ENTRY(ATTR_XD, 0x04) \ ENUM_ENTRY(ATTR_REXW, 0x08) \ - ENUM_ENTRY(ATTR_OPSIZE, 0x10) + ENUM_ENTRY(ATTR_OPSIZE, 0x10) \ + ENUM_ENTRY(ATTR_VEX, 0x20) \ + ENUM_ENTRY(ATTR_VEXL, 0x40) #define ENUM_ENTRY(n, v) n = v, enum attributeBits { @@ -87,7 +93,20 @@ enum attributeBits { "IC_64BIT_REXW_XS") \ ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! 
Prefer over all " \ "else because this changes most " \ - "operands' meaning") + "operands' meaning") \ + ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \ + ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \ + ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \ + ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \ + ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \ + ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \ + ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \ + ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ + ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ + ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") + #define ENUM_ENTRY(n, r, d) n, typedef enum { @@ -104,7 +123,9 @@ typedef enum { ONEBYTE = 0, TWOBYTE = 1, THREEBYTE_38 = 2, - THREEBYTE_3A = 3 + THREEBYTE_3A = 3, + THREEBYTE_A6 = 4, + THREEBYTE_A7 = 5 } OpcodeType; /* @@ -183,6 +204,7 @@ struct ContextDecision { ENUM_ENTRY(ENCODING_NONE, "") \ ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \ ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \ + ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \ ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \ ENUM_ENTRY(ENCODING_CW, "2-byte") \ ENUM_ENTRY(ENCODING_CD, "4-byte") \ @@ -278,6 +300,7 @@ struct ContextDecision { ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \ ENUM_ENTRY(TYPE_XMM64, "8-byte") \ ENUM_ENTRY(TYPE_XMM128, "16-byte") \ + ENUM_ENTRY(TYPE_XMM256, "32-byte") \ ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \ ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \ ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \ diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp 
index d6950f49f824..dd6e3533849d 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86ATTInstPrinter.h" #include "X86InstComments.h" +#include "X86Subtarget.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" @@ -22,24 +23,38 @@ #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "X86GenInstrNames.inc" +#include using namespace llvm; // Include the auto-generated portion of the assembly writer. #define GET_INSTRUCTION_NAME +#define PRINT_ALIAS_INSTR +#include "X86GenRegisterNames.inc" #include "X86GenAsmWriter.inc" +#undef PRINT_ALIAS_INSTR +#undef GET_INSTRUCTION_NAME + +X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) + : MCInstPrinter(MAI) { + // Initialize the set of available features. + setAvailableFeatures(ComputeAvailableFeatures( + &TM.getSubtarget())); +} void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) { - printInstruction(MI, OS); + // Try to print any aliases first. + if (!printAliasInstr(MI, OS)) + printInstruction(MI, OS); // If verbose assembly is enabled, we can print some informative comments. 
if (CommentStream) EmitAnyX86InstComments(MI, *CommentStream, getRegisterName); } + StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const { return getInstructionName(Opcode); } - void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h index eb986643014c..8d69391c968b 100644 --- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h @@ -17,16 +17,24 @@ #include "llvm/MC/MCInstPrinter.h" namespace llvm { - class MCOperand; + +class MCOperand; +class X86Subtarget; +class TargetMachine; class X86ATTInstPrinter : public MCInstPrinter { public: - X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - + X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI); virtual void printInst(const MCInst *MI, raw_ostream &OS); virtual StringRef getOpcodeName(unsigned Opcode) const; + // Methods used to print the alias of an instruction. + unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const; + // Autogenerated by tblgen, returns true if we successfully printed an + // alias. + bool printAliasInstr(const MCInst *MI, raw_ostream &OS); + // Autogenerated by tblgen. void printInstruction(const MCInst *MI, raw_ostream &OS); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp index 12144e3f5056..c642acc3b9a2 100644 --- a/lib/Target/X86/InstPrinter/X86InstComments.cpp +++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp @@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, // FALL THROUGH. 
case X86::PUNPCKLBWrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(16, ShuffleMask); + DecodePUNPCKLBWMask(16, ShuffleMask); break; case X86::PUNPCKLWDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLWDrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(8, ShuffleMask); + DecodePUNPCKLWDMask(8, ShuffleMask); break; case X86::PUNPCKLDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(4, ShuffleMask); + DecodePUNPCKLDQMask(4, ShuffleMask); break; case X86::PUNPCKLQDQrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::PUNPCKLQDQrm: Src1Name = getRegName(MI->getOperand(0).getReg()); - DecodePUNPCKLMask(2, ShuffleMask); + DecodePUNPCKLQDQMask(2, ShuffleMask); break; case X86::SHUFPDrri: @@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS, Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. case X86::UNPCKLPDrm: - DecodeUNPCKLPMask(2, ShuffleMask); + DecodeUNPCKLPDMask(2, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKLPDrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPDrm: + DecodeUNPCKLPDMask(2, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKLPDYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPDYrm: + DecodeUNPCKLPDMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::UNPCKLPSrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. 
case X86::UNPCKLPSrm: - DecodeUNPCKLPMask(4, ShuffleMask); + DecodeUNPCKLPSMask(4, ShuffleMask); Src1Name = getRegName(MI->getOperand(0).getReg()); break; + case X86::VUNPCKLPSrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPSrm: + DecodeUNPCKLPSMask(4, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; + case X86::VUNPCKLPSYrr: + Src2Name = getRegName(MI->getOperand(2).getReg()); + // FALL THROUGH. + case X86::VUNPCKLPSYrm: + DecodeUNPCKLPSMask(8, ShuffleMask); + Src1Name = getRegName(MI->getOperand(1).getReg()); + break; case X86::UNPCKHPDrr: Src2Name = getRegName(MI->getOperand(2).getReg()); // FALL THROUGH. diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 048452985089..47253ebd202e 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -15,6 +15,7 @@ #define DEBUG_TYPE "asm-printer" #include "X86IntelInstPrinter.h" #include "X86InstComments.h" +#include "X86Subtarget.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h index 6f120322742b..ca99dc09b8a4 100644 --- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h +++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h @@ -18,13 +18,15 @@ #include "llvm/Support/raw_ostream.h" namespace llvm { - class MCOperand; + +class MCOperand; +class TargetMachine; class X86IntelInstPrinter : public MCInstPrinter { public: - X86IntelInstPrinter(const MCAsmInfo &MAI) + X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI) : MCInstPrinter(MAI) {} - + virtual void printInst(const MCInst *MI, raw_ostream &OS); virtual StringRef getOpcodeName(unsigned Opcode) const; @@ -33,7 +35,6 @@ class X86IntelInstPrinter : public MCInstPrinter { static const char 
*getRegisterName(unsigned RegNo); static const char *getInstructionName(unsigned Opcode); - void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O); void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O); diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index e21d69a7bcbf..e7429a308106 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -36,7 +36,7 @@ _conv: cmovb %rcx, %rax ret -Seems like the jb branch has high likelyhood of being taken. It would have +Seems like the jb branch has high likelihood of being taken. It would have saved a few instructions. //===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index abd1515cf5d7..ea3014e7b927 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -7,14 +7,6 @@ copy (3-addr bswap + memory support?) This is available on Atom processors. //===---------------------------------------------------------------------===// -CodeGen/X86/lea-3.ll:test3 should be a single LEA, not a shift/move. The X86 -backend knows how to three-addressify this shift, but it appears the register -allocator isn't even asking it to do so in this case. We should investigate -why this isn't happening, it could have significant impact on other important -cases for X86 as well. - -//===---------------------------------------------------------------------===// - This should be one DIV/IDIV instruction, not a libcall: unsigned test(unsigned long long X, unsigned Y) { @@ -1572,7 +1564,7 @@ Implement processor-specific optimizations for parity with GCC on these processors. GCC does two optimizations: 1. ix86_pad_returns inserts a noop before ret instructions if immediately - preceeded by a conditional branch or is the target of a jump. 
+ preceded by a conditional branch or is the target of a jump. 2. ix86_avoid_jump_misspredicts inserts noops in cases where a 16-byte block of code contains more than 3 branches. @@ -1656,28 +1648,61 @@ information to add the "lock" prefix. //===---------------------------------------------------------------------===// -_Bool bar(int *x) { return *x & 1; } +struct B { + unsigned char y0 : 1; +}; -define zeroext i1 @bar(i32* nocapture %x) nounwind readonly { -entry: - %tmp1 = load i32* %x ; [#uses=1] - %and = and i32 %tmp1, 1 ; [#uses=1] - %tobool = icmp ne i32 %and, 0 ; [#uses=1] - ret i1 %tobool +int bar(struct B* a) { return a->y0; } + +define i32 @bar(%struct.B* nocapture %a) nounwind readonly optsize { + %1 = getelementptr inbounds %struct.B* %a, i64 0, i32 0 + %2 = load i8* %1, align 1 + %3 = and i8 %2, 1 + %4 = zext i8 %3 to i32 + ret i32 %4 } -bar: # @bar -# BB#0: # %entry - movl 4(%esp), %eax - movb (%eax), %al - andb $1, %al - movzbl %al, %eax - ret +bar: # @bar +# BB#0: + movb (%rdi), %al + andb $1, %al + movzbl %al, %eax + ret Missed optimization: should be movl+andl. //===---------------------------------------------------------------------===// +The x86_64 abi says: + +Booleans, when stored in a memory object, are stored as single byte objects the +value of which is always 0 (false) or 1 (true). + +We are not using this fact: + +int bar(_Bool *a) { return *a; } + +define i32 @bar(i8* nocapture %a) nounwind readonly optsize { + %1 = load i8* %a, align 1, !tbaa !0 + %tmp = and i8 %1, 1 + %2 = zext i8 %tmp to i32 + ret i32 %2 +} + +bar: + movb (%rdi), %al + andb $1, %al + movzbl %al, %eax + ret + +GCC produces + +bar: + movzbl (%rdi), %eax + ret + +//===---------------------------------------------------------------------===// + Consider the following two functions compiled with clang: _Bool foo(int *x) { return !(*x & 4); } unsigned bar(int *x) { return !(*x & 4); } @@ -1702,26 +1727,6 @@ are functionally identical. 
//===---------------------------------------------------------------------===// -Take the following C code: -int x(int y) { return (y & 63) << 14; } - -Code produced by gcc: - andl $63, %edi - sall $14, %edi - movl %edi, %eax - ret - -Code produced by clang: - shll $14, %edi - movl %edi, %eax - andl $1032192, %eax - ret - -The code produced by gcc is 3 bytes shorter. This sort of construct often -shows up with bitfields. - -//===---------------------------------------------------------------------===// - Take the following C code: int f(int a, int b) { return (unsigned char)a == (unsigned char)b; } @@ -1947,3 +1952,91 @@ which is "perfect". //===---------------------------------------------------------------------===// +For the branch in the following code: +int a(); +int b(int x, int y) { + if (x & (1<<(y&7))) + return a(); + return y; +} + +We currently generate: + movb %sil, %al + andb $7, %al + movzbl %al, %eax + btl %eax, %edi + jae .LBB0_2 + +movl+andl would be shorter than the movb+andb+movzbl sequence. + +//===---------------------------------------------------------------------===// + +For the following: +struct u1 { + float x, y; +}; +float foo(struct u1 u) { + return u.x + u.y; +} + +We currently generate: + movdqa %xmm0, %xmm1 + pshufd $1, %xmm0, %xmm0 # xmm0 = xmm0[1,0,0,0] + addss %xmm1, %xmm0 + ret + +We could save an instruction here by commuting the addss. + +//===---------------------------------------------------------------------===// + +This (from PR9661): + +float clamp_float(float a) { + if (a > 1.0f) + return 1.0f; + else if (a < 0.0f) + return 0.0f; + else + return a; +} + +Could compile to: + +clamp_float: # @clamp_float + movss .LCPI0_0(%rip), %xmm1 + minss %xmm1, %xmm0 + pxor %xmm1, %xmm1 + maxss %xmm1, %xmm0 + ret + +with -ffast-math. 
+ +//===---------------------------------------------------------------------===// + +This function (from PR9803): + +int clamp2(int a) { + if (a > 5) + a = 5; + if (a < 0) + return 0; + return a; +} + +Compiles to: + +_clamp2: ## @clamp2 + pushq %rbp + movq %rsp, %rbp + cmpl $5, %edi + movl $5, %ecx + cmovlel %edi, %ecx + testl %ecx, %ecx + movl $0, %eax + cmovnsl %ecx, %eax + popq %rbp + ret + +The move of 0 could be scheduled above the test to make it an xor reg,reg. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/lib/Target/X86/Utils/X86ShuffleDecode.cpp index 12879774d780..cd06060748b7 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.cpp +++ b/lib/Target/X86/Utils/X86ShuffleDecode.cpp @@ -1,4 +1,4 @@ -//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===// +//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// // // The LLVM Compiler Infrastructure // @@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm, ShuffleMask.push_back(7); } -void DecodePUNPCKLMask(unsigned NElts, +void DecodePUNPCKLBWMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask); +} + +void DecodePUNPCKLWDMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask); +} + +void DecodePUNPCKLDQMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); +} + +void DecodePUNPCKLQDQMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); +} + +void DecodePUNPCKLMask(EVT VT, SmallVectorImpl &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i); - ShuffleMask.push_back(i+NElts); - } + DecodeUNPCKLPMask(VT, ShuffleMask); } void DecodePUNPCKHMask(unsigned NElts, @@ 
-133,15 +150,40 @@ void DecodeUNPCKHPMask(unsigned NElts, } } +void DecodeUNPCKLPSMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); +} + +void DecodeUNPCKLPDMask(unsigned NElts, + SmallVectorImpl &ShuffleMask) { + DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); +} /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. NElts indicates the number of elements in the vector allowing it to -/// handle different datatypes and vector widths. -void DecodeUNPCKLPMask(unsigned NElts, +/// etc. VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. +void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl &ShuffleMask) { - for (unsigned i = 0; i != NElts/2; ++i) { - ShuffleMask.push_back(i); // Reads from dest - ShuffleMask.push_back(i+NElts); // Reads from src + unsigned NumElts = VT.getVectorNumElements(); + + // Handle vector lengths > 128 bits. Define a "section" as a set of + // 128 bits. AVX defines UNPCK* to operate independently on 128-bit + // sections. + unsigned NumSections = VT.getSizeInBits() / 128; + if (NumSections == 0 ) NumSections = 1; // Handle MMX + unsigned NumSectionElts = NumElts / NumSections; + + unsigned Start = 0; + unsigned End = NumSectionElts / 2; + for (unsigned s = 0; s < NumSections; ++s) { + for (unsigned i = Start; i != End; ++i) { + ShuffleMask.push_back(i); // Reads from dest/src1 + ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2 + } + // Process the next 128 bits. 
+ Start += NumSectionElts; + End += NumSectionElts; } } diff --git a/lib/Target/X86/Utils/X86ShuffleDecode.h b/lib/Target/X86/Utils/X86ShuffleDecode.h index 50d9ccbfa68c..b18f67033096 100644 --- a/lib/Target/X86/Utils/X86ShuffleDecode.h +++ b/lib/Target/X86/Utils/X86ShuffleDecode.h @@ -16,6 +16,7 @@ #define X86_SHUFFLE_DECODE_H #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/ValueTypes.h" //===----------------------------------------------------------------------===// // Vector Mask Decoding @@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm, void DecodePSHUFLWMask(unsigned Imm, SmallVectorImpl &ShuffleMask); -void DecodePUNPCKLMask(unsigned NElts, +void DecodePUNPCKLBWMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); + +void DecodePUNPCKLWDMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); + +void DecodePUNPCKLDQMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); + +void DecodePUNPCKLQDQMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); + +void DecodePUNPCKLMask(EVT VT, SmallVectorImpl &ShuffleMask); void DecodePUNPCKHMask(unsigned NElts, @@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, void DecodeUNPCKHPMask(unsigned NElts, SmallVectorImpl &ShuffleMask); +void DecodeUNPCKLPSMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); + +void DecodeUNPCKLPDMask(unsigned NElts, + SmallVectorImpl &ShuffleMask); /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd -/// etc. NElts indicates the number of elements in the vector allowing it to -/// handle different datatypes and vector widths. -void DecodeUNPCKLPMask(unsigned NElts, +/// etc. VT indicates the type of the vector allowing it to handle different +/// datatypes and vector widths. 
+void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl &ShuffleMask); } // llvm namespace diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index efb6c8c0adc6..25b8d3ea1d21 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -1,13 +1,13 @@ //===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===// -// +// // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. -// +// //===----------------------------------------------------------------------===// // -// This is a target description file for the Intel i386 architecture, refered to +// This is a target description file for the Intel i386 architecture, referred to // here as the "X86" architecture. // //===----------------------------------------------------------------------===// @@ -32,7 +32,7 @@ def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX", def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1", "Enable SSE instructions", // SSE codegen depends on cmovs, and all - // SSE1+ processors support them. + // SSE1+ processors support them. [FeatureMMX, FeatureCMOV]>; def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2", "Enable SSE2 instructions", @@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42", "Enable SSE 4.2 instructions", [FeatureSSE41, FeaturePOPCNT]>; def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", - "Enable 3DNow! instructions">; + "Enable 3DNow! instructions", + [FeatureMMX]>; def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", "Enable 3DNow! 
Athlon instructions", [Feature3DNow]>; @@ -125,10 +126,10 @@ def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit, FeatureAES, FeatureCLMUL]>; def : Proc<"k6", [FeatureMMX]>; -def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>; -def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>; -def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; -def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"k6-2", [Feature3DNow]>; +def : Proc<"k6-3", [Feature3DNow]>; +def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>; +def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>; def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>; @@ -156,8 +157,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A, Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; -def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; -def : Proc<"c3", [FeatureMMX, Feature3DNow]>; +def : Proc<"winchip2", [Feature3DNow]>; +def : Proc<"c3", [Feature3DNow]>; def : Proc<"c3-2", [FeatureSSE1]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index da5f5b182ce9..4d7d96dcb36b 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/Object/MachOFormat.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -28,6 +29,13 @@ #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; +// Option to allow disabling arithmetic relaxation to workaround PR9807, which +// is useful when running bitwise comparison experiments on Darwin. 
We should be +// able to remove this once PR9807 is resolved. +static cl::opt +MCDisableArithRelaxation("mc-x86-disable-arith-relaxation", + cl::desc("Disable relaxation of arithmetic instruction for X86")); + static unsigned getFixupKindLog2Size(unsigned Kind) { switch (Kind) { default: assert(0 && "invalid fixup kind!"); @@ -201,6 +209,9 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const { if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode()) return true; + if (MCDisableArithRelaxation) + return false; + // Check if this instruction is ever relaxable. if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode()) return false; @@ -307,10 +318,13 @@ class ELFX86_32AsmBackend : public ELFX86AsmBackend { : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new X86ELFObjectWriter(false, OSType, - ELF::EM_386, false), + return createELFObjectWriter(createELFObjectTargetWriter(), OS, /*IsLittleEndian*/ true); } + + MCELFObjectTargetWriter *createELFObjectTargetWriter() const { + return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false); + } }; class ELFX86_64AsmBackend : public ELFX86AsmBackend { @@ -319,10 +333,13 @@ class ELFX86_64AsmBackend : public ELFX86AsmBackend { : ELFX86AsmBackend(T, OSType) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const { - return createELFObjectWriter(new X86ELFObjectWriter(true, OSType, - ELF::EM_X86_64, true), + return createELFObjectWriter(createELFObjectTargetWriter(), OS, /*IsLittleEndian*/ true); } + + MCELFObjectTargetWriter *createELFObjectTargetWriter() const { + return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true); + } }; class WindowsX86AsmBackend : public X86AsmBackend { @@ -408,34 +425,26 @@ class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + 
Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return new DarwinX86_32AsmBackend(T); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (Triple(TT).getEnvironment() == Triple::MachO) - return new DarwinX86_32AsmBackend(T); - else - return new WindowsX86AsmBackend(T, false); - default: - return new ELFX86_32AsmBackend(T, Triple(TT).getOS()); - } + + if (TheTriple.isOSWindows()) + return new WindowsX86AsmBackend(T, false); + + return new ELFX86_32AsmBackend(T, TheTriple.getOS()); } TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, const std::string &TT) { - switch (Triple(TT).getOS()) { - case Triple::Darwin: + Triple TheTriple(TT); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return new DarwinX86_64AsmBackend(T); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (Triple(TT).getEnvironment() == Triple::MachO) - return new DarwinX86_64AsmBackend(T); - else - return new WindowsX86AsmBackend(T, true); - default: - return new ELFX86_64AsmBackend(T, Triple(TT).getOS()); - } + + if (TheTriple.isOSWindows()) + return new WindowsX86AsmBackend(T, true); + + return new ELFX86_64AsmBackend(T, TheTriple.getOS()); } diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 99b4479a9fc9..c2d53c4dd26c 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -709,12 +709,13 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, //===----------------------------------------------------------------------===// static MCInstPrinter *createX86MCInstPrinter(const Target &T, + TargetMachine &TM, unsigned SyntaxVariant, const MCAsmInfo &MAI) { if (SyntaxVariant == 0) - return new X86ATTInstPrinter(MAI); + return new X86ATTInstPrinter(TM, MAI); if (SyntaxVariant == 1) - return new X86IntelInstPrinter(MAI); + return new X86IntelInstPrinter(TM, MAI); return 0; } diff --git 
a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index a44fb694e725..56351756e8dd 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -215,6 +215,13 @@ def CC_X86_Win64_C : CallingConv<[ // The first 4 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ], [XMM0, XMM1, XMM2, XMM3]>>, + + // Do not pass the sret argument in RCX, the Win64 thiscall calling + // convention requires "this" to be passed in RCX. + CCIfCC<"CallingConv::X86_ThisCall", + CCIfSRet>>>, + CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ], [XMM0, XMM1, XMM2, XMM3]>>, diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 60d9d4ad064e..421e221d205c 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -652,6 +652,8 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::TB: // Two-byte opcode prefix case X86II::T8: // 0F 38 case X86II::TA: // 0F 3A + case X86II::A6: // 0F A6 + case X86II::A7: // 0F A7 Need0FPrefix = true; break; case X86II::TF: // F2 0F 38 @@ -695,6 +697,12 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::TA: // 0F 3A MCE.emitByte(0x3A); break; + case X86II::A6: // 0F A6 + MCE.emitByte(0xA6); + break; + case X86II::A7: // 0F A7 + MCE.emitByte(0xA7); + break; } // If this is a two-address instruction, skip one of the register operands. 
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 6fa928462b28..1382f184c343 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -23,6 +23,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -77,10 +78,8 @@ class X86FastISel : public FastISel { bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); - bool X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM); - bool X86FastEmitStore(EVT VT, unsigned Val, - const X86AddressMode &AM); + bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM); + bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); @@ -125,6 +124,8 @@ class X86FastISel : public FastISel { unsigned TargetMaterializeAlloca(const AllocaInst *C); + unsigned TargetMaterializeFloatZero(const ConstantFP *CF); + /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is /// computed in an SSE register, not on the X87 floating point stack. bool isScalarFPTypeInSSEReg(EVT VT) const { @@ -133,6 +134,9 @@ class X86FastISel : public FastISel { } bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false); + + bool TryEmitSmallMemcpy(X86AddressMode DestAM, + X86AddressMode SrcAM, uint64_t Len); }; } // end anonymous namespace. @@ -224,8 +228,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. 
bool -X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, - const X86AddressMode &AM) { +X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -395,37 +398,45 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { const Value *Op = *i; if (const StructType *STy = dyn_cast(*GTI)) { const StructLayout *SL = TD.getStructLayout(STy); - unsigned Idx = cast(Op)->getZExtValue(); - Disp += SL->getElementOffset(Idx); - } else { - uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); - SmallVector Worklist; - Worklist.push_back(Op); - do { - Op = Worklist.pop_back_val(); - if (const ConstantInt *CI = dyn_cast(Op)) { - // Constant-offset addressing. - Disp += CI->getSExtValue() * S; - } else if (isa(Op) && - isa(cast(Op)->getOperand(1))) { - // An add with a constant operand. Fold the constant. - ConstantInt *CI = - cast(cast(Op)->getOperand(1)); - Disp += CI->getSExtValue() * S; - // Add the other operand back to the work list. - Worklist.push_back(cast(Op)->getOperand(0)); - } else if (IndexReg == 0 && - (!AM.GV || !Subtarget->isPICStyleRIPRel()) && - (S == 1 || S == 2 || S == 4 || S == 8)) { - // Scaled-index addressing. - Scale = S; - IndexReg = getRegForGEPIndex(Op).first; - if (IndexReg == 0) - return false; - } else - // Unsupported. - goto unsupported_gep; - } while (!Worklist.empty()); + Disp += SL->getElementOffset(cast(Op)->getZExtValue()); + continue; + } + + // An array/variable index is always of the form i*S where S is the + // constant scale size. See if we can push the scale into immediates. + uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); + for (;;) { + if (const ConstantInt *CI = dyn_cast(Op)) { + // Constant-offset addressing. 
+ Disp += CI->getSExtValue() * S; + break; + } + if (isa(Op) && + (!isa(Op) || + FuncInfo.MBBMap[cast(Op)->getParent()] + == FuncInfo.MBB) && + isa(cast(Op)->getOperand(1))) { + // An add (in the same block) with a constant operand. Fold the + // constant. + ConstantInt *CI = + cast(cast(Op)->getOperand(1)); + Disp += CI->getSExtValue() * S; + // Iterate on the other operand. + Op = cast(Op)->getOperand(0); + continue; + } + if (IndexReg == 0 && + (!AM.GV || !Subtarget->isPICStyleRIPRel()) && + (S == 1 || S == 2 || S == 4 || S == 8)) { + // Scaled-index addressing. + Scale = S; + IndexReg = getRegForGEPIndex(Op).first; + if (IndexReg == 0) + return false; + break; + } + // Unsupported. + goto unsupported_gep; } } // Check for displacement overflow. @@ -439,7 +450,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { if (X86SelectAddress(U->getOperand(0), AM)) return true; - // If we couldn't merge the sub value into this addr mode, revert back to + // If we couldn't merge the gep value into this addr mode, revert back to // our address and just match the value instead of completely failing. AM = SavedAM; break; @@ -451,91 +462,91 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { // Handle constant address. if (const GlobalValue *GV = dyn_cast(V)) { - // Can't handle alternate code models yet. + // Can't handle alternate code models or TLS yet. if (TM.getCodeModel() != CodeModel::Small) return false; - // RIP-relative addresses can't have additional register operands. - if (Subtarget->isPICStyleRIPRel() && - (AM.Base.Reg != 0 || AM.IndexReg != 0)) - return false; - - // Can't handle TLS yet. if (const GlobalVariable *GVar = dyn_cast(GV)) if (GVar->isThreadLocal()) return false; + + // RIP-relative addresses can't have additional register operands, so if + // we've already folded stuff into the addressing mode, just force the + // global value into its own register, which we can use as the basereg. 
+ if (!Subtarget->isPICStyleRIPRel() || + (AM.Base.Reg == 0 && AM.IndexReg == 0)) { + // Okay, we've committed to selecting this global. Set up the address. + AM.GV = GV; - // Okay, we've committed to selecting this global. Set up the basic address. - AM.GV = GV; + // Allow the subtarget to classify the global. + unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); - // Allow the subtarget to classify the global. - unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); - - // If this reference is relative to the pic base, set it now. - if (isGlobalRelativeToPICBase(GVFlags)) { - // FIXME: How do we know Base.Reg is free?? - AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); - } - - // Unless the ABI requires an extra load, return a direct reference to - // the global. - if (!isGlobalStubReference(GVFlags)) { - if (Subtarget->isPICStyleRIPRel()) { - // Use rip-relative addressing if we can. Above we verified that the - // base and index registers are unused. - assert(AM.Base.Reg == 0 && AM.IndexReg == 0); - AM.Base.Reg = X86::RIP; + // If this reference is relative to the pic base, set it now. + if (isGlobalRelativeToPICBase(GVFlags)) { + // FIXME: How do we know Base.Reg is free?? + AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); } - AM.GVOpFlags = GVFlags; + + // Unless the ABI requires an extra load, return a direct reference to + // the global. + if (!isGlobalStubReference(GVFlags)) { + if (Subtarget->isPICStyleRIPRel()) { + // Use rip-relative addressing if we can. Above we verified that the + // base and index registers are unused. + assert(AM.Base.Reg == 0 && AM.IndexReg == 0); + AM.Base.Reg = X86::RIP; + } + AM.GVOpFlags = GVFlags; + return true; + } + + // Ok, we need to do a load from a stub. If we've already loaded from + // this stub, reuse the loaded pointer, otherwise emit the load now. 
+ DenseMap::iterator I = LocalValueMap.find(V); + unsigned LoadReg; + if (I != LocalValueMap.end() && I->second != 0) { + LoadReg = I->second; + } else { + // Issue load from stub. + unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + X86AddressMode StubAM; + StubAM.Base.Reg = AM.Base.Reg; + StubAM.GV = GV; + StubAM.GVOpFlags = GVFlags; + + // Prepare for inserting code in the local-value area. + SavePoint SaveInsertPt = enterLocalValueArea(); + + if (TLI.getPointerTy() == MVT::i64) { + Opc = X86::MOV64rm; + RC = X86::GR64RegisterClass; + + if (Subtarget->isPICStyleRIPRel()) + StubAM.Base.Reg = X86::RIP; + } else { + Opc = X86::MOV32rm; + RC = X86::GR32RegisterClass; + } + + LoadReg = createResultReg(RC); + MachineInstrBuilder LoadMI = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); + addFullAddress(LoadMI, StubAM); + + // Ok, back to normal mode. + leaveLocalValueArea(SaveInsertPt); + + // Prevent loading GV stub multiple times in same MBB. + LocalValueMap[V] = LoadReg; + } + + // Now construct the final address. Note that the Disp, Scale, + // and Index values may already be set here. + AM.Base.Reg = LoadReg; + AM.GV = 0; return true; } - - // Ok, we need to do a load from a stub. If we've already loaded from this - // stub, reuse the loaded pointer, otherwise emit the load now. - DenseMap::iterator I = LocalValueMap.find(V); - unsigned LoadReg; - if (I != LocalValueMap.end() && I->second != 0) { - LoadReg = I->second; - } else { - // Issue load from stub. - unsigned Opc = 0; - const TargetRegisterClass *RC = NULL; - X86AddressMode StubAM; - StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = GV; - StubAM.GVOpFlags = GVFlags; - - // Prepare for inserting code in the local-value area. 
- SavePoint SaveInsertPt = enterLocalValueArea(); - - if (TLI.getPointerTy() == MVT::i64) { - Opc = X86::MOV64rm; - RC = X86::GR64RegisterClass; - - if (Subtarget->isPICStyleRIPRel()) - StubAM.Base.Reg = X86::RIP; - } else { - Opc = X86::MOV32rm; - RC = X86::GR32RegisterClass; - } - - LoadReg = createResultReg(RC); - MachineInstrBuilder LoadMI = - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg); - addFullAddress(LoadMI, StubAM); - - // Ok, back to normal mode. - leaveLocalValueArea(SaveInsertPt); - - // Prevent loading GV stub multiple times in same MBB. - LocalValueMap[V] = LoadReg; - } - - // Now construct the final address. Note that the Disp, Scale, - // and Index values may already be set here. - AM.Base.Reg = LoadReg; - AM.GV = 0; - return true; } // If all else fails, try to materialize the value in a register. @@ -856,12 +867,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { unsigned NEReg = createResultReg(&X86::GR8RegClass); unsigned PReg = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::SETNEr), NEReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::SETPr), PReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(X86::OR8rr), ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg) .addReg(PReg).addReg(NEReg); UpdateValueMap(I, ResultReg); return true; @@ -1059,14 +1067,49 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } } } + } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { + // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which + // typically happen for _Bool and C++ bools. 
+ MVT SourceVT; + if (TI->hasOneUse() && TI->getParent() == I->getParent() && + isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) { + unsigned TestOpc = 0; + switch (SourceVT.SimpleTy) { + default: break; + case MVT::i8: TestOpc = X86::TEST8ri; break; + case MVT::i16: TestOpc = X86::TEST16ri; break; + case MVT::i32: TestOpc = X86::TEST32ri; break; + case MVT::i64: TestOpc = X86::TEST64ri32; break; + } + if (TestOpc) { + unsigned OpReg = getRegForValue(TI->getOperand(0)); + if (OpReg == 0) return false; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc)) + .addReg(OpReg).addImm(1); + + unsigned JmpOpc = X86::JNE_4; + if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { + std::swap(TrueMBB, FalseMBB); + JmpOpc = X86::JE_4; + } + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc)) + .addMBB(TrueMBB); + FastEmitBranch(FalseMBB, DL); + FuncInfo.MBB->addSuccessor(TrueMBB); + return true; + } + } } // Otherwise do a clumsy setcc and re-test it. + // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used + // in an explicit cast, so make sure to handle that correctly. 
unsigned OpReg = getRegForValue(BI->getCondition()); if (OpReg == 0) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr)) - .addReg(OpReg).addReg(OpReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri)) + .addReg(OpReg).addImm(1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB, DL); @@ -1075,42 +1118,42 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { } bool X86FastISel::X86SelectShift(const Instruction *I) { - unsigned CReg = 0, OpReg = 0, OpImm = 0; + unsigned CReg = 0, OpReg = 0; const TargetRegisterClass *RC = NULL; if (I->getType()->isIntegerTy(8)) { CReg = X86::CL; RC = &X86::GR8RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break; - case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break; - case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; + case Instruction::LShr: OpReg = X86::SHR8rCL; break; + case Instruction::AShr: OpReg = X86::SAR8rCL; break; + case Instruction::Shl: OpReg = X86::SHL8rCL; break; default: return false; } } else if (I->getType()->isIntegerTy(16)) { CReg = X86::CX; RC = &X86::GR16RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break; - case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break; - case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; + case Instruction::LShr: OpReg = X86::SHR16rCL; break; + case Instruction::AShr: OpReg = X86::SAR16rCL; break; + case Instruction::Shl: OpReg = X86::SHL16rCL; break; default: return false; } } else if (I->getType()->isIntegerTy(32)) { CReg = X86::ECX; RC = &X86::GR32RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break; - case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break; - case 
Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; + case Instruction::LShr: OpReg = X86::SHR32rCL; break; + case Instruction::AShr: OpReg = X86::SAR32rCL; break; + case Instruction::Shl: OpReg = X86::SHL32rCL; break; default: return false; } } else if (I->getType()->isIntegerTy(64)) { CReg = X86::RCX; RC = &X86::GR64RegClass; switch (I->getOpcode()) { - case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break; - case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break; - case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break; + case Instruction::LShr: OpReg = X86::SHR64rCL; break; + case Instruction::AShr: OpReg = X86::SAR64rCL; break; + case Instruction::Shl: OpReg = X86::SHL64rCL; break; default: return false; } } else { @@ -1124,15 +1167,6 @@ bool X86FastISel::X86SelectShift(const Instruction *I) { unsigned Op0Reg = getRegForValue(I->getOperand(0)); if (Op0Reg == 0) return false; - // Fold immediate in shl(x,3). - if (const ConstantInt *CI = dyn_cast(I->getOperand(1))) { - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm), - ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); - UpdateValueMap(I, ResultReg); - return true; - } - unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), @@ -1294,10 +1328,61 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) { return false; } +bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, + X86AddressMode SrcAM, uint64_t Len) { + // Make sure we don't bloat code by inlining very large memcpy's. + bool i64Legal = TLI.isTypeLegal(MVT::i64); + if (Len > (i64Legal ? 32 : 16)) return false; + + // We don't care about alignment here since we just emit integer accesses. 
+ while (Len) { + MVT VT; + if (Len >= 8 && i64Legal) + VT = MVT::i64; + else if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert(Len == 1); + VT = MVT::i8; + } + + unsigned Reg; + bool RV = X86FastEmitLoad(VT, SrcAM, Reg); + RV &= X86FastEmitStore(VT, Reg, DestAM); + assert(RV && "Failed to emit load or store??"); + + unsigned Size = VT.getSizeInBits()/8; + Len -= Size; + DestAM.Disp += Size; + SrcAM.Disp += Size; + } + + return true; +} + bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::memcpy: { + const MemCpyInst &MCI = cast(I); + // Don't handle volatile or variable length memcpys. + if (MCI.isVolatile() || !isa(MCI.getLength())) + return false; + + uint64_t Len = cast(MCI.getLength())->getZExtValue(); + + // Get the address of the dest and source addresses. + X86AddressMode DestAM, SrcAM; + if (!X86SelectAddress(MCI.getRawDest(), DestAM) || + !X86SelectAddress(MCI.getRawSource(), SrcAM)) + return false; + + return TryEmitSmallMemcpy(DestAM, SrcAM, Len); + } + case Intrinsic::stackprotector: { // Emit code inline code to store the stack guard onto the stack. EVT PtrTy = TLI.getPointerTy(); @@ -1308,17 +1393,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // Grab the frame index. X86AddressMode AM; if (!X86SelectAddress(Slot, AM)) return false; - if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; - return true; } case Intrinsic::objectsize: { - ConstantInt *CI = dyn_cast(I.getArgOperand(1)); + // FIXME: This should be moved to generic code! 
+ ConstantInt *CI = cast(I.getArgOperand(1)); const Type *Ty = I.getCalledFunction()->getReturnType(); - assert(CI && "Non-constant type in Intrinsic::objectsize?"); - MVT VT; if (!isTypeLegal(Ty, VT)) return false; @@ -1356,6 +1438,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { } case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: { + // FIXME: Should fold immediates. + // Replace "add with overflow" intrinsics with an "add" instruction followed // by a seto/setc instruction. Later on, when the "extractvalue" // instructions are encountered, we use the fact that two registers were @@ -1427,8 +1511,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { // Handle only C and fastcc calling conventions for now. ImmutableCallSite CS(CI); CallingConv::ID CC = CS.getCallingConv(); - if (CC != CallingConv::C && - CC != CallingConv::Fast && + if (CC != CallingConv::C && CC != CallingConv::Fast && CC != CallingConv::X86_FastCall) return false; @@ -1437,14 +1520,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (CC == CallingConv::Fast && GuaranteedTailCallOpt) return false; - // Let SDISel handle vararg functions. const PointerType *PT = cast(CS.getCalledValue()->getType()); const FunctionType *FTy = cast(PT->getElementType()); - if (FTy->isVarArg()) + bool isVarArg = FTy->isVarArg(); + + // Don't know how to handle Win64 varargs yet. Nothing special needed for + // x86-32. Special handling for x86-64 is implemented. + if (isVarArg && Subtarget->isTargetWin64()) return false; // Fast-isel doesn't know about callee-pop yet. - if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) + if (Subtarget->IsCalleePop(isVarArg, CC)) return false; // Handle *simple* calls for now. 
@@ -1487,9 +1573,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { ArgFlags.reserve(CS.arg_size()); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - unsigned Arg = getRegForValue(*i); - if (Arg == 0) - return false; + Value *ArgVal = *i; ISD::ArgFlagsTy Flags; unsigned AttrInd = i - CS.arg_begin() + 1; if (CS.paramHasAttr(AttrInd, Attribute::SExt)) @@ -1497,34 +1581,67 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) Flags.setZExt(); + // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra + // instruction. This is safe because it is common to all fastisel supported + // calling conventions on x86. + if (ConstantInt *CI = dyn_cast(ArgVal)) { + if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 || + CI->getBitWidth() == 16) { + if (Flags.isSExt()) + ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext())); + else + ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext())); + } + } + + unsigned ArgReg; + + // Passing bools around ends up doing a trunc to i1 and passing it. + // Codegen this as an argument + "and 1". + if (ArgVal->getType()->isIntegerTy(1) && isa(ArgVal) && + cast(ArgVal)->getParent() == I->getParent() && + ArgVal->hasOneUse()) { + ArgVal = cast(ArgVal)->getOperand(0); + ArgReg = getRegForValue(ArgVal); + if (ArgReg == 0) return false; + + MVT ArgVT; + if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false; + + ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg, + ArgVal->hasOneUse(), 1); + } else { + ArgReg = getRegForValue(ArgVal); + } + + if (ArgReg == 0) return false; + // FIXME: Only handle *easy* calls for now. 
if (CS.paramHasAttr(AttrInd, Attribute::InReg) || - CS.paramHasAttr(AttrInd, Attribute::StructRet) || CS.paramHasAttr(AttrInd, Attribute::Nest) || CS.paramHasAttr(AttrInd, Attribute::ByVal)) return false; - const Type *ArgTy = (*i)->getType(); + const Type *ArgTy = ArgVal->getType(); MVT ArgVT; if (!isTypeLegal(ArgTy, ArgVT)) return false; unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); - Args.push_back(Arg); - ArgVals.push_back(*i); + Args.push_back(ArgReg); + ArgVals.push_back(ArgVal); ArgVTs.push_back(ArgVT); ArgFlags.push_back(Flags); } // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; - CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); + CCState CCInfo(CC, isVarArg, TM, ArgLocs, I->getParent()->getContext()); // Allocate shadow area for Win64 - if (Subtarget->isTargetWin64()) { + if (Subtarget->isTargetWin64()) CCInfo.AllocateStack(32, 8); - } CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86); @@ -1618,6 +1735,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { X86::EBX).addReg(Base); } + if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) { + // Count the number of XMM registers allocated. + static const unsigned XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 + }; + unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri), + X86::AL).addImm(NumXMMRegs); + } + // Issue the call. 
MachineInstrBuilder MIB; if (CalleeOp) { @@ -1656,7 +1784,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && (GV->isDeclaration() || GV->isWeakForLinker()) && - Subtarget->getDarwinVers() < 9) { + (!Subtarget->getTargetTriple().isMacOSX() || + Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -1672,14 +1801,20 @@ bool X86FastISel::X86SelectCall(const Instruction *I) { if (Subtarget->isPICStyleGOT()) MIB.addReg(X86::EBX); + if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) + MIB.addReg(X86::AL); + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); // Issue CALLSEQ_END unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); + unsigned NumBytesCallee = 0; + if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet)) + NumBytesCallee = 4; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp)) - .addImm(NumBytes).addImm(0); + .addImm(NumBytes).addImm(NumBytesCallee); // Now handle call return value (if any). SmallVector UsedRegs; @@ -1850,10 +1985,13 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { if (isa(C)) { X86AddressMode AM; if (X86SelectAddress(C, AM)) { - if (TLI.getPointerTy() == MVT::i32) - Opc = X86::LEA32r; - else - Opc = X86::LEA64r; + // If the expression is just a basereg, then we're done, otherwise we need + // to emit an LEA. + if (AM.BaseType == X86AddressMode::RegBase && + AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0) + return AM.Base.Reg; + + Opc = TLI.getPointerTy() == MVT::i32 ? 
X86::LEA32r : X86::LEA64r; unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg), AM); @@ -1915,6 +2053,45 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { return ResultReg; } +unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { + MVT VT; + if (!isTypeLegal(CF->getType(), VT)) + return false; + + // Get opcode and regclass for the given zero. + unsigned Opc = 0; + const TargetRegisterClass *RC = NULL; + switch (VT.SimpleTy) { + default: return false; + case MVT::f32: + if (Subtarget->hasSSE1()) { + Opc = X86::FsFLD0SS; + RC = X86::FR32RegisterClass; + } else { + Opc = X86::LD_Fp032; + RC = X86::RFP32RegisterClass; + } + break; + case MVT::f64: + if (Subtarget->hasSSE2()) { + Opc = X86::FsFLD0SD; + RC = X86::FR64RegisterClass; + } else { + Opc = X86::LD_Fp064; + RC = X86::RFP64RegisterClass; + } + break; + case MVT::f80: + // No f80 support yet. + return false; + } + + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); + return ResultReg; +} + + /// TryToFoldLoad - The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index 3aaa69327976..325d0611817d 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -1307,7 +1307,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { // set up by FpSET_ST0, and our StackTop is off by one because of it. unsigned Op0 = getFPReg(MI->getOperand(0)); // Restore the actual StackTop from before Fp_SET_ST0. 
- // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we + // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we // are not enforcing the constraint. ++StackTop; unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 0a3f931acf93..06d12fc04a34 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/MC/MCAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -296,7 +297,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF, // FIXME: This is dirty hack. The code itself is pretty mess right now. // It should be rewritten from scratch and generalized sometimes. - // Determine maximum offset (minumum due to stack growth). + // Determine maximum offset (minimum due to stack growth). int64_t MaxOffset = 0; for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) @@ -551,65 +552,71 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // responsible for adjusting the stack pointer. Touching the stack at 4K // increments is necessary to ensure that the guard pages used by the OS // virtual memory manager are allocated in correct sequence. 
- if (NumBytes >= 4096 && - (STI.isTargetCygMing() || STI.isTargetWin32()) && - !STI.isTargetEnvMacho()) { + if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) { + const char *StackProbeSymbol; + bool isSPUpdateNeeded = false; + + if (Is64Bit) { + if (STI.isTargetCygMing()) + StackProbeSymbol = "___chkstk"; + else { + StackProbeSymbol = "__chkstk"; + isSPUpdateNeeded = true; + } + } else if (STI.isTargetCygMing()) + StackProbeSymbol = "_alloca"; + else + StackProbeSymbol = "_chkstk"; + // Check whether EAX is livein for this function. bool isEAXAlive = isEAXLiveIn(MF); - const char *StackProbeSymbol = - STI.isTargetWindows() ? "_chkstk" : "_alloca"; - if (Is64Bit && STI.isTargetCygMing()) - StackProbeSymbol = "__chkstk"; - unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32; - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - } else { + if (isEAXAlive) { + // Sanity check that EAX is not livein for this function. + // It should not be, so throw an assert. + assert(!Is64Bit && "EAX is livein in x64 case!"); + // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) .addReg(X86::EAX, RegState::Kill); - - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. 
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes - 4); - BuildMI(MBB, MBBI, DL, TII.get(CallOp)) - .addExternalSymbol(StackProbeSymbol) - .addReg(StackPtr, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes - 4); - MBB.insert(MBBI, MI); } - } else if (NumBytes >= 4096 && - STI.isTargetWin64() && - !STI.isTargetEnvMacho()) { - // Sanity check that EAX is not livein for this function. It should - // not be, so throw an assert. - assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!"); - // Handle the 64-bit Windows ABI case where we need to call __chkstk. - // Function prologue is responsible for adjusting the stack pointer. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32)) - .addExternalSymbol("__chkstk") - .addReg(StackPtr, RegState::Define | RegState::Implicit); - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, - TII, *RegInfo); + if (Is64Bit) { + // Handle the 64-bit Windows ABI case where we need to call __chkstk. + // Function prologue is responsible for adjusting the stack pointer. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) + .addImm(NumBytes); + } else { + // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. + // We'll also use 4 already allocated bytes for EAX. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(isEAXAlive ? NumBytes - 4 : NumBytes); + } + + BuildMI(MBB, MBBI, DL, + TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32)) + .addExternalSymbol(StackProbeSymbol) + .addReg(StackPtr, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + + // MSVC x64's __chkstk needs to adjust %rsp. + // FIXME: %rax preserves the offset and should be available. 
+ if (isSPUpdateNeeded) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, + TII, *RegInfo); + + if (isEAXAlive) { + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MBB.insert(MBBI, MI); + } } else if (NumBytes) emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII, *RegInfo); - if ((NumBytes || PushedRegs) && needsFrameMoves) { + if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { // Mark end of stack pointer adjustment. MCSymbol *Label = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label); @@ -779,7 +786,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, assert(Offset >= 0 && "Offset should never be negative"); if (Offset) { - // Check for possible merge with preceeding ADD instruction. + // Check for possible merge with preceding ADD instruction. Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo); } @@ -823,7 +830,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, int delta = -1*X86FI->getTCReturnAddrDelta(); MBBI = MBB.getLastNonDebugInstr(); - // Check for possible merge with preceeding ADD instruction. + // Check for possible merge with preceding ADD instruction. delta += mergeSPUpdates(MBB, MBBI, StackPtr, true); emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo); } @@ -892,7 +899,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); - bool isWin64 = STI.isTargetWin64(); unsigned SlotSize = STI.is64Bit() ? 8 : 4; unsigned FPReg = TRI->getFrameRegister(MF); unsigned CalleeFrameSize = 0; @@ -900,25 +906,39 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo(); + // Push GPRs. 
It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + if (!X86::GR64RegClass.contains(Reg) && + !X86::GR32RegClass.contains(Reg)) + continue; // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); if (Reg == FPReg) // X86RegisterInfo::emitPrologue will handle spilling of frame register. continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - CalleeFrameSize += SlotSize; - BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, TRI); - } + CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill); } X86FI->setCalleeSavedFrameSize(CalleeFrameSize); + + // Make XMM regs spilled. X86 does not have ability of push/pop XMM. + // It can be done by spilling XMMs to stack frame. + // Note that only Win64 ABI might spill XMMs. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i-1].getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + // Add the callee-saved register as live-in. It's killed at the spill. + MBB.addLiveIn(Reg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), + RC, TRI); + } + return true; } @@ -933,21 +953,30 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + + // Reload XMMs from stack frame. 
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + if (X86::GR64RegClass.contains(Reg) || + X86::GR32RegClass.contains(Reg)) + continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), + RC, TRI); + } + + // POP GPRs. unsigned FPReg = TRI->getFrameRegister(MF); - bool isWin64 = STI.isTargetWin64(); unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); + if (!X86::GR64RegClass.contains(Reg) && + !X86::GR32RegClass.contains(Reg)) + continue; if (Reg == FPReg) // X86RegisterInfo::emitEpilogue will handle restoring of frame register. continue; - if (!X86::VR128RegClass.contains(Reg) && !isWin64) { - BuildMI(MBB, MI, DL, TII.get(Opc), Reg); - } else { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, TRI); - } + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); } return true; } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 9b0ec6e123fe..4534e853914d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1580,6 +1580,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return RetVal; break; } + case ISD::AND: + case ISD::OR: + case ISD::XOR: { + // For operations of the form (x << C1) op C2, check if we can use a smaller + // encoding for C2 by transforming it into (x op (C2>>C1)) << C1. + SDValue N0 = Node->getOperand(0); + SDValue N1 = Node->getOperand(1); + + if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse()) + break; + + // i8 is unshrinkable, i16 should be promoted to i32. 
+ if (NVT != MVT::i32 && NVT != MVT::i64) + break; + + ConstantSDNode *Cst = dyn_cast(N1); + ConstantSDNode *ShlCst = dyn_cast(N0->getOperand(1)); + if (!Cst || !ShlCst) + break; + + int64_t Val = Cst->getSExtValue(); + uint64_t ShlVal = ShlCst->getZExtValue(); + + // Make sure that we don't change the operation by removing bits. + // This only matters for OR and XOR, AND is unaffected. + if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val) + break; + + unsigned ShlOp, Op = 0; + EVT CstVT = NVT; + + // Check the minimum bitwidth for the new constant. + // TODO: AND32ri is the same as AND64ri32 with zext imm. + // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr + // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32. + if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal)) + CstVT = MVT::i8; + else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal)) + CstVT = MVT::i32; + + // Bail if there is no smaller encoding. + if (NVT == CstVT) + break; + + switch (NVT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unsupported VT!"); + case MVT::i32: + assert(CstVT == MVT::i8); + ShlOp = X86::SHL32ri; + + switch (Opcode) { + case ISD::AND: Op = X86::AND32ri8; break; + case ISD::OR: Op = X86::OR32ri8; break; + case ISD::XOR: Op = X86::XOR32ri8; break; + } + break; + case MVT::i64: + assert(CstVT == MVT::i8 || CstVT == MVT::i32); + ShlOp = X86::SHL64ri; + + switch (Opcode) { + case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break; + case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break; + case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break; + } + break; + } + + // Emit the smaller op and the shift. 
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT); + SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst); + return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0), + getI8Imm(ShlVal)); + break; + } case X86ISD::UMUL: { SDValue N0 = Node->getOperand(0); SDValue N1 = Node->getOperand(1); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2f49dbcebf3c..703c01d373ef 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -45,6 +45,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/VectorExtras.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -221,7 +222,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // X86 is weird, it always uses i8 for shift amounts and setcc results. setBooleanContents(ZeroOrOneBooleanContent); - setSchedulingPreference(Sched::RegPressure); + + // For 64-bit since we have so many registers use the ILP scheduler, for + // 32-bit code use the register pressure specific scheduling. + if (Subtarget->is64Bit()) + setSchedulingPreference(Sched::ILP); + else + setSchedulingPreference(Sched::RegPressure); setStackPointerRegisterToSaveRestore(X86StackPtr); if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) { @@ -543,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - if (Subtarget->is64Bit()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); - else - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + setOperationAction(ISD::DYNAMIC_STACKALLOC, + (Subtarget->is64Bit() ? 
MVT::i64 : MVT::i32), + (Subtarget->isTargetCOFF() + && !Subtarget->isTargetEnvMacho() + ? Custom : Expand)); if (!UseSoftFloat && X86ScalarSSEf64) { // f32 and f64 use SSE. @@ -921,6 +927,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Can turn SHL into an integer multiply. setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v16i8, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Legal); // i8 and i16 vectors are custom , because the source register and source // source memory operand types are not the same width. f32 vectors are @@ -1271,27 +1278,6 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{ return std::make_pair(RRC, Cost); } -// FIXME: Why this routine is here? Move to RegInfo! -unsigned -X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; - switch (RC->getID()) { - default: - return 0; - case X86::GR32RegClassID: - return 4 - FPDiff; - case X86::GR64RegClassID: - return 8 - FPDiff; - case X86::VR128RegClassID: - return Subtarget->is64Bit() ? 10 : 4; - case X86::VR64RegClassID: - return 4; - } -} - bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const { if (!Subtarget->isTargetLinux()) @@ -1463,6 +1449,20 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const { return HasRet; } +EVT +X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, + ISD::NodeType ExtendKind) const { + MVT ReturnMVT; + // TODO: Is this also valid on 32-bit? + if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND) + ReturnMVT = MVT::i8; + else + ReturnMVT = MVT::i32; + + EVT MinVT = getRegisterType(Context, ReturnMVT); + return VT.bitsLT(MinVT) ? 
MinVT : VT; +} + /// LowerCallResult - Lower the result values of a call into the /// appropriate copies out of appropriate physical registers. /// @@ -1595,6 +1595,18 @@ static bool IsTailCallConvention(CallingConv::ID CC) { return (CC == CallingConv::Fast || CC == CallingConv::GHC); } +bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { + if (!CI->isTailCall()) + return false; + + CallSite CS(CI); + CallingConv::ID CalleeCC = CS.getCallingConv(); + if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C) + return false; + + return true; +} + /// FuncIsMadeTailCallSafe - Return true if the function is being made into /// a tailcall target by changing its ABI. static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { @@ -1627,8 +1639,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. if (Flags.isByVal()) { - int FI = MFI->CreateFixedObject(Flags.getByValSize(), - VA.getLocMemOffset(), isImmutable); + unsigned Bytes = Flags.getByValSize(); + if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. + int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable); return DAG.getFrameIndex(FI, getPointerTy()); } else { int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, @@ -1765,8 +1778,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. 
if (isVarArg) { - if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall && - CallConv != CallingConv::X86_ThisCall))) { + if (Is64Bit || (CallConv != CallingConv::X86_FastCall && + CallConv != CallingConv::X86_ThisCall)) { FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true)); } if (Is64Bit) { @@ -1818,7 +1831,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, int HomeOffset = TFI.getOffsetOfLocalArea() + 8; FuncInfo->setRegSaveFrameIndex( MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); - FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); + // Fixup to set vararg frame on shadow area (4 x i64). + if (NumIntRegs < 4) + FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); } else { // For X86-64, if there are vararg parameters that are passed via // registers, then we must store them to their spots on the stack so they @@ -1937,7 +1952,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, return SDValue(OutRetAddr.getNode(), 1); } -/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call +/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call /// optimization is performed and it is required (FPDiff!=0). static SDValue EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, @@ -2028,7 +2043,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); SDValue RetAddrFrIdx; - // Load return adress for tail calls. + // Load return address for tail calls. if (isTailCall && FPDiff) Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, FPDiff, dl); @@ -2185,7 +2200,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVector MemOpChains2; SDValue FIN; int FI = 0; - // Do not flag preceeding copytoreg stuff together with the following stuff. 
+ // Do not flag preceding copytoreg stuff together with the following stuff. InFlag = SDValue(); if (GuaranteedTailCallOpt) { for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -2266,7 +2281,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && (GV->isDeclaration() || GV->isWeakForLinker()) && - Subtarget->getDarwinVers() < 9) { + (!Subtarget->getTargetTriple().isMacOSX() || + Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -2285,7 +2301,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, getTargetMachine().getRelocationModel() == Reloc::PIC_) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && - Subtarget->getDarwinVers() < 9) { + (!Subtarget->getTargetTriple().isMacOSX() || + Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. 
@@ -3173,7 +3190,8 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); - if (NumElems != 2 && NumElems != 4) + if ((NumElems != 2 && NumElems != 4) + || N->getValueType(0).getSizeInBits() > 128) return false; for (unsigned i = 0; i < NumElems/2; ++i) @@ -3195,19 +3213,36 @@ static bool isUNPCKLMask(const SmallVectorImpl &Mask, EVT VT, if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16) return false; - for (int i = 0, j = 0; i != NumElts; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j)) - return false; - if (V2IsSplat) { - if (!isUndefOrEqual(BitI1, NumElts)) - return false; - } else { - if (!isUndefOrEqual(BitI1, j + NumElts)) + // Handle vector lengths > 128 bits. Define a "section" as a set of + // 128 bits. AVX defines UNPCK* to operate independently on 128-bit + // sections. + unsigned NumSections = VT.getSizeInBits() / 128; + if (NumSections == 0 ) NumSections = 1; // Handle MMX + unsigned NumSectionElts = NumElts / NumSections; + + unsigned Start = 0; + unsigned End = NumSectionElts; + for (unsigned s = 0; s < NumSections; ++s) { + for (unsigned i = Start, j = s * NumSectionElts; + i != End; + i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + if (!isUndefOrEqual(BitI, j)) return false; + if (V2IsSplat) { + if (!isUndefOrEqual(BitI1, NumElts)) + return false; + } else { + if (!isUndefOrEqual(BitI1, j + NumElts)) + return false; + } } + // Process the next 128 bits. 
+ Start += NumSectionElts; + End += NumSectionElts; } + return true; } @@ -3255,14 +3290,27 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl &Mask, EVT VT) { if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16) return false; - for (int i = 0, j = 0; i != NumElems; i += 2, ++j) { - int BitI = Mask[i]; - int BitI1 = Mask[i+1]; - if (!isUndefOrEqual(BitI, j)) - return false; - if (!isUndefOrEqual(BitI1, j)) - return false; + // Handle vector lengths > 128 bits. Define a "section" as a set of + // 128 bits. AVX defines UNPCK* to operate independently on 128-bit + // sections. + unsigned NumSections = VT.getSizeInBits() / 128; + if (NumSections == 0 ) NumSections = 1; // Handle MMX + unsigned NumSectionElts = NumElems / NumSections; + + for (unsigned s = 0; s < NumSections; ++s) { + for (unsigned i = s * NumSectionElts, j = s * NumSectionElts; + i != NumSectionElts * (s + 1); + i += 2, ++j) { + int BitI = Mask[i]; + int BitI1 = Mask[i+1]; + + if (!isUndefOrEqual(BitI, j)) + return false; + if (!isUndefOrEqual(BitI1, j)) + return false; + } } + return true; } @@ -3846,8 +3894,8 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx, /// getShuffleScalarElt - Returns the scalar element that will make up the ith /// element of the result of the vector shuffle. -SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, - unsigned Depth) { +static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, + unsigned Depth) { if (Depth == 6) return SDValue(); // Limit search depth. 
@@ -3895,11 +3943,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, case X86ISD::PUNPCKLWD: case X86ISD::PUNPCKLDQ: case X86ISD::PUNPCKLQDQ: - DecodePUNPCKLMask(NumElems, ShuffleMask); + DecodePUNPCKLMask(VT, ShuffleMask); break; case X86ISD::UNPCKLPS: case X86ISD::UNPCKLPD: - DecodeUNPCKLPMask(NumElems, ShuffleMask); + case X86ISD::VUNPCKLPS: + case X86ISD::VUNPCKLPD: + case X86ISD::VUNPCKLPSY: + case X86ISD::VUNPCKLPDY: + DecodeUNPCKLPMask(VT, ShuffleMask); break; case X86ISD::MOVHLPS: DecodeMOVHLPSMask(NumElems, ShuffleMask); @@ -3968,7 +4020,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG, /// getNumOfConsecutiveZeros - Return the number of elements of a vector /// shuffle operation which come from a consecutively from a zero. The -/// search can start in two diferent directions, from left or right. +/// search can start in two different directions, from left or right. static unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems, bool ZerosFromLeft, SelectionDAG &DAG) { @@ -5263,6 +5315,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) { // Break it into (shuffle shuffle_hi, shuffle_lo). Locs.clear(); + Locs.resize(4); SmallVector LoMask(4U, -1); SmallVector HiMask(4U, -1); @@ -5508,12 +5561,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) { X86::getShuffleSHUFImmediate(SVOp), DAG); } -static inline unsigned getUNPCKLOpcode(EVT VT) { +static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) { switch(VT.getSimpleVT().SimpleTy) { case MVT::v4i32: return X86ISD::PUNPCKLDQ; case MVT::v2i64: return X86ISD::PUNPCKLQDQ; - case MVT::v4f32: return X86ISD::UNPCKLPS; - case MVT::v2f64: return X86ISD::UNPCKLPD; + case MVT::v4f32: + return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS; + case MVT::v2f64: + return Subtarget->hasAVX() ? 
X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + case MVT::v8f32: return X86ISD::VUNPCKLPSY; + case MVT::v4f64: return X86ISD::VUNPCKLPDY; case MVT::v16i8: return X86ISD::PUNPCKLBW; case MVT::v8i16: return X86ISD::PUNPCKLWD; default: @@ -5641,7 +5698,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // unpckh_undef). Only use pshufd if speed is more important than size. if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG); if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); @@ -5762,7 +5819,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { } if (X86::isUNPCKLMask(SVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V1, V2, DAG); if (X86::isUNPCKHMask(SVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG); @@ -5789,7 +5847,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { ShuffleVectorSDNode *NewSVOp = cast(NewOp); if (X86::isUNPCKLMask(NewSVOp)) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V2, V1, DAG); if (X86::isUNPCKHMask(NewSVOp)) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG); @@ -5812,8 +5871,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) && SVOp->getSplatIndex() == 0 && V2IsUndef) { - if (VT == MVT::v2f64) - return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG); + if (VT == MVT::v2f64) { + X86ISD::NodeType Opcode = 
+ getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD; + return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG); + } if (VT == MVT::v2i64) return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG); } @@ -5840,7 +5902,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { if (X86::isUNPCKL_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) - return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG); + return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), + dl, VT, V1, V1, DAG); if (X86::isUNPCKH_v_undef_Mask(SVOp)) if (VT != MVT::v2i64 && VT != MVT::v2f64) return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG); @@ -7868,6 +7931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) && "This should be used only on Windows targets"); + assert(!Subtarget->isTargetEnvMacho()); DebugLoc dl = Op.getDebugLoc(); // Get the inputs. @@ -7878,8 +7942,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue Flag; EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX); - Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag); + Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag); Flag = Chain.getValue(1); SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -8809,8 +8874,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { case ISD::SADDO: // A subtract of one will be selected as a INC. Note that INC doesn't // set CF, so we can't do this for UADDO. 
- if (ConstantSDNode *C = dyn_cast(Op)) - if (C->getAPIntValue() == 1) { + if (ConstantSDNode *C = dyn_cast(RHS)) + if (C->isOne()) { BaseOp = X86ISD::INC; Cond = X86::COND_O; break; @@ -8825,8 +8890,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const { case ISD::SSUBO: // A subtract of one will be selected as a DEC. Note that DEC doesn't // set CF, so we can't do this for USUBO. - if (ConstantSDNode *C = dyn_cast(Op)) - if (C->getAPIntValue() == 1) { + if (ConstantSDNode *C = dyn_cast(RHS)) + if (C->isOne()) { BaseOp = X86ISD::DEC; Cond = X86::COND_O; break; @@ -10351,21 +10416,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); + assert(!Subtarget->isTargetEnvMacho()); + // The lowering is pretty easy: we're just emitting the call to _alloca. The // non-trivial part is impdef of ESP. - // FIXME: The code should be tweaked as soon as we'll try to do codegen for - // mingw-w64. - const char *StackProbeSymbol = + if (Subtarget->isTargetWin64()) { + if (Subtarget->isTargetCygMing()) { + // ___chkstk(Mingw64): + // Clobbers R10, R11, RAX and EFLAGS. + // Updates RSP. + BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) + .addExternalSymbol("___chkstk") + .addReg(X86::RAX, RegState::Implicit) + .addReg(X86::RSP, RegState::Implicit) + .addReg(X86::RAX, RegState::Define | RegState::Implicit) + .addReg(X86::RSP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + } else { + // __chkstk(MSVCRT): does not update stack pointer. + // Clobbers R10, R11 and EFLAGS. + // FIXME: RAX(allocated size) might be reused and not killed. + BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA)) + .addExternalSymbol("__chkstk") + .addReg(X86::RAX, RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + // RAX has the offset to subtracted from RSP. 
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP) + .addReg(X86::RSP) + .addReg(X86::RAX); + } + } else { + const char *StackProbeSymbol = Subtarget->isTargetWindows() ? "_chkstk" : "_alloca"; - BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) - .addExternalSymbol(StackProbeSymbol) - .addReg(X86::EAX, RegState::Implicit) - .addReg(X86::ESP, RegState::Implicit) - .addReg(X86::EAX, RegState::Define | RegState::Implicit) - .addReg(X86::ESP, RegState::Define | RegState::Implicit) - .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32)) + .addExternalSymbol(StackProbeSymbol) + .addReg(X86::EAX, RegState::Implicit) + .addReg(X86::ESP, RegState::Implicit) + .addReg(X86::EAX, RegState::Define | RegState::Implicit) + .addReg(X86::ESP, RegState::Define | RegState::Implicit) + .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); + } MI->eraseFromParent(); // The pseudo instruction is gone now. return BB; @@ -12126,7 +12218,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { AsmPieces.clear(); SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace. - // FIXME: this should verify that we are targetting a 486 or better. If not, + // FIXME: this should verify that we are targeting a 486 or better. If not, // we will turn this bswap into something that will be lowered to logical ops // instead of emitting the bswap asm. For now, we don't support 486 or lower // so don't worry about this. diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 6ec4a7de7558..630105739899 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -677,9 +677,6 @@ namespace llvm { /// getFunctionAlignment - Return the Log2 alignment of this function. 
virtual unsigned getFunctionAlignment(const Function *F) const; - unsigned getRegPressureLimit(const TargetRegisterClass *RC, - MachineFunction &MF) const; - /// getStackCookieLocation - Return true if the target stores stack /// protector cookies at a fixed offset in some non-standard address /// space, and populates the address space and offset as @@ -846,6 +843,12 @@ namespace llvm { virtual bool isUsedByReturnOnly(SDNode *N) const; + virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; + + virtual EVT + getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT, + ISD::NodeType ExtendKind) const; + virtual bool CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, diff --git a/lib/Target/X86/X86Instr3DNow.td b/lib/Target/X86/X86Instr3DNow.td index 45d1c6bc9d29..dd4f6a5a85a4 100644 --- a/lib/Target/X86/X86Instr3DNow.td +++ b/lib/Target/X86/X86Instr3DNow.td @@ -12,66 +12,91 @@ // //===----------------------------------------------------------------------===// -// FIXME: We don't support any intrinsics for these instructions yet. - -class I3DNow o, Format F, dag outs, dag ins, string asm, - list pattern> - : I, TB, Requires<[Has3DNow]> { +class I3DNow o, Format F, dag outs, dag ins, string asm, list pat> + : I, TB, Requires<[Has3DNow]> { } -class I3DNow_binop o, Format F, dag ins, string Mnemonic> - : I, - TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode { +class I3DNow_binop o, Format F, dag ins, string Mnemonic, list pat> + : I3DNow, + Has3DNow0F0FOpcode { + // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet. + let isAsmParserOnly = 1; + let Constraints = "$src1 = $dst"; +} + +class I3DNow_conv o, Format F, dag ins, string Mnemonic, list pat> + : I3DNow, + Has3DNow0F0FOpcode { // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet. let isAsmParserOnly = 1; } - -let Constraints = "$src1 = $dst" in { - // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic. 
- // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp. - multiclass I3DNow_binop_rm opc, string Mn> { - def rr : I3DNow_binop; - def rm : I3DNow_binop; - } +multiclass I3DNow_binop_rm opc, string Mn> { + def rr : I3DNow_binop; + def rm : I3DNow_binop; } -defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">; -defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">; -defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">; -defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">; -defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">; -defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">; -defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">; -defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">; -defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">; -defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">; -defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">; -defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">; -defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">; -defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">; -defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">; -defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">; -defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">; -defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">; -defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">; +multiclass I3DNow_binop_rm_int opc, string Mn, string Ver = ""> { + def rr : I3DNow_binop( + !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>; + def rm : I3DNow_binop( + !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, + (bitconvert (load_mmx addr:$src2))))]>; +} + +multiclass I3DNow_conv_rm opc, string Mn> { + def rr : I3DNow_conv; + def rm : I3DNow_conv; +} + +multiclass I3DNow_conv_rm_int opc, string Mn, string Ver = ""> { + def rr : I3DNow_conv( + !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>; + def rm : I3DNow_conv( + !strconcat("int_x86_3dnow", Ver, "_", Mn)) + (bitconvert (load_mmx addr:$src))))]>; +} + +defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">; +defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">; +defm PFACC : I3DNow_binop_rm_int<0xAE, 
"pfacc">; +defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">; +defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">; +defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">; +defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">; +defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">; +defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">; +defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">; +defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">; +defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">; +defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">; +defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">; +defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">; +defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">; +defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">; +defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">; +defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">; def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>; def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr), "prefetch $addr", []>; - + // FIXME: Diassembler gets a bogus decode conflict. 
-let isAsmParserOnly = 1 in { +let isAsmParserOnly = 1 in def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr), "prefetchw $addr", []>; -} // "3DNowA" instructions -defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">; -defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">; -defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">; -defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">; -defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">; +defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">; +defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">; +defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">; +defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">; +defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index f0ea06870869..9f7a4b06dc6f 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -163,7 +163,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst), } // Defs = [EFLAGS] -// Suprisingly enough, these are not two address instructions! +// Surprisingly enough, these are not two address instructions! let Defs = [EFLAGS] in { // Register-Integer Signed Integer Multiply def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16 diff --git a/lib/Target/X86/X86InstrControl.td b/lib/Target/X86/X86InstrControl.td index 77f47250e9fd..c228a0aed59c 100644 --- a/lib/Target/X86/X86InstrControl.td +++ b/lib/Target/X86/X86InstrControl.td @@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in Requires<[IsWin64]>; } +let isCall = 1, isCodeGenOnly = 1 in + // __chkstk(MSVC): clobber R10, R11 and EFLAGS. + // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP. 
+ let Defs = [RAX, R10, R11, RSP, EFLAGS], + Uses = [RSP] in { + def W64ALLOCA : Ii32PCRel<0xE8, RawFrm, + (outs), (ins i64i32imm_pcrel:$dst, variable_ops), + "call{q}\t$dst", []>, + Requires<[IsWin64]>; + } let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, isCodeGenOnly = 1 in diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 0660072589e4..7daa26492274 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -91,21 +91,23 @@ class REX_W { bit hasREX_WPrefix = 1; } class LOCK { bit hasLockPrefix = 1; } class SegFS { bits<2> SegOvrBits = 1; } class SegGS { bits<2> SegOvrBits = 2; } -class TB { bits<4> Prefix = 1; } -class REP { bits<4> Prefix = 2; } -class D8 { bits<4> Prefix = 3; } -class D9 { bits<4> Prefix = 4; } -class DA { bits<4> Prefix = 5; } -class DB { bits<4> Prefix = 6; } -class DC { bits<4> Prefix = 7; } -class DD { bits<4> Prefix = 8; } -class DE { bits<4> Prefix = 9; } -class DF { bits<4> Prefix = 10; } -class XD { bits<4> Prefix = 11; } -class XS { bits<4> Prefix = 12; } -class T8 { bits<4> Prefix = 13; } -class TA { bits<4> Prefix = 14; } -class TF { bits<4> Prefix = 15; } +class TB { bits<5> Prefix = 1; } +class REP { bits<5> Prefix = 2; } +class D8 { bits<5> Prefix = 3; } +class D9 { bits<5> Prefix = 4; } +class DA { bits<5> Prefix = 5; } +class DB { bits<5> Prefix = 6; } +class DC { bits<5> Prefix = 7; } +class DD { bits<5> Prefix = 8; } +class DE { bits<5> Prefix = 9; } +class DF { bits<5> Prefix = 10; } +class XD { bits<5> Prefix = 11; } +class XS { bits<5> Prefix = 12; } +class T8 { bits<5> Prefix = 13; } +class TA { bits<5> Prefix = 14; } +class A6 { bits<5> Prefix = 15; } +class A7 { bits<5> Prefix = 16; } +class TF { bits<5> Prefix = 17; } class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } @@ -136,7 +138,7 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, bit 
hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix? bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix? - bits<4> Prefix = 0; // Which prefix byte does this inst have? + bits<5> Prefix = 0; // Which prefix byte does this inst have? bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix? FPFormat FPForm = NotFP; // What flavor of FP instruction is this? bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix? @@ -154,20 +156,20 @@ class X86Inst opcod, Format f, ImmType i, dag outs, dag ins, let TSFlags{5-0} = FormBits; let TSFlags{6} = hasOpSizePrefix; let TSFlags{7} = hasAdSizePrefix; - let TSFlags{11-8} = Prefix; - let TSFlags{12} = hasREX_WPrefix; - let TSFlags{15-13} = ImmT.Value; - let TSFlags{18-16} = FPForm.Value; - let TSFlags{19} = hasLockPrefix; - let TSFlags{21-20} = SegOvrBits; - let TSFlags{23-22} = ExeDomain.Value; - let TSFlags{31-24} = Opcode; - let TSFlags{32} = hasVEXPrefix; - let TSFlags{33} = hasVEX_WPrefix; - let TSFlags{34} = hasVEX_4VPrefix; - let TSFlags{35} = hasVEX_i8ImmReg; - let TSFlags{36} = hasVEX_L; - let TSFlags{37} = has3DNow0F0FOpcode; + let TSFlags{12-8} = Prefix; + let TSFlags{13} = hasREX_WPrefix; + let TSFlags{16-14} = ImmT.Value; + let TSFlags{19-17} = FPForm.Value; + let TSFlags{20} = hasLockPrefix; + let TSFlags{22-21} = SegOvrBits; + let TSFlags{24-23} = ExeDomain.Value; + let TSFlags{32-25} = Opcode; + let TSFlags{33} = hasVEXPrefix; + let TSFlags{34} = hasVEX_WPrefix; + let TSFlags{35} = hasVEX_4VPrefix; + let TSFlags{36} = hasVEX_i8ImmReg; + let TSFlags{37} = hasVEX_L; + let TSFlags{38} = has3DNow0F0FOpcode; } class PseudoI pattern> @@ -319,7 +321,7 @@ class VSSI o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX]>; class VPSI o, Format F, dag outs, dag ins, string asm, list pattern> - : I, + : I, TB, Requires<[HasAVX]>; // SSE2 Instruction Templates: @@ -353,7 +355,7 @@ class VSDI o, Format F, dag outs, dag ins, string asm, Requires<[HasAVX]>; class VPDI o, Format F, dag outs, 
dag ins, string asm, list pattern> - : I, + : I, TB, OpSize, Requires<[HasAVX]>; // SSE3 Instruction Templates: diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 5016c0f171ae..3cbfac1c1a9f 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -132,6 +132,8 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>; def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>; +def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>; +def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>; def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>; def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 76a9b12b8aad..83f0260d63b4 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -232,7 +232,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?"); RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE) continue; @@ -335,7 +335,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?"); RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. 
if (OpTbl0[i][1] & TB_NOT_REVERSABLE) continue; @@ -460,7 +460,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries"); RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. if (OpTbl1[i][1] & TB_NOT_REVERSABLE) continue; @@ -682,7 +682,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!"); RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align); - // If this is not a reversable operation (because there is a many->one) + // If this is not a reversible operation (because there is a many->one) // mapping, don't insert the reverse of the operation into MemOp2RegOpTable. if (OpTbl2[i][1] & TB_NOT_REVERSABLE) continue; @@ -916,7 +916,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVSDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MMX_MOVD64rm: @@ -2241,6 +2240,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, bool isTwoAddr = NumOps > 1 && MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; + // FIXME: AsmPrinter doesn't know how to handle + // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. + if (MI->getOpcode() == X86::ADD32ri && + MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) + return NULL; + MachineInstr *NewMI = NULL; // Folding a memory location into the two-address part of a two-address // instruction is different than folding it other places. 
It requires @@ -2535,6 +2540,12 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, case X86::TEST32rr: case X86::TEST64rr: return true; + case X86::ADD32ri: + // FIXME: AsmPrinter doesn't know how to handle + // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding. + if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) + return false; + break; } } @@ -2845,11 +2856,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::FsMOVAPDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: - case X86::MOVDQUrm_Int: break; } switch (Opc2) { @@ -2869,11 +2878,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case X86::FsMOVAPDrm: case X86::MOVAPSrm: case X86::MOVUPSrm: - case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MOVDQUrm: - case X86::MOVDQUrm_Int: break; } @@ -3085,12 +3092,8 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } -bool X86InstrInfo:: -hasHighOperandLatency(const InstrItineraryData *ItinData, - const MachineRegisterInfo *MRI, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - switch (DefMI->getOpcode()) { +bool X86InstrInfo::isHighLatencyDef(int opc) const { + switch (opc) { default: return false; case X86::DIVSDrm: case X86::DIVSDrm_Int: @@ -3120,6 +3123,14 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, } } +bool X86InstrInfo:: +hasHighOperandLatency(const InstrItineraryData *ItinData, + const MachineRegisterInfo *MRI, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + return isHighLatencyDef(DefMI->getOpcode()); +} + namespace { /// CGBR - Create Global Base Reg pass. This initializes the PIC /// global base register for x86-32. 
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index fcb5a25104ac..8da68b570177 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -33,15 +33,15 @@ namespace X86 { AddrScaleAmt = 1, AddrIndexReg = 2, AddrDisp = 3, - + /// AddrSegmentReg - The operand # of the segment in the memory operand. AddrSegmentReg = 4, /// AddrNumOperands - Total number of operands in a memory reference. AddrNumOperands = 5 }; - - + + // X86 specific condition code. These correspond to X86_*_COND in // X86InstrInfo.td. They must be kept in synch. enum CondCode { @@ -72,16 +72,16 @@ namespace X86 { COND_INVALID }; - + // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); - + /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. CondCode GetOppositeBranchCondition(X86::CondCode CC); } - + /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. /// @@ -90,14 +90,14 @@ namespace X86II { enum TOF { //===------------------------------------------------------------------===// // X86 Specific MachineOperand flags. - + MO_NO_FLAG, - + /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a /// relocation of: /// SYMBOL_LABEL + [. - PICBASELABEL] MO_GOT_ABSOLUTE_ADDRESS, - + /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the /// immediate should get the value of the symbol minus the PIC base label: /// SYMBOL_LABEL - PICBASELABEL @@ -106,77 +106,77 @@ namespace X86II { /// MO_GOT - On a symbol operand this indicates that the immediate is the /// offset to the GOT entry for the symbol name from the base of the GOT. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. 
/// SYMBOL_LABEL @GOT MO_GOT, - + /// MO_GOTOFF - On a symbol operand this indicates that the immediate is - /// the offset to the location of the symbol name from the base of the GOT. + /// the offset to the location of the symbol name from the base of the GOT. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTOFF MO_GOTOFF, - + /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is /// offset to the GOT entry for the symbol name from the current code - /// location. + /// location. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @GOTPCREL MO_GOTPCREL, - + /// MO_PLT - On a symbol operand this indicates that the immediate is - /// offset to the PLT entry of symbol name from the current code location. + /// offset to the PLT entry of symbol name from the current code location. /// - /// See the X86-64 ELF ABI supplement for more details. + /// See the X86-64 ELF ABI supplement for more details. /// SYMBOL_LABEL @PLT MO_PLT, - + /// MO_TLSGD - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TLSGD MO_TLSGD, - + /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @GOTTPOFF MO_GOTTPOFF, - + /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. 
/// SYMBOL_LABEL @INDNTPOFF MO_INDNTPOFF, - + /// MO_TPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @TPOFF MO_TPOFF, - + /// MO_NTPOFF - On a symbol operand this indicates that the immediate is /// some TLS offset. /// - /// See 'ELF Handling for Thread-Local Storage' for more details. + /// See 'ELF Handling for Thread-Local Storage' for more details. /// SYMBOL_LABEL @NTPOFF MO_NTPOFF, - + /// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the /// reference is actually to the "__imp_FOO" symbol. This is used for /// dllimport linkage on windows. MO_DLLIMPORT, - + /// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the /// reference is actually to the "FOO$stub" symbol. This is used for calls /// and jumps to external functions on Tiger and earlier. MO_DARWIN_STUB, - + /// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the /// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a /// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub. @@ -186,19 +186,19 @@ namespace X86II { /// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is /// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub. MO_DARWIN_NONLAZY_PIC_BASE, - + /// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this /// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE", /// which is a PIC-base-relative reference to a hidden dyld lazy pointer /// stub. MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE, - + /// MO_TLVP - On a symbol operand this indicates that the immediate is /// some TLS offset. /// /// This is the TLS offset for the Darwin TLS mechanism. 
MO_TLVP, - + /// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate /// is some TLS offset from the picbase. /// @@ -239,7 +239,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) { return false; } } - + /// X86II - This namespace holds all of the target specific flags that /// instruction info tracks. /// @@ -299,7 +299,7 @@ namespace X86II { // MRMInitReg - This form is used for instructions whose source and // destinations are the same register. MRMInitReg = 32, - + //// MRM_C1 - A mod/rm byte of exactly 0xC1. MRM_C1 = 33, MRM_C2 = 34, @@ -318,7 +318,7 @@ namespace X86II { /// immediates, the first of which is a 16-bit immediate (specified by /// the imm encoding) and the second is a 8-bit fixed value. RawFrmImm8 = 43, - + /// RawFrmImm16 - This is used for CALL FAR instructions, which have two /// immediates, the first of which is a 16 or 32-bit immediate (specified by /// the imm encoding) and the second is a 16-bit fixed value. In the AMD @@ -347,7 +347,7 @@ namespace X86II { // set, there is no prefix byte for obtaining a multibyte opcode. // Op0Shift = 8, - Op0Mask = 0xF << Op0Shift, + Op0Mask = 0x1F << Op0Shift, // TB - TwoByte - Set if this instruction has a two byte opcode, which // starts with a 0x0F byte before the real opcode. @@ -368,11 +368,12 @@ namespace X86II { // floating point operations performed in the SSE registers. XD = 11 << Op0Shift, XS = 12 << Op0Shift, - // T8, TA - Prefix after the 0x0F prefix. + // T8, TA, A6, A7 - Prefix after the 0x0F prefix. T8 = 13 << Op0Shift, TA = 14 << Op0Shift, - + A6 = 15 << Op0Shift, A7 = 16 << Op0Shift, + // TF - Prefix before and after 0x0F - TF = 15 << Op0Shift, + TF = 17 << Op0Shift, //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. @@ -380,13 +381,13 @@ namespace X86II { // etc. 
We only cares about REX.W and REX.R bits and only the former is // statically determined. // - REXShift = 12, + REXShift = Op0Shift + 5, REX_W = 1 << REXShift, //===------------------------------------------------------------------===// // This three-bit field describes the size of an immediate operand. Zero is // unused so that we can tell if we forgot to set a value. - ImmShift = 13, + ImmShift = REXShift + 1, ImmMask = 7 << ImmShift, Imm8 = 1 << ImmShift, Imm8PCRel = 2 << ImmShift, @@ -400,7 +401,7 @@ namespace X86II { // FP Instruction Classification... Zero is non-fp instruction. // FPTypeMask - Mask for all of the FP types... - FPTypeShift = 16, + FPTypeShift = ImmShift + 3, FPTypeMask = 7 << FPTypeShift, // NotFP - The default, set for instructions that do not use FP registers. @@ -433,25 +434,26 @@ namespace X86II { SpecialFP = 7 << FPTypeShift, // Lock prefix - LOCKShift = 19, + LOCKShift = FPTypeShift + 3, LOCK = 1 << LOCKShift, // Segment override prefixes. Currently we just need ability to address // stuff in gs and fs segments. - SegOvrShift = 20, + SegOvrShift = LOCKShift + 1, SegOvrMask = 3 << SegOvrShift, FS = 1 << SegOvrShift, GS = 2 << SegOvrShift, - // Execution domain for SSE instructions in bits 22, 23. - // 0 in bits 22-23 means normal, non-SSE instruction. - SSEDomainShift = 22, + // Execution domain for SSE instructions in bits 23, 24. + // 0 in bits 23-24 means normal, non-SSE instruction. 
+ SSEDomainShift = SegOvrShift + 2, - OpcodeShift = 24, - OpcodeMask = 0xFF << OpcodeShift, + OpcodeShift = SSEDomainShift + 2, + OpcodeMask = 0xFFULL << OpcodeShift, //===------------------------------------------------------------------===// /// VEX - The opcode prefix used by AVX instructions + VEXShift = OpcodeShift + 8, VEX = 1U << 0, /// VEX_W - Has a opcode specific functionality, but is used in the same @@ -473,7 +475,7 @@ namespace X86II { /// if a VR256 register is used, but some AVX instructions also have this /// field marked when using a f256 memory references. VEX_L = 1U << 4, - + /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the /// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents /// this as having a 0x0F prefix with a 0x0F opcode, and each instruction @@ -482,18 +484,18 @@ namespace X86II { /// this flag to indicate that the encoder should do the wacky 3DNow! thing. Has3DNow0F0FOpcode = 1U << 5 }; - + // getBaseOpcodeFor - This function returns the "base" X86 opcode for the // specified machine instruction. // static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) { return TSFlags >> X86II::OpcodeShift; } - + static inline bool hasImm(uint64_t TSFlags) { return (TSFlags & X86II::ImmMask) != 0; } - + /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field /// of the specified instruction. static inline unsigned getSizeOfImm(uint64_t TSFlags) { @@ -508,7 +510,7 @@ namespace X86II { case X86II::Imm64: return 8; } } - + /// isImmPCRel - Return true if the immediate of the specified instruction's /// TSFlags indicates that it is pc relative. static inline unsigned isImmPCRel(uint64_t TSFlags) { @@ -525,7 +527,7 @@ namespace X86II { return false; } } - + /// getMemoryOperandNo - The function returns the MCInst operand # for the /// first field of the memory operand. If the instruction doesn't have a /// memory operand, this returns -1. 
@@ -549,11 +551,11 @@ namespace X86II { case X86II::MRMDestMem: return 0; case X86II::MRMSrcMem: { - bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V; + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; unsigned FirstMemOp = 1; if (HasVEX_4V) ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV). - + // FIXME: Maybe lea should have its own form? This is a horrible hack. //if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || // Opcode == X86::LEA16r || Opcode == X86::LEA32r) @@ -613,7 +615,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) { class X86InstrInfo : public TargetInstrInfoImpl { X86TargetMachine &TM; const X86RegisterInfo RI; - + /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, /// RegOp2MemOpTable2 - Load / store folding opcode maps. /// @@ -621,7 +623,7 @@ class X86InstrInfo : public TargetInstrInfoImpl { DenseMap > RegOp2MemOpTable0; DenseMap > RegOp2MemOpTable1; DenseMap > RegOp2MemOpTable2; - + /// MemOp2RegOpTable - Load / store unfolding opcode map. /// DenseMap > MemOp2RegOpTable; @@ -795,7 +797,7 @@ class X86InstrInfo : public TargetInstrInfoImpl { virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex = 0) const; - + /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler /// to determine if two loads are loading from the same base address. It /// should only return true if the base pointers are the same and the @@ -805,7 +807,7 @@ class X86InstrInfo : public TargetInstrInfoImpl { int64_t &Offset1, int64_t &Offset2) const; /// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to - /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should + /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should /// be scheduled togther. 
On some targets if two loads are loading from /// addresses in the same cache line, it's better if they are scheduled /// together. This function takes two integers that represent the load offsets @@ -829,7 +831,7 @@ class X86InstrInfo : public TargetInstrInfoImpl { return (reg == X86::SPL || reg == X86::BPL || reg == X86::SIL || reg == X86::DIL); } - + static bool isX86_64ExtendedReg(const MachineOperand &MO) { if (!MO.isReg()) return false; return isX86_64ExtendedReg(MO.getReg()); @@ -858,11 +860,13 @@ class X86InstrInfo : public TargetInstrInfoImpl { const SmallVectorImpl &MOs, unsigned Size, unsigned Alignment) const; + bool isHighLatencyDef(int opc) const; + bool hasHighOperandLatency(const InstrItineraryData *ItinData, const MachineRegisterInfo *MRI, const MachineInstr *DefMI, unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const; - + private: MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc, MachineFunction::iterator &MFI, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index f832a7c85a8a..03a0b0c3aedd 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -459,7 +459,7 @@ def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; include "X86InstrFormats.td" //===----------------------------------------------------------------------===// -// Pattern fragments... +// Pattern fragments. // // X86 specific condition code. These correspond to CondCode in @@ -481,21 +481,21 @@ def X86_COND_O : PatLeaf<(i8 13)>; def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE def X86_COND_S : PatLeaf<(i8 15)>; -def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>; +let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs. 
+ def i16immSExt8 : ImmLeaf; + def i32immSExt8 : ImmLeaf; + def i64immSExt8 : ImmLeaf; +} -def i16immSExt8 : PatLeaf<(i16 immSext8)>; -def i32immSExt8 : PatLeaf<(i32 immSext8)>; -def i64immSExt8 : PatLeaf<(i64 immSext8)>; -def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>; -def i64immZExt32 : PatLeaf<(i64 imm), [{ - // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // unsignedsign extended field. - return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue(); -}]>; +def i64immSExt32 : ImmLeaf; -def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{ - uint64_t v = N->getZExtValue(); - return v == (uint32_t)v && (int32_t)v == (int8_t)v; + +// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit +// unsigned field. +def i64immZExt32 : ImmLeaf; + +def i64immZExt32SExt8 : ImmLeaf; // Helper fragments for loads. @@ -1437,7 +1437,7 @@ def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>; // Various unary fpstack operations default to operating on on ST1. // For example, "fxch" -> "fxch %st(1)" -def : InstAlias<"faddp", (ADD_FPrST0 ST1)>; +def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>; def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>; def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>; def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>; @@ -1455,13 +1455,15 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1)>; // For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate // instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with // gas. 
-multiclass FpUnaryAlias { - def : InstAlias; - def : InstAlias; +multiclass FpUnaryAlias { + def : InstAlias; + def : InstAlias; } defm : FpUnaryAlias<"fadd", ADD_FST0r>; -defm : FpUnaryAlias<"faddp", ADD_FPrST0>; +defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>; defm : FpUnaryAlias<"fsub", SUB_FST0r>; defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>; defm : FpUnaryAlias<"fsubr", SUBR_FST0r>; @@ -1472,8 +1474,8 @@ defm : FpUnaryAlias<"fdiv", DIV_FST0r>; defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>; defm : FpUnaryAlias<"fdivr", DIVR_FST0r>; defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>; -defm : FpUnaryAlias<"fcomi", COM_FIr>; -defm : FpUnaryAlias<"fucomi", UCOM_FIr>; +defm : FpUnaryAlias<"fcomi", COM_FIr, 0>; +defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>; defm : FpUnaryAlias<"fcompi", COM_FIPr>; defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; @@ -1481,7 +1483,7 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>; // Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they // commute. We also allow fdiv[r]p/fsubrp even though they don't commute, // solely because gas supports it. -def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>; +def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>; def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>; def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>; def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>; @@ -1534,29 +1536,31 @@ def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>; def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>; // Match 'movq GR64, MMX' as an alias for movd. 
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>; -def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>; +def : InstAlias<"movq $src, $dst", + (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>; +def : InstAlias<"movq $src, $dst", + (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>; // movsd with no operands (as opposed to the SSE scalar move of a double) is an // alias for movsl. (as in rep; movsd) def : InstAlias<"movsd", (MOVSD)>; // movsx aliases -def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>; -def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>; // movzx aliases -def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>; -def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, 
GR16:$src)>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>; +def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>; // Note: No GR32->GR64 movzx form. // outb %dx -> outb %al, %dx diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b912949d482f..cde3f6b7d3c2 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -135,18 +135,16 @@ class sse12_move_rm, XS, VEX_4V; - def VMOVSDrr : sse12_move_rr, XD, VEX_4V; +def VMOVSSrr : sse12_move_rr, XS, VEX_4V; +def VMOVSDrr : sse12_move_rr, XD, VEX_4V; - let canFoldAsLoad = 1, isReMaterializable = 1 in { - def VMOVSSrm : sse12_move_rm, XS, VEX; +let canFoldAsLoad = 1, isReMaterializable = 1 in { + def VMOVSSrm : sse12_move_rm, XS, VEX; - let AddedComplexity = 20 in - def VMOVSDrm : sse12_move_rm, XD, VEX; - } + let AddedComplexity = 20 in + def VMOVSDrm : sse12_move_rm, XD, VEX; } let Constraints = "$src1 = $dst" in { @@ -218,14 +216,12 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>; -let isAsmParserOnly = 1 in { def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src), "movss\t{$src, $dst|$dst, $src}", [(store FR32:$src, addr:$dst)]>, XS, VEX; def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src), "movsd\t{$src, $dst|$dst, $src}", [(store FR64:$src, addr:$dst)]>, XD, VEX; -} // Extract and store. 
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), @@ -251,7 +247,6 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in [(set RC:$dst, (ld_frag addr:$src))], d>; } -let isAsmParserOnly = 1 in { defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle>, VEX; defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, @@ -269,7 +264,6 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32, "movups", SSEPackedSingle>, VEX; defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64, "movupd", SSEPackedDouble, 0>, OpSize, VEX; -} defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32, "movaps", SSEPackedSingle>, TB; defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64, @@ -279,7 +273,6 @@ defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32, defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64, "movupd", SSEPackedDouble, 0>, TB, OpSize; -let isAsmParserOnly = 1 in { def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movaps\t{$src, $dst|$dst, $src}", [(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX; @@ -304,7 +297,6 @@ def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movupd\t{$src, $dst|$dst, $src}", [(store (v4f64 VR256:$src), addr:$dst)]>, VEX; -} def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>; def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src), @@ -328,32 +320,14 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v2f64 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS/D load and store -let isAsmParserOnly = 1 in { - let canFoldAsLoad = 1, isReMaterializable = 1 in - def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "movups\t{$src, $dst|$dst, $src}", - [(set 
VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX; - def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst), - (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX; - def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movups\t{$src, $dst|$dst, $src}", - [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; - def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; -} -let canFoldAsLoad = 1, isReMaterializable = 1 in -def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movups\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; -def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), - "movupd\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>; +def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movups\t{$src, $dst|$dst, $src}", + [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX; +def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movupd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX; def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movups\t{$src, $dst|$dst, $src}", @@ -382,7 +356,7 @@ multiclass sse12_mov_hilo_packedopc, RegisterClass RC, SSEPackedDouble>, TB, OpSize; } -let isAsmParserOnly = 1, AddedComplexity = 20 in { +let AddedComplexity = 20 in { defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp", "\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V; defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp", @@ -395,7 +369,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in { "\t{$src2, $dst|$dst, $src2}">; } -let isAsmParserOnly = 1 in { 
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -404,7 +377,6 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlpd\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), addr:$dst)]>, VEX; -} def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movlps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)), @@ -416,7 +388,6 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), // v2f64 extract element 1 is always custom lowered to unpack high to low // and extract element 0 so the non-store version isn't too horrible. -let isAsmParserOnly = 1 in { def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract @@ -429,7 +400,6 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), (v2f64 (unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst)]>, VEX; -} def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), "movhps\t{$src, $dst|$dst, $src}", [(store (f64 (vector_extract @@ -441,7 +411,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), (v2f64 (unpckh VR128:$src, (undef))), (iPTR 0))), addr:$dst)]>; -let isAsmParserOnly = 1, AddedComplexity = 20 in { +let AddedComplexity = 20 in { def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -516,7 +486,6 @@ multiclass sse12_vcvt_avx opc, RegisterClass SrcRC, RegisterClass DstRC, !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>; } -let isAsmParserOnly = 1 in { defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX; defm VCVTTSS2SI64 
: sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, @@ -542,7 +511,6 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD, VEX_4V; defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD, VEX_4V, VEX_W; -} defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, "cvttss2si\t{$src, $dst|$dst, $src}">, XS; @@ -591,27 +559,25 @@ multiclass sse12_cvt_sint_3addr opc, RegisterClass SrcRC, [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>; } -let isAsmParserOnly = 1 in { - defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, - f32mem, load, "cvtss2si">, XS, VEX; - defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">, - XS, VEX, VEX_W; - defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, - f128mem, load, "cvtsd2si">, XD, VEX; - defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, - int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">, - XD, VEX, VEX_W; +defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, + f32mem, load, "cvtss2si">, XS, VEX; +defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">, + XS, VEX, VEX_W; +defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, + f128mem, load, "cvtsd2si">, XD, VEX; +defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, + int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">, + XD, VEX, VEX_W; - // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_ - // Get rid of this hack or rename the intrinsics, there are several - // intructions that only match with the intrinsic form, why create duplicates - // to let them be recognized by the assembler? 
- defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; - defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, - "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W; -} +// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_ +// Get rid of this hack or rename the intrinsics, there are several +// intructions that only match with the intrinsic form, why create duplicates +// to let them be recognized by the assembler? +defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX; +defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem, + "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W; defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, f32mem, load, "cvtss2si">, XS; defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, @@ -622,18 +588,16 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si{q}">, XD, REX_W; -let isAsmParserOnly = 1 in { - defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V; - defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V, - VEX_W; - defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, - int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V; - defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, - int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD, - VEX_4V, VEX_W; -} +defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V; +defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V, + VEX_W; +defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, + 
int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V; +defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, + int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD, + VEX_4V, VEX_W; let Constraints = "$src1 = $dst" in { defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, @@ -653,7 +617,6 @@ let Constraints = "$src1 = $dst" in { /// SSE 1 Only // Aliases for intrinsics -let isAsmParserOnly = 1 in { defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, f32mem, load, "cvttss2si">, XS, VEX; defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, @@ -664,7 +627,6 @@ defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, "cvttsd2si">, XD, VEX, VEX_W; -} defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, f32mem, load, "cvttss2si">, XS; defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, @@ -676,7 +638,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, int_x86_sse2_cvttsd2si64, f128mem, load, "cvttsd2si{q}">, XD, REX_W; -let isAsmParserOnly = 1, Pattern = [] in { +let Pattern = [] in { defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load, "cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX; defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load, @@ -702,7 +664,6 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/, /// SSE 2 Only // Convert scalar double to scalar single -let isAsmParserOnly = 1 in { def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src1, FR64:$src2), "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, @@ -711,7 +672,6 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins FR64:$src1, f64mem:$src2), "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V; -} def : Pat<(f32 (fround FR64:$src)), 
(VCVTSD2SSrr FR64:$src, FR64:$src)>, Requires<[HasAVX]>; @@ -723,7 +683,6 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src), [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD, Requires<[HasSSE2, OptForSize]>; -let isAsmParserOnly = 1 in defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>, XS, VEX_4V; @@ -732,7 +691,7 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128, int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS; // Convert scalar single to scalar double -let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix +// SSE2 instructions with XS prefix def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src1, FR32:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -741,7 +700,6 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins FR32:$src1, f32mem:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>; -} def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[HasAVX]>; @@ -754,7 +712,6 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src), [(set FR64:$dst, (extloadf32 addr:$src))]>, XS, Requires<[HasSSE2, OptForSize]>; -let isAsmParserOnly = 1 in { def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -767,7 +724,6 @@ def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, [(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1, (load addr:$src2)))]>, XS, VEX_4V, Requires<[HasAVX]>; -} let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -788,7 +744,7 @@ def : Pat<(extloadf32 addr:$src), Requires<[HasSSE2, OptForSpeed]>; // Convert doubleword to packed single/double fp -let isAsmParserOnly = 1 in { // SSE2 instructions without 
OpSize prefix +// SSE2 instructions without OpSize prefix def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, @@ -798,7 +754,6 @@ def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtdq2ps (bitconvert (memopv2i64 addr:$src))))]>, TB, VEX, Requires<[HasAVX]>; -} def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>, @@ -810,7 +765,7 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), TB, Requires<[HasSSE2]>; // FIXME: why the non-intrinsic version is described as SSE3? -let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix +// SSE2 instructions with XS prefix def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, @@ -820,7 +775,6 @@ def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtdq2pd (bitconvert (memopv2i64 addr:$src))))]>, XS, VEX, Requires<[HasAVX]>; -} def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>, @@ -833,7 +787,6 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), // Convert packed single/double fp to doubleword -let isAsmParserOnly = 1 in { def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -842,13 +795,11 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def 
VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX; -} def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", []>; -let isAsmParserOnly = 1 in { def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>, @@ -858,7 +809,6 @@ def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>, VEX; -} def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>; @@ -867,7 +817,7 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtps2dq (memop addr:$src)))]>; -let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix +// SSE2 packed instructions with XD prefix def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, @@ -877,7 +827,6 @@ def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtpd2dq (memop addr:$src)))]>, XD, VEX, Requires<[HasAVX]>; -} def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, @@ -890,7 +839,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), // Convert with truncation packed single/double fp to doubleword -let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix +// SSE2 
packed instructions with XS prefix def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), @@ -899,7 +848,6 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX; -} def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -910,7 +858,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), (int_x86_sse2_cvttps2dq (memop addr:$src)))]>; -let isAsmParserOnly = 1 in { def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvttps2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -921,9 +868,7 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttps2dq (memop addr:$src)))]>, XS, VEX, Requires<[HasAVX]>; -} -let isAsmParserOnly = 1 in { def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", @@ -934,7 +879,6 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>, VEX; -} def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvttpd2dq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>; @@ -943,7 +887,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), [(set VR128:$dst, (int_x86_sse2_cvttpd2dq (memop addr:$src)))]>; -let isAsmParserOnly = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory 
operands instead. // Provide other assembly rr and rm forms to address this explicitly. @@ -963,10 +906,9 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; -} // Convert packed single to packed double -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // SSE2 instructions without OpSize prefix def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX; @@ -982,7 +924,6 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB; -let isAsmParserOnly = 1 in { def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -992,7 +933,6 @@ def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), [(set VR128:$dst, (int_x86_sse2_cvtps2pd (load addr:$src)))]>, VEX, Requires<[HasAVX]>; -} def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtps2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>, @@ -1004,7 +944,6 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), TB, Requires<[HasSSE2]>; // Convert packed double to packed single -let isAsmParserOnly = 1 in { // The assembler can recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. 
@@ -1024,14 +963,12 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L; -} def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", []>; -let isAsmParserOnly = 1 in { def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; @@ -1040,7 +977,6 @@ def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps (memop addr:$src)))]>; -} def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2ps\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>; @@ -1089,26 +1025,27 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)), // sse12_cmp_scalar - sse 1 & 2 compare scalar instructions multiclass sse12_cmp_scalar { - def rr : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), - asm, []>; - let mayLoad = 1 in - def rm : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), - asm, []>; - // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1 in { - def rr_alt : SIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), - asm_alt, []>; + def rr : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), + asm, []>; let mayLoad = 1 in - def rm_alt : SIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), - asm_alt, []>; + def rm : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc), + asm, []>; } + + // Accept explicit immediate argument form instead of comparison code. + def rr_alt : SIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, []>; + let mayLoad = 1 in + def rm_alt : SIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2), + asm_alt, []>; } -let neverHasSideEffects = 1, isAsmParserOnly = 1 in { +let neverHasSideEffects = 1 in { defm VCMPSS : sse12_cmp_scalar, @@ -1141,14 +1078,12 @@ multiclass sse12_cmp_scalar_int, - XS, VEX_4V; - defm Int_VCMPSD : sse12_cmp_scalar_int, - XD, VEX_4V; -} +defm Int_VCMPSS : sse12_cmp_scalar_int, + XS, VEX_4V; +defm Int_VCMPSD : sse12_cmp_scalar_int, + XD, VEX_4V; let Constraints = "$src1 = $dst" in { defm Int_CMPSS : sse12_cmp_scalar_int, XS; @@ -1171,28 +1106,26 @@ multiclass sse12_ord_cmp opc, RegisterClass RC, SDNode OpNode, } let Defs = [EFLAGS] in { - let isAsmParserOnly = 1 in { - defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, - "ucomiss", SSEPackedSingle>, VEX; - defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, - "ucomisd", SSEPackedDouble>, OpSize, VEX; - let Pattern = [] in { - defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, - "comiss", SSEPackedSingle>, VEX; - defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, - "comisd", SSEPackedDouble>, OpSize, VEX; - } - - defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, - load, "ucomiss", SSEPackedSingle>, VEX; - defm 
Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, - load, "ucomisd", SSEPackedDouble>, OpSize, VEX; - - defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, - load, "comiss", SSEPackedSingle>, VEX; - defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, - load, "comisd", SSEPackedDouble>, OpSize, VEX; + defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, + "ucomiss", SSEPackedSingle>, VEX; + defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, + "ucomisd", SSEPackedDouble>, OpSize, VEX; + let Pattern = [] in { + defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, + "comiss", SSEPackedSingle>, VEX; + defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, + "comisd", SSEPackedDouble>, OpSize, VEX; } + + defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, + load, "ucomiss", SSEPackedSingle>, VEX; + defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, + load, "ucomisd", SSEPackedDouble>, OpSize, VEX; + + defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, + load, "comiss", SSEPackedSingle>, VEX; + defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, + load, "comisd", SSEPackedDouble>, OpSize, VEX; defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, "ucomiss", SSEPackedSingle>, TB; defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, @@ -1220,41 +1153,40 @@ let Defs = [EFLAGS] in { multiclass sse12_cmp_packed { - def rri : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; - def rmi : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, - [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; - // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1 in { - def rri_alt : PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), - asm_alt, [], d>; - def rmi_alt : PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), - asm_alt, [], d>; + def rri : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>; + def rmi : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm, + [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>; } + + // Accept explicit immediate argument form instead of comparison code. + def rri_alt : PIi8<0xC2, MRMSrcReg, + (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2), + asm_alt, [], d>; + def rmi_alt : PIi8<0xC2, MRMSrcMem, + (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2), + asm_alt, [], d>; } -let isAsmParserOnly = 1 in { - defm VCMPPS : sse12_cmp_packed, VEX_4V; - defm VCMPPD : sse12_cmp_packed, OpSize, VEX_4V; - defm VCMPPSY : sse12_cmp_packed, VEX_4V; - defm VCMPPDY : sse12_cmp_packed, OpSize, VEX_4V; -} +defm VCMPPS : sse12_cmp_packed, VEX_4V; +defm VCMPPD : sse12_cmp_packed, OpSize, VEX_4V; +defm VCMPPSY : sse12_cmp_packed, VEX_4V; +defm VCMPPDY : sse12_cmp_packed, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm CMPPS : sse12_cmp_packed; } -let isAsmParserOnly = 1 in { - defm VSHUFPS : sse12_shuffle, VEX_4V; - defm VSHUFPSY : sse12_shuffle, VEX_4V; - defm VSHUFPD : sse12_shuffle, OpSize, VEX_4V; - defm VSHUFPDY : sse12_shuffle, OpSize, VEX_4V; -} +defm VSHUFPS : sse12_shuffle, TB, VEX_4V; +defm VSHUFPSY : sse12_shuffle, TB, VEX_4V; +defm VSHUFPD : sse12_shuffle, TB, OpSize, VEX_4V; +defm VSHUFPDY : sse12_shuffle, TB, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm SHUFPS : sse12_shuffle opc, PatFrag OpNode, ValueType vt, } let AddedComplexity = 10 in { - let isAsmParserOnly = 1 in { - defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, - 
VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, - VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, - VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, - VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, + VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64, + VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32, + VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64, + VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; - defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, - VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, - VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, - VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, VEX_4V; - defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, - VR256, f256mem, "unpcklpd\t{$src2, $src1, 
$dst|$dst, $src1, $src2}", - SSEPackedDouble>, OpSize, VEX_4V; - } + defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32, + VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64, + VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; + defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32, + VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedSingle>, VEX_4V; + defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64, + VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", + SSEPackedDouble>, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32, @@ -1404,30 +1332,28 @@ defm MOVMSKPS : sse12_extr_sign_mask, TB, OpSize; -let isAsmParserOnly = 1 in { - defm VMOVMSKPS : sse12_extr_sign_mask, VEX; - defm VMOVMSKPD : sse12_extr_sign_mask, OpSize, - VEX; - defm VMOVMSKPSY : sse12_extr_sign_mask, VEX; - defm VMOVMSKPDY : sse12_extr_sign_mask, OpSize, - VEX; +defm VMOVMSKPS : sse12_extr_sign_mask, VEX; +defm VMOVMSKPD : sse12_extr_sign_mask, OpSize, + VEX; +defm VMOVMSKPSY : sse12_extr_sign_mask, VEX; +defm VMOVMSKPDY : sse12_extr_sign_mask, OpSize, + VEX; - // Assembler Only - def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; - def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, - VEX; - def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; - def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), - "movmskpd\t{$src, $dst|$dst, $src}", [], 
SSEPackedDouble>, OpSize, - VEX; -} +// Assembler Only +def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; +def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, + VEX; +def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX; +def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize, + VEX; //===----------------------------------------------------------------------===// // SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions @@ -1482,13 +1408,11 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), /// multiclass sse12_fp_alias_pack_logical opc, string OpcodeStr, SDNode OpNode> { - let isAsmParserOnly = 1 in { - defm V#NAME#PS : sse12_fp_packed, VEX_4V; + defm V#NAME#PS : sse12_fp_packed, VEX_4V; - defm V#NAME#PD : sse12_fp_packed, OpSize, VEX_4V; - } + defm V#NAME#PD : sse12_fp_packed, OpSize, VEX_4V; let Constraints = "$src1 = $dst" in { defm PS : sse12_fp_packed, isCommutable = 0 in multiclass sse12_fp_packed_logical opc, string OpcodeStr, SDNode OpNode, int HasPat = 0, list> Pattern = []> { - let isAsmParserOnly = 1, Pattern = [] in { + let Pattern = [] in { defm V#NAME#PS : sse12_fp_packed_logical_rm opc, string OpcodeStr, /// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms /// -let isAsmParserOnly = 1 in { multiclass sse12_fp_packed_logical_y opc, string OpcodeStr> { defm PSY : sse12_fp_packed_logical_rm, VEX_4V; @@ -1569,7 +1492,6 @@ multiclass sse12_fp_packed_logical_y opc, string OpcodeStr> { defm PDY : sse12_fp_packed_logical_rm, OpSize, VEX_4V; } -} // AVX 256-bit packed logical ops forms defm VAND : sse12_fp_packed_logical_y<0x54, "and">; @@ -1667,38 
+1589,36 @@ multiclass basic_sse12_fp_binop_p_y_int opc, string OpcodeStr> { } // Binary Arithmetic instructions -let isAsmParserOnly = 1 in { - defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, - basic_sse12_fp_binop_s_int<0x58, "add", 0>, - basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, - basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; - defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, - basic_sse12_fp_binop_s_int<0x59, "mul", 0>, - basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, - basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; +defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_s_int<0x58, "add", 0>, + basic_sse12_fp_binop_p<0x58, "add", fadd, 0>, + basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V; +defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_s_int<0x59, "mul", 0>, + basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>, + basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V; - let isCommutable = 0 in { - defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, - basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, - basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, - basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; - defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, - basic_sse12_fp_binop_s_int<0x5E, "div", 0>, - basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, - basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; - defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_s_int<0x5F, "max", 0>, - basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, - basic_sse12_fp_binop_p_int<0x5F, "max", 0>, - basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, - basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; - defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, - basic_sse12_fp_binop_s_int<0x5D, "min", 0>, - basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, - basic_sse12_fp_binop_p_int<0x5D, "min", 0>, - basic_sse12_fp_binop_p_y_int<0x5D, "min">, - 
basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; - } +let isCommutable = 0 in { + defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_s_int<0x5C, "sub", 0>, + basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>, + basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V; + defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_s_int<0x5E, "div", 0>, + basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>, + basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V; + defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_s_int<0x5F, "max", 0>, + basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>, + basic_sse12_fp_binop_p_int<0x5F, "max", 0>, + basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>, + basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V; + defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_s_int<0x5D, "min", 0>, + basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>, + basic_sse12_fp_binop_p_int<0x5D, "min", 0>, + basic_sse12_fp_binop_p_y_int<0x5D, "min">, + basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V; } let Constraints = "$src1 = $dst" in { @@ -1899,7 +1819,7 @@ multiclass sse2_fp_unop_p_y_int opc, string OpcodeStr, [(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // Square root. 
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>, sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>, @@ -1955,67 +1875,65 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>, // SSE 1 & 2 - Non-temporal stores //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in { - def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX; - def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX; +def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX; +def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX; - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs), +let ExeDomain = SSEPackedInt in + def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX; + +let AddedComplexity = 400 in { // Prefer non-temporal versions + def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX; - - let AddedComplexity = 400 in { // Prefer non-temporal versions - def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), - addr:$dst)]>, VEX; - def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - 
"movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)]>, VEX; - def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f128mem:$dst, VR128:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v2f64 VR128:$src), - addr:$dst)]>, VEX; - let ExeDomain = SSEPackedInt in - def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v2f64 VR128:$src), + addr:$dst)]>, VEX; + def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f32 VR128:$src), + [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>, VEX; + let ExeDomain = SSEPackedInt in + def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f128mem:$dst, VR128:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f32 VR128:$src), + addr:$dst)]>, VEX; - def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntps\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), - addr:$dst)]>, VEX; - def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntpd\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)]>, VEX; - def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), - (ins f256mem:$dst, VR256:$src), - "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v4f64 VR256:$src), - addr:$dst)]>, VEX; - let ExeDomain = SSEPackedInt in - def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; + 
def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v4f64 VR256:$src), + addr:$dst)]>, VEX; + def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src), "movntdq\t{$src, $dst|$dst, $src}", - [(alignednontemporalstore (v8f32 VR256:$src), + [(alignednontemporalstore (v4f64 VR256:$src), addr:$dst)]>, VEX; - } + let ExeDomain = SSEPackedInt in + def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), + (ins f256mem:$dst, VR256:$src), + "movntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f32 VR256:$src), + addr:$dst)]>, VEX; } def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src), @@ -2138,12 +2056,10 @@ def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))), // SSE 1 & 2 - Load/Store XCSR register //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in { - def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), - "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX; - def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), - "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX; -} +def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src), + "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX; +def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), + "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX; def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src), "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>; @@ -2156,45 +2072,43 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst), let ExeDomain = SSEPackedInt in { // SSE integer instructions -let isAsmParserOnly = 1 in { - let neverHasSideEffects = 1 in { - def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqa\t{$src, $dst|$dst, 
$src}", []>, VEX; - } - def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; - def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), - "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; +let neverHasSideEffects = 1 in { +def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +} +def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; +def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), + "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX; - let canFoldAsLoad = 1, mayLoad = 1 in { - def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - let Predicates = [HasAVX] in { - def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - } - } +let canFoldAsLoad = 1, mayLoad = 1 in { +def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +let Predicates = [HasAVX] in { + def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; + def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; +} +} - let 
mayStore = 1 in { - def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), - (ins i128mem:$dst, VR128:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), - (ins i256mem:$dst, VR256:$src), - "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; - let Predicates = [HasAVX] in { - def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), - "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), - "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; - } - } +let mayStore = 1 in { +def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), + (ins i256mem:$dst, VR256:$src), + "movdqa\t{$src, $dst|$dst, $src}", []>, VEX; +let Predicates = [HasAVX] in { +def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; +def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), + "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX; +} } let neverHasSideEffects = 1 in @@ -2226,23 +2140,11 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), } // Intrinsic forms of MOVDQU load and store -let isAsmParserOnly = 1 in { -let canFoldAsLoad = 1 in -def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "vmovdqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, - XS, VEX, Requires<[HasAVX]>; def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "vmovdqu\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, XS, VEX, Requires<[HasAVX]>; -} -let canFoldAsLoad = 1 in -def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), - "movdqu\t{$src, $dst|$dst, $src}", - [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>, - XS, 
Requires<[HasSSE2]>; def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), "movdqu\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>, @@ -2347,7 +2249,7 @@ multiclass PDI_binop_rm_v2i64 opc, string OpcodeStr, SDNode OpNode, // 128-bit Integer Arithmetic -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V; defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V; defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V; @@ -2437,7 +2339,7 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; // SSE2 - Packed Integer Logical Instructions //===---------------------------------------------------------------------===// -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>, VEX_4V; @@ -2584,7 +2486,7 @@ let Predicates = [HasSSE2] in { // SSE2 - Packed Integer Comparison Instructions //===---------------------------------------------------------------------===// -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1, 0>, VEX_4V; defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1, @@ -2638,7 +2540,7 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))), // SSE2 - Packed Integer Pack Instructions //===---------------------------------------------------------------------===// -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, 0, 0>, VEX_4V; defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, @@ -2676,7 +2578,7 @@ def mi : Ii8<0x70, MRMSrcMem, } } // 
ExeDomain = SSEPackedInt -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let AddedComplexity = 5 in defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize, VEX; @@ -2724,7 +2626,7 @@ multiclass sse2_unpack opc, string OpcodeStr, ValueType vt, addr:$src2))))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8, 0>, VEX_4V; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16, @@ -2834,7 +2736,7 @@ multiclass sse2_pinsrw { } // Extract -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in def VPEXTRWri : Ii8<0xC5, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2), "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -2847,7 +2749,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg, imm:$src2))]>; // Insert -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V; def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, GR64:$src2, i32i8imm:$src3), @@ -2866,13 +2768,11 @@ let Constraints = "$src1 = $dst" in let ExeDomain = SSEPackedInt in { -let isAsmParserOnly = 1 in { def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; -} def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; @@ -2885,7 +2785,6 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), let ExeDomain = SSEPackedInt in { -let isAsmParserOnly = 1 in { let Uses = [EDI] in def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 
(ins VR128:$src, VR128:$mask), @@ -2896,7 +2795,6 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), "maskmovdqu\t{$mask, $src|$src, $mask}", [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX; -} let Uses = [EDI] in def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), @@ -2914,7 +2812,6 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask), //===---------------------------------------------------------------------===// // Move Int Doubleword to Packed Double Int -let isAsmParserOnly = 1 in { def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2924,7 +2821,6 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), [(set VR128:$dst, (v4i32 (scalar_to_vector (loadi32 addr:$src))))]>, VEX; -} def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -2943,7 +2839,6 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), // Move Int Doubleword to Single Scalar -let isAsmParserOnly = 1 in { def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>, VEX; @@ -2952,7 +2847,6 @@ def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>, VEX; -} def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>; @@ -2962,7 +2856,6 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>; // Move Packed Doubleword Int to Packed Double Int -let isAsmParserOnly = 1 in { def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins 
VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2972,7 +2865,6 @@ def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (vector_extract (v4i32 VR128:$src), (iPTR 0))), addr:$dst)]>, VEX; -} def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), @@ -2998,14 +2890,12 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>; // Move Scalar Single to Double Int -let isAsmParserOnly = 1 in { def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>, VEX; def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX; -} def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>; @@ -3014,7 +2904,7 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>; // movd / movq to XMM register zero-extends -let AddedComplexity = 15, isAsmParserOnly = 1 in { +let AddedComplexity = 15 in { def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (X86vzmovl @@ -3038,7 +2928,6 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), } let AddedComplexity = 20 in { -let isAsmParserOnly = 1 in def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3064,7 +2953,6 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))), 
//===---------------------------------------------------------------------===// // Move Quadword Int to Packed Quadword Int -let isAsmParserOnly = 1 in def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3077,7 +2965,6 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix // Move Packed Quadword Int to Quadword Int -let isAsmParserOnly = 1 in def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (vector_extract (v2i64 VR128:$src), @@ -3091,7 +2978,6 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))), (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>; // Store / copy lower 64-bits of a XMM register. -let isAsmParserOnly = 1 in def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX; @@ -3099,7 +2985,7 @@ def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>; -let AddedComplexity = 20, isAsmParserOnly = 1 in +let AddedComplexity = 20 in def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3124,7 +3010,7 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; // Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in // IA32 document. movq xmm1, xmm2 does clear the high bits. 
-let isAsmParserOnly = 1, AddedComplexity = 15 in +let AddedComplexity = 15 in def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, @@ -3135,7 +3021,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>, XS, Requires<[HasSSE2]>; -let AddedComplexity = 20, isAsmParserOnly = 1 in +let AddedComplexity = 20 in def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (X86vzmovl @@ -3153,7 +3039,6 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))), } // Instructions to match in the assembler -let isAsmParserOnly = 1 in { def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), @@ -3161,13 +3046,12 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), // Recognize "movd" with GR64 destination, but encode as a "movq" def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W; -} // Instructions for the disassembler // xr = XMM register // xm = mem64 -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS; def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -3209,7 +3093,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1, //===---------------------------------------------------------------------===// // Convert Packed Double FP to Packed DW Integers -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // The assembler can 
recognize rr 256-bit instructions by seeing a ymm // register, but the same isn't true when using memory operands instead. // Provide other assembly rr and rm forms to address this explicitly. @@ -3237,7 +3121,7 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtpd2dq\t{$src, $dst|$dst, $src}", []>; // Convert Packed DW Integers to Packed Double FP -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX; def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), @@ -3288,7 +3172,7 @@ def rm : S3SI; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // FIXME: Merge above classes when we have patterns for the ymm version defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX; defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX; @@ -3319,7 +3203,7 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), []>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // FIXME: Merge above classes when we have patterns for the ymm version defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX; defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX; @@ -3327,7 +3211,7 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in { defm MOVDDUP : sse3_replicate_dfp<"movddup">; // Move Unaligned Integer -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vlddqu\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX; @@ -3391,21 +3275,21 @@ multiclass sse3_addsub; } -let isAsmParserOnly = 1, Predicates = [HasAVX], +let Predicates = [HasAVX], ExeDomain = SSEPackedDouble in { defm VADDSUBPS : sse3_addsub, XD, VEX_4V; + f128mem, 0>, TB, 
XD, VEX_4V; defm VADDSUBPD : sse3_addsub, OpSize, VEX_4V; + f128mem, 0>, TB, OpSize, VEX_4V; defm VADDSUBPSY : sse3_addsub, XD, VEX_4V; + f256mem, 0>, TB, XD, VEX_4V; defm VADDSUBPDY : sse3_addsub, OpSize, VEX_4V; + f256mem, 0>, TB, OpSize, VEX_4V; } let Constraints = "$src1 = $dst", Predicates = [HasSSE3], ExeDomain = SSEPackedDouble in { defm ADDSUBPS : sse3_addsub, XD; + f128mem>, TB, XD; defm ADDSUBPD : sse3_addsub, TB, OpSize; } @@ -3444,7 +3328,7 @@ multiclass S3_Int o, string OpcodeStr, ValueType vt, RegisterClass RC, [(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, int_x86_sse3_hadd_ps, 0>, VEX_4V; defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, @@ -3496,7 +3380,7 @@ multiclass SS3I_unop_rm_int opc, string OpcodeStr, (bitconvert (mem_frag128 addr:$src))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, int_x86_ssse3_pabs_b_128>, VEX; defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16, @@ -3538,7 +3422,7 @@ multiclass SS3I_binop_rm_int opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let isCommutable = 0 in { defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16, int_x86_ssse3_phadd_w_128, 0>, VEX_4V; @@ -3630,7 +3514,7 @@ multiclass ssse3_palign { []>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PALIGN : ssse3_palign<"palignr">; @@ -3985,7 +3869,7 @@ multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { 
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, VEX; defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>, @@ -4051,7 +3935,7 @@ multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, VEX; defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>, @@ -4092,7 +3976,7 @@ multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, VEX; defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, @@ -4134,7 +4018,7 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { // (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX; def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -4156,7 +4040,7 @@ multiclass SS41I_extract16 opc, string OpcodeStr> { // (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst) } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX; defm PEXTRW : SS41I_extract16<0x15, "pextrw">; @@ -4178,7 +4062,7 @@ multiclass SS41I_extract32 opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX; defm PEXTRD : SS41I_extract32<0x16, "pextrd">; @@ -4199,7 +4083,7 @@ multiclass SS41I_extract64 opc, string OpcodeStr> { addr:$dst)]>, OpSize, REX_W; } -let 
isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W; defm PEXTRQ : SS41I_extract64<0x16, "pextrq">; @@ -4222,7 +4106,7 @@ multiclass SS41I_extractf32 opc, string OpcodeStr> { addr:$dst)]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst), (ins VR128:$src1, i32i8imm:$src2), @@ -4262,7 +4146,7 @@ multiclass SS41I_insert8 opc, string asm, bit Is2Addr = 1> { imm:$src3))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRB : SS41I_insert8<0x20, "pinsrb">; @@ -4288,7 +4172,7 @@ multiclass SS41I_insert32 opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PINSRD : SS41I_insert32<0x22, "pinsrd">; @@ -4314,7 +4198,7 @@ multiclass SS41I_insert64 opc, string asm, bit Is2Addr = 1> { imm:$src3)))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W; let Constraints = "$src1 = $dst" in defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W; @@ -4347,7 +4231,7 @@ multiclass SS41I_insertf32 opc, string asm, bit Is2Addr = 1> { let Constraints = "$src1 = $dst" in defm INSERTPS : SS41I_insertf32<0x21, "insertps">; -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V; def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3), @@ -4517,7 +4401,7 @@ multiclass sse41_fp_binop_rm_avx_s opcss, bits<8> 
opcsd, } // FP round - roundss, roundps, roundsd, roundpd -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { // Intrinsic form defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128, memopv4f32, memopv2f64, @@ -4552,7 +4436,7 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", // ptest instruction we'll lower to this in X86ISelLowering primarily from // the intel intrinsic that corresponds to this. -let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Defs = [EFLAGS], Predicates = [HasAVX] in { def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "vptest\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>, @@ -4595,7 +4479,7 @@ multiclass avx_bittest opc, string OpcodeStr, RegisterClass RC, OpSize, VEX; } -let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Defs = [EFLAGS], Predicates = [HasAVX] in { defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>; defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>; defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>; @@ -4644,7 +4528,7 @@ multiclass SS41I_unop_rm_int_v16 opc, string OpcodeStr, (bitconvert (memopv8i16 addr:$src))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw", int_x86_sse41_phminposuw>, VEX; defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw", @@ -4670,7 +4554,7 @@ multiclass SS41I_binop_rm_int opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, 0>, VEX_4V; @@ -4737,7 +4621,7 @@ multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, OpSize; } 
-let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; let Constraints = "$src1 = $dst" in defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; @@ -4769,7 +4653,7 @@ multiclass SS41I_binop_rmi_int opc, string OpcodeStr, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { let isCommutable = 0 in { defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps, VR128, memopv16i8, i128mem, 0>, VEX_4V; @@ -4810,7 +4694,7 @@ let Constraints = "$src1 = $dst" in { } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { multiclass SS41I_quaternary_int_avx opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, Intrinsic IntId> { @@ -4870,7 +4754,7 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>; def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0), (PBLENDVBrr0 VR128:$src1, VR128:$src2)>; -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, @@ -4904,7 +4788,7 @@ multiclass SS42I_binop_rm_int opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } -let isAsmParserOnly = 1, Predicates = [HasAVX] in +let Predicates = [HasAVX] in defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, 0>, VEX_4V; let Constraints = "$src1 = $dst" in @@ -4936,8 +4820,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in { defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>; } -let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1, - Predicates = [HasAVX] in { +let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in { def VPCMPISTRM128rr : 
SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; @@ -4972,7 +4855,7 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>; } -let isAsmParserOnly = 1, Predicates = [HasAVX], +let Predicates = [HasAVX], Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), @@ -5007,7 +4890,7 @@ let Defs = [ECX, EFLAGS] in { } } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPCMPISTRI : SS42AI_pcmpistri, VEX; defm VPCMPISTRIA : SS42AI_pcmpistri, @@ -5046,7 +4929,7 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in { } } -let isAsmParserOnly = 1, Predicates = [HasAVX] in { +let Predicates = [HasAVX] in { defm VPCMPESTRI : SS42AI_pcmpestri, VEX; defm VPCMPESTRIA : SS42AI_pcmpestri, @@ -5165,7 +5048,7 @@ multiclass AESI_binop_rm_int opc, string OpcodeStr, } // Perform One Round of an AES Encryption/Decryption Flow -let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { +let Predicates = [HasAVX, HasAES] in { defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", int_x86_aesni_aesenc, 0>, VEX_4V; defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", @@ -5205,7 +5088,7 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), (AESDECLASTrm VR128:$src1, addr:$src2)>; // Perform the AES InvMixColumn Transformation -let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { +let Predicates = [HasAVX, HasAES] in { def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1), "vaesimc\t{$src1, $dst|$dst, $src1}", @@ -5233,7 +5116,7 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), OpSize; // AES Round Key Generation Assist -let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in { +let Predicates = [HasAVX, HasAES] 
in { def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, i8imm:$src2), "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", @@ -5269,7 +5152,6 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), // Only the AVX version of CLMUL instructions are described here. // Carry-less Multiplication instructions -let isAsmParserOnly = 1 in { def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", @@ -5295,13 +5177,10 @@ defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">; defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">; defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">; -} // isAsmParserOnly - //===----------------------------------------------------------------------===// // AVX Instructions //===----------------------------------------------------------------------===// -let isAsmParserOnly = 1 in { // Load from memory and broadcast to all elements of the destination operand class avx_broadcast opc, string OpcodeStr, RegisterClass RC, @@ -5435,8 +5314,6 @@ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", [(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>; -} // isAsmParserOnly - def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3), @@ -5622,11 +5499,15 @@ def : Pat<(X86Movddup (bc_v2f64 // Shuffle with UNPCKLPS def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))), + (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))), (UNPCKLPSrm 
VR128:$src1, addr:$src2)>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)), + (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)), (UNPCKLPSrr VR128:$src1, VR128:$src2)>; @@ -5644,11 +5525,15 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)), // Shuffle with UNPCKLPD def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))), + (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))), (UNPCKLPDrm VR128:$src1, addr:$src2)>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>; +def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)), + (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>; def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)), (UNPCKLPDrr VR128:$src1, VR128:$src2)>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index 6a24d145c696..f73cff39e86d 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -34,9 +34,16 @@ let Uses = [EFLAGS] in def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>; def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3", [(int_x86_int (i8 3))]>; + +// The long form of "int $3" turns into int3 as a size optimization. +// FIXME: This doesn't work because InstAlias can't match immediate constants. 
+//def : InstAlias<"int\t$3", (INT3)>; + + def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", [(int_x86_int imm:$trap)]>; + def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB; def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB; def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB, @@ -207,10 +214,15 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src), def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB; -def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins), - "str{w}\t{$dst}", []>, TB; -def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), - "str{w}\t{$dst}", []>, TB; +def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins), + "str{w}\t{$dst}", []>, TB, OpSize; +def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins), + "str{l}\t{$dst}", []>, TB; +def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins), + "str{q}\t{$dst}", []>, TB; +def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins), + "str{w}\t{$dst}", []>, TB; + def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src), "ltr{w}\t{$src}", []>, TB; def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src), @@ -393,3 +405,23 @@ let Defs = [RDX, RAX], Uses = [RCX] in let Uses = [RDX, RAX, RCX] in def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB; + +//===----------------------------------------------------------------------===// +// VIA PadLock crypto instructions +let Defs = [RAX, RDI], Uses = [RDX, RDI] in + def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7; + +let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in { + def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7; + def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7; + def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7; + def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7; + def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7; +} + +let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, 
RDI] in { + def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6; + def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6; +} +let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in + def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6; diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp index 6686214e06f5..83bba529a689 100644 --- a/lib/Target/X86/X86MCAsmInfo.cpp +++ b/lib/Target/X86/X86MCAsmInfo.cpp @@ -15,7 +15,9 @@ #include "X86TargetMachine.h" #include "llvm/ADT/Triple.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ELF.h" using namespace llvm; @@ -69,7 +71,22 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) { DwarfUsesInlineInfoSection = true; // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfTable; + ExceptionsType = ExceptionHandling::DwarfCFI; +} + +const MCExpr * +X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const { + MCContext &Context = Streamer.getContext(); + const MCExpr *Res = + MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context); + const MCExpr *Four = MCConstantExpr::Create(4, Context); + return MCBinaryExpr::CreateAdd(Res, Four, Context); +} + +X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple) + : X86MCAsmInfoDarwin(Triple) { } X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { @@ -89,7 +106,9 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) { SupportsDebugInformation = true; // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfTable; + ExceptionsType = ExceptionHandling::DwarfCFI; + + DwarfRequiresFrameSection = false; // OpenBSD has buggy support for .quad in 32-bit mode, just split into two // .words. 
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/X86MCAsmInfo.h index 581522567d09..2cd4c8eb30ec 100644 --- a/lib/Target/X86/X86MCAsmInfo.h +++ b/lib/Target/X86/X86MCAsmInfo.h @@ -25,6 +25,14 @@ namespace llvm { explicit X86MCAsmInfoDarwin(const Triple &Triple); }; + struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin { + explicit X86_64MCAsmInfoDarwin(const Triple &Triple); + virtual const MCExpr * + getExprForPersonalitySymbol(const MCSymbol *Sym, + unsigned Encoding, + MCStreamer &Streamer) const; + }; + struct X86ELFMCAsmInfo : public MCAsmInfo { explicit X86ELFMCAsmInfo(const Triple &Triple); virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 0e3b5711f2b5..f195a67a3040 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -382,7 +382,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, const TargetInstrDesc &Desc, raw_ostream &OS) const { bool HasVEX_4V = false; - if ((TSFlags >> 32) & X86II::VEX_4V) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) HasVEX_4V = true; // VEX_R: opcode externsion equivalent to REX.R in @@ -446,10 +446,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, if (TSFlags & X86II::OpSize) VEX_PP = 0x01; - if ((TSFlags >> 32) & X86II::VEX_W) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) VEX_W = 1; - if ((TSFlags >> 32) & X86II::VEX_L) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) VEX_L = 1; switch (TSFlags & X86II::Op0Mask) { @@ -470,6 +470,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::XD: // F2 0F VEX_PP = 0x3; break; + case X86II::A6: // Bypass: Not used by VEX + case X86II::A7: // Bypass: Not used by VEX case X86II::TB: // Bypass: Not used by VEX case 0: break; // No prefix! 
@@ -512,13 +514,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, } // To only check operands before the memory address ones, start - // the search from the begining + // the search from the beginning if (IsDestMem) CurOp = 0; // If the last register should be encoded in the immediate field // do not use any bit from VEX prefix to this register, ignore it - if ((TSFlags >> 32) & X86II::VEX_I8IMM) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) NumOps--; for (; CurOp != NumOps; ++CurOp) { @@ -742,6 +744,8 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::TB: // Two-byte opcode prefix case X86II::T8: // 0F 38 case X86II::TA: // 0F 3A + case X86II::A6: // 0F A6 + case X86II::A7: // 0F A7 Need0FPrefix = true; break; case X86II::TF: // F2 0F 38 @@ -786,6 +790,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::TA: // 0F 3A EmitByte(0x3A, CurByte, OS); break; + case X86II::A6: // 0F A6 + EmitByte(0xA6, CurByte, OS); + break; + case X86II::A7: // 0F A7 + EmitByte(0xA7, CurByte, OS); + break; } } @@ -819,9 +829,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, // It uses the VEX.VVVV field? bool HasVEX_4V = false; - if ((TSFlags >> 32) & X86II::VEX) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX) HasVEXPrefix = true; - if ((TSFlags >> 32) & X86II::VEX_4V) + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V) HasVEX_4V = true; @@ -837,7 +847,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags); - if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) BaseOpcode = 0x0F; // Weird 3DNow! encoding. 
unsigned SrcRegNum = 0; @@ -994,7 +1004,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, if (CurOp != NumOps) { // The last source register of a 4 operand instruction in AVX is encoded // in bits[7:4] of a immediate byte, and bits[3:0] are ignored. - if ((TSFlags >> 32) & X86II::VEX_I8IMM) { + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) { const MCOperand &MO = MI.getOperand(CurOp++); bool IsExtReg = X86InstrInfo::isX86_64ExtendedReg(MO.getReg()); @@ -1017,7 +1027,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, } } - if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode) + if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode) EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS); diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 2f6bd88c6526..37fb0fe56948 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -308,6 +308,33 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, return 0; } +const TargetRegisterClass* +X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ + const TargetRegisterClass *Super = RC; + TargetRegisterClass::sc_iterator I = RC->superclasses_begin(); + do { + switch (Super->getID()) { + case X86::GR8RegClassID: + case X86::GR16RegClassID: + case X86::GR32RegClassID: + case X86::GR64RegClassID: + case X86::FR32RegClassID: + case X86::FR64RegClassID: + case X86::RFP32RegClassID: + case X86::RFP64RegClassID: + case X86::RFP80RegClassID: + case X86::VR128RegClassID: + case X86::VR256RegClassID: + // Don't return a super-class that would shrink the spill size. + // That can happen with the vector and float classes. 
+ if (Super->getSize() == RC->getSize()) + return Super; + } + Super = *I++; + } while (Super); + return RC; +} + const TargetRegisterClass * X86RegisterInfo::getPointerRegClass(unsigned Kind) const { switch (Kind) { @@ -337,7 +364,27 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const { else return &X86::GR32RegClass; } - return NULL; + return RC; +} + +unsigned +X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + + unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0; + switch (RC->getID()) { + default: + return 0; + case X86::GR32RegClassID: + return 4 - FPDiff; + case X86::GR64RegClassID: + return 12 - FPDiff; + case X86::VR128RegClassID: + return TM.getSubtarget().is64Bit() ? 10 : 4; + case X86::VR64RegClassID: + return 4; + } } const unsigned * @@ -450,7 +497,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { // FIXME: It's more complicated than this... if (0 && requiresRealignment && MFI->hasVarSizedObjects()) report_fatal_error( - "Stack realignment in presense of dynamic allocas is not supported"); + "Stack realignment in presence of dynamic allocas is not supported"); // If we've requested that we force align the stack do so now. if (ForceStackAlign) diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 064be64f4916..9970c52c3e72 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -91,6 +91,9 @@ class X86RegisterInfo : public X86GenRegisterInfo { getMatchingSuperRegClass(const TargetRegisterClass *A, const TargetRegisterClass *B, unsigned Idx) const; + const TargetRegisterClass* + getLargestLegalSuperClass(const TargetRegisterClass *RC) const; + /// getPointerRegClass - Returns a TargetRegisterClass used for pointer /// values. 
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const; @@ -101,6 +104,9 @@ class X86RegisterInfo : public X86GenRegisterInfo { const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const; + unsigned getRegPressureLimit(const TargetRegisterClass *RC, + MachineFunction &MF) const; + /// getCalleeSavedRegs - Return a null-terminated list of all of the /// callee-save registers on this target. const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const; diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 612fac2f3be5..fd7a247adcb6 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -46,7 +46,8 @@ let Namespace = "X86" in { def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>; def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>; - // X86-64 only + // X86-64 only, requires REX. + let CostPerUse = 1 in { def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>; def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>; def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>; @@ -59,6 +60,7 @@ let Namespace = "X86" in { def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>; def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>; def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>; + } // High registers. On x86-64, these cannot be used in any instruction // with a REX prefix. @@ -82,8 +84,8 @@ let Namespace = "X86" in { } def IP : Register<"ip">, DwarfRegNum<[16]>; - // X86-64 only - let SubRegIndices = [sub_8bit] in { + // X86-64 only, requires REX. 
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in { def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>; def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>; def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>; @@ -105,7 +107,8 @@ let Namespace = "X86" in { def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>; def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>; - // X86-64 only + // X86-64 only, requires REX + let CostPerUse = 1 in { def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>; def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>; def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>; @@ -114,7 +117,7 @@ let Namespace = "X86" in { def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>; def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>; def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>; - } + }} // 64-bit registers, X86-64 only let SubRegIndices = [sub_32bit] in { @@ -127,6 +130,8 @@ let Namespace = "X86" in { def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>; def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>; + // These also require REX. + let CostPerUse = 1 in { def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>; def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>; def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>; @@ -136,7 +141,7 @@ let Namespace = "X86" in { def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>; def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>; def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>; - } + }} // MMX Registers. These are actually aliased to ST0 .. 
ST7 def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>; @@ -170,6 +175,7 @@ let Namespace = "X86" in { def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>; // X86-64 only + let CostPerUse = 1 in { def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>; def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>; def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>; @@ -178,7 +184,7 @@ let Namespace = "X86" in { def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>; def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; - } + }} // YMM Registers, used by AVX instructions let SubRegIndices = [sub_xmm] in { diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 42e819343b5b..02754f9ae503 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -178,7 +178,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { - // This requires the copy size to be a constant, preferrably + // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast(Size); if (!ConstantSize) diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 1ee73123bbc6..ba5864ef0e0a 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -144,7 +144,8 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const { /// passed as the second argument. Otherwise it returns null. const char *X86Subtarget::getBZeroEntry() const { // Darwin 10 has a __bzero entry point for this purpose. 
- if (getDarwinVers() >= 10) + if (getTargetTriple().isMacOSX() && + !getTargetTriple().isMacOSXVersionLT(10, 6)) return "__bzero"; return 0; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 0a62a029554c..286a7982a699 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -165,9 +165,15 @@ class X86Subtarget : public TargetSubtarget { bool isUnalignedMemAccessFast() const { return IsUAMemFast; } bool hasVectorUAMem() const { return HasVectorUAMem; } - bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; } - bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; } - bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; } + const Triple &getTargetTriple() const { return TargetTriple; } + + bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } + bool isTargetFreeBSD() const { + return TargetTriple.getOS() == Triple::FreeBSD; + } + bool isTargetSolaris() const { + return TargetTriple.getOS() == Triple::Solaris; + } // ELF is a reasonably sane default and the only other X86 targets we // support are Darwin and Windows. Just use "not those". @@ -215,13 +221,6 @@ class X86Subtarget : public TargetSubtarget { return PICStyle == PICStyles::StubDynamicNoPIC || PICStyle == PICStyles::StubPIC; } - /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard, - /// 10 = Snow Leopard, etc. - unsigned getDarwinVers() const { - if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber(); - return 0; - } - /// ClassifyGlobalReference - Classify a global variable reference for the /// current subtarget according to how we should reference it in a non-pcrel /// context. 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 889c824b0e11..74833291dc7a 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -26,19 +26,18 @@ using namespace llvm; static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: - return new X86MCAsmInfoDarwin(TheTriple); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (TheTriple.getEnvironment() == Triple::MachO) - return new X86MCAsmInfoDarwin(TheTriple); + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) { + if (TheTriple.getArch() == Triple::x86_64) + return new X86_64MCAsmInfoDarwin(TheTriple); else - return new X86MCAsmInfoCOFF(TheTriple); - default: - return new X86ELFMCAsmInfo(TheTriple); + return new X86MCAsmInfoDarwin(TheTriple); } + + if (TheTriple.isOSWindows()) + return new X86MCAsmInfoCOFF(TheTriple); + + return new X86ELFMCAsmInfo(TheTriple); } static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, @@ -48,19 +47,14 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT, bool RelaxAll, bool NoExecStack) { Triple TheTriple(TT); - switch (TheTriple.getOS()) { - case Triple::Darwin: + + if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); - case Triple::MinGW32: - case Triple::Cygwin: - case Triple::Win32: - if (TheTriple.getEnvironment() == Triple::MachO) - return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll); - else - return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); - default: - return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); - } + + if (TheTriple.isOSWindows()) + return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll); + + return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack); } 
extern "C" void LLVMInitializeX86Target() { @@ -96,11 +90,11 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, const std::string &FS) : X86TargetMachine(T, TT, FS, false), DataLayout(getSubtargetImpl()->isTargetDarwin() ? - "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" : (getSubtargetImpl()->isTargetCygMing() || getSubtargetImpl()->isTargetWindows()) ? - "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" : - "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"), + "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" : + "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), @@ -111,7 +105,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT, X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT, const std::string &FS) : X86TargetMachine(T, TT, FS, true), - DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"), + DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"), InstrInfo(*this), TSInfo(*this), TLInfo(*this), diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp index c15dfbb1c8ec..1231798297ac 100644 --- a/lib/Target/X86/X86TargetObjectFile.cpp +++ b/lib/Target/X86/X86TargetObjectFile.cpp @@ -38,6 +38,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); } +MCSymbol *X8664_MachoTargetObjectFile:: +getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const { + return Mang->getSymbol(GV); +} + unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const { if (TM.getRelocationModel() == Reloc::PIC_) return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4; @@ -52,7 +58,7 @@ unsigned 
X8632_ELFTargetObjectFile::getLSDAEncoding() const { return DW_EH_PE_absptr; } -unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const { +unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const { if (TM.getRelocationModel() == Reloc::PIC_) return DW_EH_PE_pcrel | DW_EH_PE_sdata4; else @@ -91,17 +97,14 @@ unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const { return DW_EH_PE_absptr; } -unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const { - CodeModel::Model Model = TM.getCodeModel(); +unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const { + if (CFI) + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; + if (TM.getRelocationModel() == Reloc::PIC_) - return DW_EH_PE_pcrel | (Model == CodeModel::Small || - Model == CodeModel::Medium ? - DW_EH_PE_sdata4 : DW_EH_PE_sdata8); + return DW_EH_PE_pcrel | DW_EH_PE_sdata4; - if (Model == CodeModel::Small || Model == CodeModel::Medium) - return DW_EH_PE_udata4; - - return DW_EH_PE_absptr; + return DW_EH_PE_udata4; } unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const { diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h index f2fd49caca38..e21b5bffd059 100644 --- a/lib/Target/X86/X86TargetObjectFile.h +++ b/lib/Target/X86/X86TargetObjectFile.h @@ -25,6 +25,12 @@ namespace llvm { getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, MachineModuleInfo *MMI, unsigned Encoding, MCStreamer &Streamer) const; + + // getCFIPersonalitySymbol - The symbol that gets passed to + // .cfi_personality. 
+ virtual MCSymbol * + getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI) const; }; class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF { @@ -34,7 +40,7 @@ namespace llvm { :TM(tm) { } virtual unsigned getPersonalityEncoding() const; virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding() const; + virtual unsigned getFDEEncoding(bool CFI) const; virtual unsigned getTTypeEncoding() const; }; @@ -45,7 +51,7 @@ namespace llvm { :TM(tm) { } virtual unsigned getPersonalityEncoding() const; virtual unsigned getLSDAEncoding() const; - virtual unsigned getFDEEncoding() const; + virtual unsigned getFDEEncoding(bool CFI) const; virtual unsigned getTTypeEncoding() const; }; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index fc8a07aad73b..6bec9f91944a 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -30,8 +30,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include -#include using namespace llvm; /// XCoreDAGToDAGISel - XCore specific code to select XCore machine @@ -49,7 +47,8 @@ namespace { Subtarget(*TM.getSubtargetImpl()) { } SDNode *Select(SDNode *N); - + SDNode *SelectBRIND(SDNode *N); + /// getI32Imm - Return a target constant with the specified value, of type /// i32. 
inline SDValue getI32Imm(unsigned Imm) { @@ -154,62 +153,133 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base, SDNode *XCoreDAGToDAGISel::Select(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - EVT NVT = N->getValueType(0); - if (NVT == MVT::i32) { - switch (N->getOpcode()) { - default: break; - case ISD::Constant: { - uint64_t Val = cast(N)->getZExtValue(); - if (immMskBitp(N)) { - // Transformation function: get the size of a mask - // Look for the first non-zero bit - SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val)); - return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, - MVT::i32, MskSize); - } - else if (!isUInt<16>(Val)) { - SDValue CPIdx = - CurDAG->getTargetConstantPool(ConstantInt::get( - Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI.getPointerTy()); - return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, - MVT::Other, CPIdx, - CurDAG->getEntryNode()); - } - break; - } - case XCoreISD::LADD: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2) }; - return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - case XCoreISD::LSUB: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2) }; - return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, - Ops, 3); - } - case XCoreISD::MACCU: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); - } - case XCoreISD::MACCS: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, - Ops, 4); - } - case XCoreISD::LMUL: { - SDValue Ops[] = { N->getOperand(0), N->getOperand(1), - N->getOperand(2), N->getOperand(3) }; - return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, - Ops, 4); - } - // Other cases are autogenerated. 
+ switch (N->getOpcode()) { + default: break; + case ISD::Constant: { + uint64_t Val = cast(N)->getZExtValue(); + if (immMskBitp(N)) { + // Transformation function: get the size of a mask + // Look for the first non-zero bit + SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(Val)); + return CurDAG->getMachineNode(XCore::MKMSK_rus, dl, + MVT::i32, MskSize); } + else if (!isUInt<16>(Val)) { + SDValue CPIdx = + CurDAG->getTargetConstantPool(ConstantInt::get( + Type::getInt32Ty(*CurDAG->getContext()), Val), + TLI.getPointerTy()); + return CurDAG->getMachineNode(XCore::LDWCP_lru6, dl, MVT::i32, + MVT::Other, CPIdx, + CurDAG->getEntryNode()); + } + break; + } + case XCoreISD::LADD: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; + return CurDAG->getMachineNode(XCore::LADD_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); + } + case XCoreISD::LSUB: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2) }; + return CurDAG->getMachineNode(XCore::LSUB_l5r, dl, MVT::i32, MVT::i32, + Ops, 3); + } + case XCoreISD::MACCU: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3) }; + return CurDAG->getMachineNode(XCore::MACCU_l4r, dl, MVT::i32, MVT::i32, + Ops, 4); + } + case XCoreISD::MACCS: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3) }; + return CurDAG->getMachineNode(XCore::MACCS_l4r, dl, MVT::i32, MVT::i32, + Ops, 4); + } + case XCoreISD::LMUL: { + SDValue Ops[] = { N->getOperand(0), N->getOperand(1), + N->getOperand(2), N->getOperand(3) }; + return CurDAG->getMachineNode(XCore::LMUL_l6r, dl, MVT::i32, MVT::i32, + Ops, 4); + } + case ISD::BRIND: + if (SDNode *ResNode = SelectBRIND(N)) + return ResNode; + break; + // Other cases are autogenerated. } return SelectCode(N); } + +/// Given a chain return a new chain where any appearance of Old is replaced +/// by New. 
There must be at most one instruction between Old and Chain and +/// this instruction must be a TokenFactor. Returns an empty SDValue if +/// these conditions don't hold. +static SDValue +replaceInChain(SelectionDAG *CurDAG, SDValue Chain, SDValue Old, SDValue New) +{ + if (Chain == Old) + return New; + if (Chain->getOpcode() != ISD::TokenFactor) + return SDValue(); + SmallVector Ops; + bool found = false; + for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) { + if (Chain->getOperand(i) == Old) { + Ops.push_back(New); + found = true; + } else { + Ops.push_back(Chain->getOperand(i)); + } + } + if (!found) + return SDValue(); + return CurDAG->getNode(ISD::TokenFactor, Chain->getDebugLoc(), MVT::Other, + &Ops[0], Ops.size()); +} + +SDNode *XCoreDAGToDAGISel::SelectBRIND(SDNode *N) { + DebugLoc dl = N->getDebugLoc(); + // (brind (int_xcore_checkevent (addr))) + SDValue Chain = N->getOperand(0); + SDValue Addr = N->getOperand(1); + if (Addr->getOpcode() != ISD::INTRINSIC_W_CHAIN) + return 0; + unsigned IntNo = cast(Addr->getOperand(1))->getZExtValue(); + if (IntNo != Intrinsic::xcore_checkevent) + return 0; + SDValue nextAddr = Addr->getOperand(2); + SDValue CheckEventChainOut(Addr.getNode(), 1); + if (!CheckEventChainOut.use_empty()) { + // If the chain out of the checkevent intrinsic is an operand of the + // indirect branch or used in a TokenFactor which is the operand of the + // indirect branch then build a new chain which uses the chain coming into + // the checkevent intrinsic instead. + SDValue CheckEventChainIn = Addr->getOperand(0); + SDValue NewChain = replaceInChain(CurDAG, Chain, CheckEventChainOut, + CheckEventChainIn); + if (!NewChain.getNode()) + return 0; + Chain = NewChain; + } + // Enable events on the thread using setsr 1 and then disable them immediately + // after with clrsr 1. If any resources owned by the thread are ready an event + // will be taken. 
If no resource is ready we branch to the address which was + // the operand to the checkevent intrinsic. + SDValue constOne = getI32Imm(1); + SDValue Glue = + SDValue(CurDAG->getMachineNode(XCore::SETSR_branch_u6, dl, MVT::Glue, + constOne, Chain), 0); + Glue = + SDValue(CurDAG->getMachineNode(XCore::CLRSR_branch_u6, dl, MVT::Glue, + constOne, Glue), 0); + if (nextAddr->getOpcode() == XCoreISD::PCRelativeWrapper && + nextAddr->getOperand(0)->getOpcode() == ISD::TargetBlockAddress) { + return CurDAG->SelectNodeTo(N, XCore::BRFU_lu6, MVT::Other, + nextAddr->getOperand(0), Glue); + } + return CurDAG->SelectNodeTo(N, XCore::BAU_1r, MVT::Other, nextAddr, Glue); +} diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 4817787d7515..5987e8be9a16 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -37,8 +37,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" -#include -#include using namespace llvm; const char *XCoreTargetLowering:: @@ -967,7 +965,7 @@ XCoreTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, // Build a sequence of copy-to-reg nodes chained together with token // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emited instructions must be + // The InFlag in necessary since all emitted instructions must be // stuck together. 
SDValue InFlag; for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index ecdd4cb63000..789546ed304b 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -308,6 +308,16 @@ multiclass FU6_LU6 { !strconcat(OpcStr, " $b"), [(OpNode immU16:$b)]>; } +multiclass FU6_LU6_int { + def _u6: _FU6< + (outs), (ins i32imm:$b), + !strconcat(OpcStr, " $b"), + [(Int immU6:$b)]>; + def _lu6: _FLU6< + (outs), (ins i32imm:$b), + !strconcat(OpcStr, " $b"), + [(Int immU16:$b)]>; +} multiclass FU6_LU6_np { def _u6: _FU6< @@ -638,8 +648,8 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; } } -// TODO extdp, kentsp, krestsp, blat, setsr -// clrsr, getsr, kalli +// TODO extdp, kentsp, krestsp, blat +// getsr, kalli let isBranch = 1, isTerminator = 1, isBarrier = 1 in { def BRBU_u6 : _FU6< (outs), @@ -678,6 +688,17 @@ def LDAWCP_lu6: _FLRU6< "ldaw r11, cp[$a]", [(set R11, ADDRcpii:$a)]>; +defm SETSR : FU6_LU6_int<"setsr", int_xcore_setsr>; + +defm CLRSR : FU6_LU6_int<"clrsr", int_xcore_clrsr>; + +// setsr may cause a branch if it is used to enable events. clrsr may +// branch if it is executed while events are enabled. 
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in { +defm SETSR_branch : FU6_LU6_np<"setsr">; +defm CLRSR_branch : FU6_LU6_np<"clrsr">; +} + // U10 // TODO ldwcpl, blacp @@ -718,7 +739,7 @@ def BL_lu10 : _FLU10< } // Two operand short -// TODO getr, getst +// TODO eet, eef, testwct, tsetmr, sext (reg), zext (reg) def NOT : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "not $dst, $b", [(set GRRegs:$dst, (not GRRegs:$b))]>; @@ -727,8 +748,6 @@ def NEG : _F2R<(outs GRRegs:$dst), (ins GRRegs:$b), "neg $dst, $b", [(set GRRegs:$dst, (ineg GRRegs:$b))]>; -// TODO setd, eet, eef, testwct, tinitpc, tinitdp, -// tinitsp, tinitcp, tsetmr, sext (reg), zext (reg) let Constraints = "$src1 = $dst" in { let neverHasSideEffects = 1 in def SEXT_rus : _FRUS<(outs GRRegs:$dst), (ins GRRegs:$src1, i32imm:$src2), @@ -816,9 +835,29 @@ def SETD_2r : _F2R<(outs), (ins GRRegs:$r, GRRegs:$val), "setd res[$r], $val", [(int_xcore_setd GRRegs:$r, GRRegs:$val)]>; +def GETST_2r : _F2R<(outs GRRegs:$dst), (ins GRRegs:$r), + "getst $dst, res[$r]", + [(set GRRegs:$dst, (int_xcore_getst GRRegs:$r))]>; + +def INITSP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:sp, $src", + [(int_xcore_initsp GRRegs:$t, GRRegs:$src)]>; + +def INITPC_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:pc, $src", + [(int_xcore_initpc GRRegs:$t, GRRegs:$src)]>; + +def INITCP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:cp, $src", + [(int_xcore_initcp GRRegs:$t, GRRegs:$src)]>; + +def INITDP_2r : _F2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:dp, $src", + [(int_xcore_initdp GRRegs:$t, GRRegs:$src)]>; + // Two operand long -// TODO setclk, setrdy, setpsc, endin, peek, -// getd, testlcl, tinitlr, getps, setps +// TODO endin, peek, +// getd, testlcl def BITREV_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), "bitrev $dst, $src", [(set GRRegs:$dst, (int_xcore_bitrev GRRegs:$src))]>; @@ -839,10 +878,41 @@ def SETTW_l2r : _FL2R<(outs), (ins GRRegs:$r, 
GRRegs:$val), "settw res[$r], $val", [(int_xcore_settw GRRegs:$r, GRRegs:$val)]>; +def GETPS_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src), + "get $dst, ps[$src]", + [(set GRRegs:$dst, (int_xcore_getps GRRegs:$src))]>; + +def SETPS_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "set ps[$src1], $src2", + [(int_xcore_setps GRRegs:$src1, GRRegs:$src2)]>; + +def INITLR_l2r : _FL2R<(outs), (ins GRRegs:$t, GRRegs:$src), + "init t[$t]:lr, $src", + [(int_xcore_initlr GRRegs:$t, GRRegs:$src)]>; + +def SETCLK_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "setclk res[$src1], $src2", + [(int_xcore_setclk GRRegs:$src1, GRRegs:$src2)]>; + +def SETRDY_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "setrdy res[$src1], $src2", + [(int_xcore_setrdy GRRegs:$src1, GRRegs:$src2)]>; + +def SETPSC_l2r : _FL2R<(outs), (ins GRRegs:$src1, GRRegs:$src2), + "setpsc res[$src1], $src2", + [(int_xcore_setpsc GRRegs:$src1, GRRegs:$src2)]>; + // One operand short -// TODO edu, eeu, waitet, waitef, tstart, msync, mjoin, clrtp +// TODO edu, eeu, waitet, waitef, tstart, clrtp // setdp, setcp, setev, kcall // dgetreg +def MSYNC_1r : _F1R<(outs), (ins GRRegs:$i), + "msync res[$i]", + [(int_xcore_msync GRRegs:$i)]>; +def MJOIN_1r : _F1R<(outs), (ins GRRegs:$i), + "mjoin res[$i]", + [(int_xcore_mjoin GRRegs:$i)]>; + let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in def BAU_1r : _F1R<(outs), (ins GRRegs:$addr), "bau $addr", @@ -899,7 +969,7 @@ def EEU_1r : _F1R<(outs), (ins GRRegs:$r), [(int_xcore_eeu GRRegs:$r)]>; // Zero operand short -// TODO ssync, freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, +// TODO freet, ldspc, stspc, ldssr, stssr, ldsed, stsed, // stet, geted, getet, getkep, getksp, setkep, getid, kret, dcall, dret, // dentsp, drestsp @@ -910,6 +980,10 @@ def GETID_0R : _F0R<(outs), (ins), "get r11, id", [(set R11, (int_xcore_getid))]>; +def SSYNC_0r : _F0R<(outs), (ins), + "ssync", + [(int_xcore_ssync)]>; + let isBranch=1, 
isIndirectBranch=1, isTerminator=1, isBarrier = 1, hasSideEffects = 1 in def WAITEU_0R : _F0R<(outs), (ins), diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 56c0879cc8fc..0287a5135837 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -104,6 +104,11 @@ XCoreRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { return TFI->hasFP(MF); } +bool +XCoreRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const { + return false; +} + // This function eliminates ADJCALLSTACKDOWN, // ADJCALLSTACKUP pseudo instructions void XCoreRegisterInfo:: diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index 218575581d4a..770483b68615 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -48,6 +48,8 @@ struct XCoreRegisterInfo : public XCoreGenRegisterInfo { bool requiresRegisterScavenging(const MachineFunction &MF) const; + bool useFPForScavengingIndex(const MachineFunction &MF) const; + void eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 0c650cfe6440..54a7f679e01c 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -771,8 +771,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. 
// for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I) { diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index efdeec564051..179b150c1478 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -20,5 +20,4 @@ add_llvm_library(LLVMipo PruneEH.cpp StripDeadPrototypes.cpp StripSymbols.cpp - StructRetPromotion.cpp ) diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index b42322116a98..d4eaf0c4a3ec 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -49,7 +49,7 @@ namespace { /// Struct that represents (part of) either a return value or a function /// argument. Used so that arguments and return values can be used - /// interchangably. + /// interchangeably. struct RetOrArg { RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx), IsArg(IsArg) {} @@ -273,8 +273,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { // function empty. NF->getBasicBlockList().splice(NF->begin(), Fn.getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. While we're at + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. While we're at // it, remove the dead arguments from the DeadArguments list. // for (Function::arg_iterator I = Fn.arg_begin(), E = Fn.arg_end(), @@ -294,7 +294,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { /// instead. bool DAE::RemoveDeadArgumentsFromCallers(Function &Fn) { - if (Fn.isDeclaration()) + if (Fn.isDeclaration() || Fn.mayBeOverridden()) return false; // Functions with local linkage should already have been handled. 
@@ -379,7 +379,7 @@ DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U, // The value is returned from a function. It's only live when the // function's return value is live. We use RetValNum here, for the case // that U is really a use of an insertvalue instruction that uses the - // orginal Use. + // original Use. RetOrArg Use = CreateRet(RI->getParent()->getParent(), RetValNum); // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); @@ -894,8 +894,8 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { // function empty. NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - // Loop over the argument list, transfering uses of the old arguments over to - // the new arguments, also transfering over the names as well. + // Loop over the argument list, transferring uses of the old arguments over to + // the new arguments, also transferring over the names as well. i = 0; for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(), I2 = NF->arg_begin(); I != E; ++I, ++i) diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index d4cb71272f76..ded58aca75fc 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -21,6 +21,7 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -54,6 +55,7 @@ STATISTIC(NumCtorsEvaluated, "Number of static ctors evaluated"); STATISTIC(NumNestRemoved , "Number of nest attributes removed"); STATISTIC(NumAliasesResolved, "Number of global aliases resolved"); STATISTIC(NumAliasesRemoved, "Number of global aliases eliminated"); +STATISTIC(NumCXXDtorsRemoved, "Number of global C++ destructors removed"); namespace { struct GlobalStatus; @@ -77,6 +79,7 @@ namespace { bool ProcessInternalGlobal(GlobalVariable *GV,Module::global_iterator &GVI, 
const SmallPtrSet &PHIUsers, const GlobalStatus &GS); + bool OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn); }; } @@ -1191,9 +1194,11 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo, const StructType *ST = cast(cast(PN->getType())->getElementType()); - Result = + PHINode *NewPN = PHINode::Create(PointerType::getUnqual(ST->getElementType(FieldNo)), + PN->getNumIncomingValues(), PN->getName()+".f"+Twine(FieldNo), PN); + Result = NewPN; PHIsToRewrite.push_back(std::make_pair(PN, FieldNo)); } else { llvm_unreachable("Unknown usable value"); @@ -1940,36 +1945,24 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { return Changed; } -/// FindGlobalCtors - Find the llvm.globalctors list, verifying that all +/// FindGlobalCtors - Find the llvm.global_ctors list, verifying that all /// initializers have an init priority of 65535. GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); if (GV == 0) return 0; - // Found it, verify it's an array of { int, void()* }. - const ArrayType *ATy =dyn_cast(GV->getType()->getElementType()); - if (!ATy) return 0; - const StructType *STy = dyn_cast(ATy->getElementType()); - if (!STy || STy->getNumElements() != 2 || - !STy->getElementType(0)->isIntegerTy(32)) return 0; - const PointerType *PFTy = dyn_cast(STy->getElementType(1)); - if (!PFTy) return 0; - const FunctionType *FTy = dyn_cast(PFTy->getElementType()); - if (!FTy || !FTy->getReturnType()->isVoidTy() || - FTy->isVarArg() || FTy->getNumParams() != 0) - return 0; - // Verify that the initializer is simple enough for us to handle. We are // only allowed to optimize the initializer if it is unique. 
if (!GV->hasUniqueInitializer()) return 0; - - ConstantArray *CA = dyn_cast(GV->getInitializer()); - if (!CA) return 0; - + + if (isa(GV->getInitializer())) + return GV; + ConstantArray *CA = cast(GV->getInitializer()); + for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) { - ConstantStruct *CS = dyn_cast(*i); - if (CS == 0) return 0; - + if (isa(*i)) + continue; + ConstantStruct *CS = cast(*i); if (isa(CS->getOperand(1))) continue; @@ -1978,8 +1971,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { return 0; // Init priority must be standard. - ConstantInt *CI = dyn_cast(CS->getOperand(0)); - if (!CI || CI->getZExtValue() != 65535) + ConstantInt *CI = cast(CS->getOperand(0)); + if (CI->getZExtValue() != 65535) return 0; } @@ -1989,6 +1982,8 @@ GlobalVariable *GlobalOpt::FindGlobalCtors(Module &M) { /// ParseGlobalCtors - Given a llvm.global_ctors list that we can understand, /// return a list of the functions and null terminator as a vector. static std::vector ParseGlobalCtors(GlobalVariable *GV) { + if (GV->getInitializer()->isNullValue()) + return std::vector(); ConstantArray *CA = cast(GV->getInitializer()); std::vector Result; Result.reserve(CA->getNumOperands()); @@ -2019,7 +2014,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL, const PointerType *PFTy = PointerType::getUnqual(FTy); CSVals[1] = Constant::getNullValue(PFTy); CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), - 2147483647); + 0x7fffffff); } CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false)); } @@ -2696,12 +2691,126 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { return Changed; } +static Function *FindCXAAtExit(Module &M) { + Function *Fn = M.getFunction("__cxa_atexit"); + + if (!Fn) + return 0; + + const FunctionType *FTy = Fn->getFunctionType(); + + // Checking that the function has the right return type, the right number of + // parameters and that they all have pointer types should be enough. 
+ if (!FTy->getReturnType()->isIntegerTy() || + FTy->getNumParams() != 3 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isPointerTy() || + !FTy->getParamType(2)->isPointerTy()) + return 0; + + return Fn; +} + +/// cxxDtorIsEmpty - Returns whether the given function is an empty C++ +/// destructor and can therefore be eliminated. +/// Note that we assume that other optimization passes have already simplified +/// the code so we only look for a function with a single basic block, where +/// the only allowed instructions are 'ret' or 'call' to empty C++ dtor. +static bool cxxDtorIsEmpty(const Function &Fn, + SmallPtrSet &CalledFunctions) { + // FIXME: We could eliminate C++ destructors if they're readonly/readnone and + // nounwind, but that doesn't seem worth doing. + if (Fn.isDeclaration()) + return false; + + if (++Fn.begin() != Fn.end()) + return false; + + const BasicBlock &EntryBlock = Fn.getEntryBlock(); + for (BasicBlock::const_iterator I = EntryBlock.begin(), E = EntryBlock.end(); + I != E; ++I) { + if (const CallInst *CI = dyn_cast(I)) { + // Ignore debug intrinsics. + if (isa(CI)) + continue; + + const Function *CalledFn = CI->getCalledFunction(); + + if (!CalledFn) + return false; + + SmallPtrSet NewCalledFunctions(CalledFunctions); + + // Don't treat recursive functions as empty. + if (!NewCalledFunctions.insert(CalledFn)) + return false; + + if (!cxxDtorIsEmpty(*CalledFn, NewCalledFunctions)) + return false; + } else if (isa(*I)) + return true; + else + return false; + } + + return false; +} + +bool GlobalOpt::OptimizeEmptyGlobalCXXDtors(Function *CXAAtExitFn) { + /// Itanium C++ ABI p3.3.5: + /// + /// After constructing a global (or local static) object, that will require + /// destruction on exit, a termination function is registered as follows: + /// + /// extern "C" int __cxa_atexit ( void (*f)(void *), void *p, void *d ); + /// + /// This registration, e.g. 
__cxa_atexit(f,p,d), is intended to cause the + /// call f(p) when DSO d is unloaded, before all such termination calls + /// registered before this one. It returns zero if registration is + /// successful, nonzero on failure. + + // This pass will look for calls to __cxa_atexit where the function is trivial + // and remove them. + bool Changed = false; + + for (Function::use_iterator I = CXAAtExitFn->use_begin(), + E = CXAAtExitFn->use_end(); I != E;) { + // We're only interested in calls. Theoretically, we could handle invoke + // instructions as well, but neither llvm-gcc nor clang generate invokes + // to __cxa_atexit. + CallInst *CI = dyn_cast(*I++); + if (!CI) + continue; + + Function *DtorFn = + dyn_cast(CI->getArgOperand(0)->stripPointerCasts()); + if (!DtorFn) + continue; + + SmallPtrSet CalledFunctions; + if (!cxxDtorIsEmpty(*DtorFn, CalledFunctions)) + continue; + + // Just remove the call. + CI->replaceAllUsesWith(Constant::getNullValue(CI->getType())); + CI->eraseFromParent(); + + ++NumCXXDtorsRemoved; + + Changed |= true; + } + + return Changed; +} + bool GlobalOpt::runOnModule(Module &M) { bool Changed = false; // Try to find the llvm.globalctors list. GlobalVariable *GlobalCtors = FindGlobalCtors(M); + Function *CXAAtExitFn = FindCXAAtExit(M); + bool LocalChange = true; while (LocalChange) { LocalChange = false; @@ -2718,6 +2827,11 @@ bool GlobalOpt::runOnModule(Module &M) { // Resolve aliases, when possible. LocalChange |= OptimizeGlobalAliases(M); + + // Try to remove trivial global destructors. 
+ if (CXAAtExitFn) + LocalChange |= OptimizeEmptyGlobalCXXDtors(CXAAtExitFn); + Changed |= LocalChange; } diff --git a/lib/Transforms/IPO/IPConstantPropagation.cpp b/lib/Transforms/IPO/IPConstantPropagation.cpp index c7c293987a58..25c01346642b 100644 --- a/lib/Transforms/IPO/IPConstantPropagation.cpp +++ b/lib/Transforms/IPO/IPConstantPropagation.cpp @@ -186,7 +186,7 @@ bool IPCP::PropagateConstantReturn(Function &F) { // Find the returned value Value *V; if (!STy) - V = RI->getOperand(i); + V = RI->getOperand(0); else V = FindInsertedValue(RI->getOperand(0), i); diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp index fbe90ce67591..21dcb519d9c9 100644 --- a/lib/Transforms/IPO/IPO.cpp +++ b/lib/Transforms/IPO/IPO.cpp @@ -45,7 +45,6 @@ void llvm::initializeIPO(PassRegistry &Registry) { initializeStripDebugDeclarePass(Registry); initializeStripDeadDebugInfoPass(Registry); initializeStripNonDebugSymbolsPass(Registry); - initializeSRETPromotionPass(Registry); } void LLVMInitializeIPO(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index 37eafd723bf8..57f3e772b569 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -29,7 +29,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include using namespace llvm; STATISTIC(NumInlined, "Number of functions inlined"); diff --git a/lib/Transforms/IPO/Internalize.cpp b/lib/Transforms/IPO/Internalize.cpp index 9b9ebad47225..7cb1d18f933d 100644 --- a/lib/Transforms/IPO/Internalize.cpp +++ b/lib/Transforms/IPO/Internalize.cpp @@ -126,6 +126,8 @@ bool InternalizePass::runOnModule(Module &M) { // FIXME: maybe use private linkage? for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) if (!I->isDeclaration() && // Function must be defined here + // Available externally is really just a "declaration with a body". 
+ !I->hasAvailableExternallyLinkage() && !I->hasLocalLinkage() && // Can't already have internal linkage !ExternalNames.count(I->getName())) {// Not marked to keep external? I->setLinkage(GlobalValue::InternalLinkage); @@ -144,9 +146,6 @@ bool InternalizePass::runOnModule(Module &M) { // Never internalize anchors used by the machine module info, else the info // won't find them. (see MachineModuleInfo.) - ExternalNames.insert("llvm.dbg.compile_units"); - ExternalNames.insert("llvm.dbg.global_variables"); - ExternalNames.insert("llvm.dbg.subprograms"); ExternalNames.insert("llvm.global_ctors"); ExternalNames.insert("llvm.global_dtors"); ExternalNames.insert("llvm.noinline"); diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp index b545f0bb267d..52ecf17b8f9b 100644 --- a/lib/Transforms/IPO/LowerSetJmp.cpp +++ b/lib/Transforms/IPO/LowerSetJmp.cpp @@ -430,7 +430,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst) // This PHI node will be in the new block created from the // splitBasicBlock call. - PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), + PHINode* PHI = PHINode::Create(Type::getInt32Ty(Inst->getContext()), 2, "SetJmpReturn", Inst); // Coming from a call to setjmp, the return is 0. 
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index cccffca6e384..f74144338a61 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -55,6 +55,7 @@ #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Module.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" @@ -125,7 +126,7 @@ class ComparableFunction { const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); const ComparableFunction ComparableFunction::TombstoneKey = ComparableFunction(1); -TargetData * const ComparableFunction::LookupOnly = (TargetData*)(-1); +TargetData *const ComparableFunction::LookupOnly = (TargetData*)(-1); } @@ -212,7 +213,7 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1, return false; } - switch(Ty1->getTypeID()) { + switch (Ty1->getTypeID()) { default: llvm_unreachable("Unknown type!"); // Fall through in Release mode. 
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 2afd02985764..d9d1d106111e 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -95,7 +95,7 @@ Function* PartialInliner::unswitchFunction(Function* F) { PHINode* OldPhi = dyn_cast(I); if (!OldPhi) break; - PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins); + PHINode* retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); OldPhi->replaceAllUsesWith(retPhi); Ins = newReturnBlock->getFirstNonPHI(); diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index d91c2c403aae..9470180c5657 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -27,7 +27,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Support/CFG.h" -#include #include using namespace llvm; diff --git a/lib/Transforms/IPO/StructRetPromotion.cpp b/lib/Transforms/IPO/StructRetPromotion.cpp deleted file mode 100644 index 584deacaff1b..000000000000 --- a/lib/Transforms/IPO/StructRetPromotion.cpp +++ /dev/null @@ -1,357 +0,0 @@ -//===-- StructRetPromotion.cpp - Promote sret arguments -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass finds functions that return a struct (using a pointer to the struct -// as the first argument of the function, marked with the 'sret' attribute) and -// replaces them with a new function that simply returns each of the elements of -// that struct (using multiple return values). -// -// This pass works under a number of conditions: -// 1. The returned struct must not contain other structs -// 2. The returned struct must only be used to load values from -// 3. 
The placeholder struct passed in is the result of an alloca -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "sretpromotion" -#include "llvm/Transforms/IPO.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/CallGraphSCCPass.h" -#include "llvm/Instructions.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumRejectedSRETUses , "Number of sret rejected due to unexpected uses"); -STATISTIC(NumSRET , "Number of sret promoted"); -namespace { - /// SRETPromotion - This pass removes sret parameter and updates - /// function to use multiple return value. - /// - struct SRETPromotion : public CallGraphSCCPass { - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - CallGraphSCCPass::getAnalysisUsage(AU); - } - - virtual bool runOnSCC(CallGraphSCC &SCC); - static char ID; // Pass identification, replacement for typeid - SRETPromotion() : CallGraphSCCPass(ID) { - initializeSRETPromotionPass(*PassRegistry::getPassRegistry()); - } - - private: - CallGraphNode *PromoteReturn(CallGraphNode *CGN); - bool isSafeToUpdateAllCallers(Function *F); - Function *cloneFunctionBody(Function *F, const StructType *STy); - CallGraphNode *updateCallSites(Function *F, Function *NF); - }; -} - -char SRETPromotion::ID = 0; -INITIALIZE_PASS_BEGIN(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) -INITIALIZE_PASS_END(SRETPromotion, "sretpromotion", - "Promote sret arguments to multiple ret values", false, false) - -Pass *llvm::createStructRetPromotionPass() { - return new SRETPromotion(); -} - -bool 
SRETPromotion::runOnSCC(CallGraphSCC &SCC) { - bool Changed = false; - - for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) - if (CallGraphNode *NewNode = PromoteReturn(*I)) { - SCC.ReplaceNode(*I, NewNode); - Changed = true; - } - - return Changed; -} - -/// PromoteReturn - This method promotes function that uses StructRet paramater -/// into a function that uses multiple return values. -CallGraphNode *SRETPromotion::PromoteReturn(CallGraphNode *CGN) { - Function *F = CGN->getFunction(); - - if (!F || F->isDeclaration() || !F->hasLocalLinkage()) - return 0; - - // Make sure that function returns struct. - if (F->arg_size() == 0 || !F->hasStructRetAttr() || F->doesNotReturn()) - return 0; - - DEBUG(dbgs() << "SretPromotion: Looking at sret function " - << F->getName() << "\n"); - - assert(F->getReturnType()->isVoidTy() && "Invalid function return type"); - Function::arg_iterator AI = F->arg_begin(); - const llvm::PointerType *FArgType = dyn_cast(AI->getType()); - assert(FArgType && "Invalid sret parameter type"); - const llvm::StructType *STy = - dyn_cast(FArgType->getElementType()); - assert(STy && "Invalid sret parameter element type"); - - // Check if it is ok to perform this promotion. 
- if (isSafeToUpdateAllCallers(F) == false) { - DEBUG(dbgs() << "SretPromotion: Not all callers can be updated\n"); - ++NumRejectedSRETUses; - return 0; - } - - DEBUG(dbgs() << "SretPromotion: sret argument will be promoted\n"); - ++NumSRET; - // [1] Replace use of sret parameter - AllocaInst *TheAlloca = new AllocaInst(STy, NULL, "mrv", - F->getEntryBlock().begin()); - Value *NFirstArg = F->arg_begin(); - NFirstArg->replaceAllUsesWith(TheAlloca); - - // [2] Find and replace ret instructions - for (Function::iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for(BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ) { - Instruction *I = BI; - ++BI; - if (isa(I)) { - Value *NV = new LoadInst(TheAlloca, "mrv.ld", I); - ReturnInst *NR = ReturnInst::Create(F->getContext(), NV, I); - I->replaceAllUsesWith(NR); - I->eraseFromParent(); - } - } - - // [3] Create the new function body and insert it into the module. - Function *NF = cloneFunctionBody(F, STy); - - // [4] Update all call sites to use new function - CallGraphNode *NF_CFN = updateCallSites(F, NF); - - CallGraph &CG = getAnalysis(); - NF_CFN->stealCalledFunctionsFrom(CG[F]); - - delete CG.removeFunctionFromModule(F); - return NF_CFN; -} - -// Check if it is ok to perform this promotion. -bool SRETPromotion::isSafeToUpdateAllCallers(Function *F) { - - if (F->use_empty()) - // No users. OK to modify signature. - return true; - - for (Value::use_iterator FnUseI = F->use_begin(), FnUseE = F->use_end(); - FnUseI != FnUseE; ++FnUseI) { - // The function is passed in as an argument to (possibly) another function, - // we can't change it! - CallSite CS(*FnUseI); - Instruction *Call = CS.getInstruction(); - // The function is used by something else than a call or invoke instruction, - // we can't change it! - if (!Call || !CS.isCallee(FnUseI)) - return false; - CallSite::arg_iterator AI = CS.arg_begin(); - Value *FirstArg = *AI; - - if (!isa(FirstArg)) - return false; - - // Check FirstArg's users. 
- for (Value::use_iterator ArgI = FirstArg->use_begin(), - ArgE = FirstArg->use_end(); ArgI != ArgE; ++ArgI) { - User *U = *ArgI; - // If FirstArg user is a CallInst that does not correspond to current - // call site then this function F is not suitable for sret promotion. - if (CallInst *CI = dyn_cast(U)) { - if (CI != Call) - return false; - } - // If FirstArg user is a GEP whose all users are not LoadInst then - // this function F is not suitable for sret promotion. - else if (GetElementPtrInst *GEP = dyn_cast(U)) { - // TODO : Use dom info and insert PHINodes to collect get results - // from multiple call sites for this GEP. - if (GEP->getParent() != Call->getParent()) - return false; - for (Value::use_iterator GEPI = GEP->use_begin(), GEPE = GEP->use_end(); - GEPI != GEPE; ++GEPI) - if (!isa(*GEPI)) - return false; - } - // Any other FirstArg users make this function unsuitable for sret - // promotion. - else - return false; - } - } - - return true; -} - -/// cloneFunctionBody - Create a new function based on F and -/// insert it into module. Remove first argument. Use STy as -/// the return type for new function. -Function *SRETPromotion::cloneFunctionBody(Function *F, - const StructType *STy) { - - const FunctionType *FTy = F->getFunctionType(); - std::vector Params; - - // Attributes - Keep track of the parameter attributes for the arguments. - SmallVector AttributesVec; - const AttrListPtr &PAL = F->getAttributes(); - - // Add any return attributes. - if (Attributes attrs = PAL.getRetAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(0, attrs)); - - // Skip first argument. - Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); - ++I; - // 0th parameter attribute is reserved for return type. - // 1th parameter attribute is for first 1st sret argument. 
- unsigned ParamIndex = 2; - while (I != E) { - Params.push_back(I->getType()); - if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) - AttributesVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); - ++I; - ++ParamIndex; - } - - // Add any fn attributes. - if (Attributes attrs = PAL.getFnAttributes()) - AttributesVec.push_back(AttributeWithIndex::get(~0, attrs)); - - - FunctionType *NFTy = FunctionType::get(STy, Params, FTy->isVarArg()); - Function *NF = Function::Create(NFTy, F->getLinkage()); - NF->takeName(F); - NF->copyAttributesFrom(F); - NF->setAttributes(AttrListPtr::get(AttributesVec.begin(), AttributesVec.end())); - F->getParent()->getFunctionList().insert(F, NF); - NF->getBasicBlockList().splice(NF->begin(), F->getBasicBlockList()); - - // Replace arguments - I = F->arg_begin(); - E = F->arg_end(); - Function::arg_iterator NI = NF->arg_begin(); - ++I; - while (I != E) { - I->replaceAllUsesWith(NI); - NI->takeName(I); - ++I; - ++NI; - } - - return NF; -} - -/// updateCallSites - Update all sites that call F to use NF. -CallGraphNode *SRETPromotion::updateCallSites(Function *F, Function *NF) { - CallGraph &CG = getAnalysis(); - SmallVector Args; - - // Attributes - Keep track of the parameter attributes for the arguments. - SmallVector ArgAttrsVec; - - // Get a new callgraph node for NF. - CallGraphNode *NF_CGN = CG.getOrInsertFunction(NF); - - while (!F->use_empty()) { - CallSite CS(*F->use_begin()); - Instruction *Call = CS.getInstruction(); - - const AttrListPtr &PAL = F->getAttributes(); - // Add any return attributes. - if (Attributes attrs = PAL.getRetAttributes()) - ArgAttrsVec.push_back(AttributeWithIndex::get(0, attrs)); - - // Copy arguments, however skip first one. - CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); - Value *FirstCArg = *AI; - ++AI; - // 0th parameter attribute is reserved for return type. - // 1th parameter attribute is for first 1st sret argument. 
- unsigned ParamIndex = 2; - while (AI != AE) { - Args.push_back(*AI); - if (Attributes Attrs = PAL.getParamAttributes(ParamIndex)) - ArgAttrsVec.push_back(AttributeWithIndex::get(ParamIndex - 1, Attrs)); - ++ParamIndex; - ++AI; - } - - // Add any function attributes. - if (Attributes attrs = PAL.getFnAttributes()) - ArgAttrsVec.push_back(AttributeWithIndex::get(~0, attrs)); - - AttrListPtr NewPAL = AttrListPtr::get(ArgAttrsVec.begin(), ArgAttrsVec.end()); - - // Build new call instruction. - Instruction *New; - if (InvokeInst *II = dyn_cast(Call)) { - New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(), - Args.begin(), Args.end(), "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(NewPAL); - } else { - New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call); - cast(New)->setCallingConv(CS.getCallingConv()); - cast(New)->setAttributes(NewPAL); - if (cast(Call)->isTailCall()) - cast(New)->setTailCall(); - } - Args.clear(); - ArgAttrsVec.clear(); - New->takeName(Call); - - // Update the callgraph to know that the callsite has been transformed. - CallGraphNode *CalleeNode = CG[Call->getParent()->getParent()]; - CalleeNode->removeCallEdgeFor(Call); - CalleeNode->addCalledFunction(New, NF_CGN); - - // Update all users of sret parameter to extract value using extractvalue. 
- for (Value::use_iterator UI = FirstCArg->use_begin(), - UE = FirstCArg->use_end(); UI != UE; ) { - User *U2 = *UI++; - CallInst *C2 = dyn_cast(U2); - if (C2 && (C2 == Call)) - continue; - - GetElementPtrInst *UGEP = cast(U2); - ConstantInt *Idx = cast(UGEP->getOperand(2)); - Value *GR = ExtractValueInst::Create(New, Idx->getZExtValue(), - "evi", UGEP); - while(!UGEP->use_empty()) { - // isSafeToUpdateAllCallers has checked that all GEP uses are - // LoadInsts - LoadInst *L = cast(*UGEP->use_begin()); - L->replaceAllUsesWith(GR); - L->eraseFromParent(); - } - UGEP->eraseFromParent(); - continue; - } - Call->eraseFromParent(); - } - - return NF_CGN; -} - diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 9c2969c7ab22..9c70cf89e48c 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -11,6 +11,7 @@ #define INSTCOMBINE_INSTCOMBINE_H #include "InstCombineWorklist.h" +#include "llvm/Operator.h" #include "llvm/Pass.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Support/IRBuilder.h" @@ -69,7 +70,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner : public FunctionPass, public InstVisitor { TargetData *TD; - bool MustPreserveLCSSA; bool MadeIRChange; public: /// Worklist - All of the instructions that need to be simplified. @@ -217,8 +217,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner Instruction *transformCallThroughTrampoline(CallSite CS); Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI, bool DoXform = true); + Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); - DbgDeclareInst *hasOneUsePlusDeclare(Value *V); Value *EmitGEPOffset(User *GEP); public: @@ -247,7 +247,10 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner // segment of unreachable code, so just clobber the instruction. 
if (&I == V) V = UndefValue::get(I.getType()); - + + DEBUG(errs() << "IC: Replacing " << I << "\n" + " with " << *V << '\n'); + I.replaceAllUsesWith(V); return &I; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 7986d1aca762..a08446e5d519 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -14,6 +14,7 @@ #include "InstCombine.h" #include "llvm/Intrinsics.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Support/ConstantRange.h" #include "llvm/Support/PatternMatch.h" using namespace llvm; using namespace PatternMatch; @@ -330,7 +331,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op, /// InsertRangeTest - Emit a computation of: (V >= Lo && V < Hi) if Inside is -/// true, otherwise (V < Lo || V >= Hi). In pratice, we emit the more efficient +/// true, otherwise (V < Lo || V >= Hi). In practice, we emit the more efficient /// (V-Lo) CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp slt A, 0) & (icmp slt B, 0) --> (icmp slt (A&B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } + + // (icmp sgt A, -1) & (icmp sgt B, -1) --> (icmp sgt (A|B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + } + + // (trunc x) == C1 & (and x, CA) == C2 -> (and x, CA|CMAX) == C1|C2 + // where CMAX is the all ones value for the truncated type, + // iff the lower bits of C2 and CA are zero. 
+ if (LHSCC == RHSCC && ICmpInst::isEquality(LHSCC) && + LHS->hasOneUse() && RHS->hasOneUse()) { + Value *V; + ConstantInt *AndCst, *SmallCst = 0, *BigCst = 0; + + // (trunc x) == C1 & (and x, CA) == C2 + if (match(Val2, m_Trunc(m_Value(V))) && + match(Val, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = RHSCst; + BigCst = LHSCst; + } + // (and x, CA) == C2 & (trunc x) == C1 + else if (match(Val, m_Trunc(m_Value(V))) && + match(Val2, m_And(m_Specific(V), m_ConstantInt(AndCst)))) { + SmallCst = LHSCst; + BigCst = RHSCst; + } + + if (SmallCst && BigCst) { + unsigned BigBitSize = BigCst->getType()->getBitWidth(); + unsigned SmallBitSize = SmallCst->getType()->getBitWidth(); + + // Check that the low bits are zero. + APInt Low = APInt::getLowBitsSet(BigBitSize, SmallBitSize); + if ((Low & AndCst->getValue()) == 0 && (Low & BigCst->getValue()) == 0) { + Value *NewAnd = Builder->CreateAnd(V, Low | AndCst->getValue()); + APInt N = SmallCst->getValue().zext(BigBitSize) | BigCst->getValue(); + Value *NewVal = ConstantInt::get(AndCst->getType()->getContext(), N); + return Builder->CreateICmp(LHSCC, NewAnd, NewVal); + } + } } // From here on, we only handle: @@ -767,7 +816,17 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { LHSCC == ICmpInst::ICMP_SGE || LHSCC == ICmpInst::ICMP_SLE || RHSCC == ICmpInst::ICMP_SGE || RHSCC == ICmpInst::ICMP_SLE) return 0; - + + // Make a constant range that's the intersection of the two icmp ranges. + // If the intersection is empty, we know that the result is false. + ConstantRange LHSRange = + ConstantRange::makeICmpRegion(LHSCC, LHSCst->getValue()); + ConstantRange RHSRange = + ConstantRange::makeICmpRegion(RHSCC, RHSCst->getValue()); + + if (LHSRange.intersectWith(RHSRange).isEmptySet()) + return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); + // We can't fold (ugt x, C) & (sgt x, C2). 
if (!PredicatesFoldable(LHSCC, RHSCC)) return 0; @@ -800,10 +859,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_EQ: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X == 13 & X == 15) -> false - case ICmpInst::ICMP_UGT: // (X == 13 & X > 15) -> false - case ICmpInst::ICMP_SGT: // (X == 13 & X > 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_NE: // (X == 13 & X != 15) -> X == 13 case ICmpInst::ICMP_ULT: // (X == 13 & X < 15) -> X == 13 case ICmpInst::ICMP_SLT: // (X == 13 & X < 15) -> X == 13 @@ -851,9 +906,6 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) { case ICmpInst::ICMP_SLT: switch (RHSCC) { default: llvm_unreachable("Unknown integer condition code!"); - case ICmpInst::ICMP_EQ: // (X s< 13 & X == 15) -> false - case ICmpInst::ICMP_SGT: // (X s< 13 & X s> 15) -> false - return ConstantInt::get(CmpInst::makeCmpResultType(LHS->getType()), 0); case ICmpInst::ICMP_UGT: // (X s< 13 & X u> 15) -> no change break; case ICmpInst::ICMP_NE: // (X s< 13 & X != 15) -> X < 13 @@ -1438,6 +1490,18 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS) { Value *NewOr = Builder->CreateOr(Val, Val2); return Builder->CreateICmp(LHSCC, NewOr, LHSCst); } + + // (icmp slt A, 0) | (icmp slt B, 0) --> (icmp slt (A|B), 0) + if (LHSCC == ICmpInst::ICMP_SLT && LHSCst->isZero()) { + Value *NewOr = Builder->CreateOr(Val, Val2); + return Builder->CreateICmp(LHSCC, NewOr, LHSCst); + } + + // (icmp sgt A, -1) | (icmp sgt B, -1) --> (icmp sgt (A&B), -1) + if (LHSCC == ICmpInst::ICMP_SGT && LHSCst->isAllOnesValue()) { + Value *NewAnd = Builder->CreateAnd(Val, Val2); + return Builder->CreateICmp(LHSCC, NewAnd, LHSCst); + } } // (icmp ult (X + CA), C1) | (icmp eq X, C2) -> (icmp ule (X + CA), C1) @@ -1975,7 +2039,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { } } } - + + // 
or(sext(A), B) -> A ? -1 : B where A is an i1 + // or(A, sext(B)) -> B ? -1 : A where B is an i1 + if (match(Op0, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op1); + if (match(Op1, m_SExt(m_Value(A))) && A->getType()->isIntegerTy(1)) + return SelectInst::Create(A, ConstantInt::getSigned(I.getType(), -1), Op0); + // Note: If we've gotten to the point of visiting the outer OR, then the // inner one couldn't be simplified. If it was a constant, then it won't // be simplified by a later pass either, so we try swapping the inner/outer diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0e464507a7e4..726105f75d6f 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -475,7 +475,36 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { } } break; - case Intrinsic::umul_with_overflow: + case Intrinsic::umul_with_overflow: { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + unsigned BitWidth = cast(LHS->getType())->getBitWidth(); + APInt Mask = APInt::getAllOnesValue(BitWidth); + + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne); + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne); + + // Get the largest possible values for each operand. + APInt LHSMax = ~LHSKnownZero; + APInt RHSMax = ~RHSKnownZero; + + // If multiplying the maximum values does not overflow then we can turn + // this into a plain NUW mul. 
+ bool Overflow; + LHSMax.umul_ov(RHSMax, Overflow); + if (!Overflow) { + Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow"); + Constant *V[] = { + UndefValue::get(LHS->getType()), + Builder->getFalse() + }; + Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false); + return InsertValueInst::Create(Struct, Mul, 0); + } + } // FALL THROUGH case Intrinsic::smul_with_overflow: // Canonicalize constants into the RHS. if (isa(II->getArgOperand(0)) && @@ -508,11 +537,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::ppc_altivec_lvx: case Intrinsic::ppc_altivec_lvxl: - case Intrinsic::x86_sse_loadu_ps: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse2_loadu_dq: - // Turn PPC lvx -> load if the pointer is known aligned. - // Turn X86 loadups -> load if the pointer is known aligned. + // Turn PPC lvx -> load if the pointer is known aligned. if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, TD) >= 16) { Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0), PointerType::getUnqual(II->getType())); @@ -731,9 +756,13 @@ class InstCombineFortifiedLibCalls : public SimplifyFortifiedLibCalls { dyn_cast(CI->getArgOperand(SizeCIOp))) { if (SizeCI->isAllOnesValue()) return true; - if (isString) - return SizeCI->getZExtValue() >= - GetStringLength(CI->getArgOperand(SizeArgOp)); + if (isString) { + uint64_t Len = GetStringLength(CI->getArgOperand(SizeArgOp)); + // If the length is 0 we don't know how long it is and so we can't + // remove the check. 
+ if (Len == 0) return false; + return SizeCI->getZExtValue() >= Len; + } if (ConstantInt *Arg = dyn_cast( CI->getArgOperand(SizeArgOp))) return SizeCI->getZExtValue() >= Arg->getZExtValue(); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index b432641a1403..6f70de865764 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -87,10 +87,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, // If the allocation has multiple uses, only promote it if we are strictly // increasing the alignment of the resultant allocation. If we keep it the - // same, we open the door to infinite loops of various kinds. (A reference - // from a dbg.declare doesn't count as a use for this purpose.) - if (!AI.hasOneUse() && !hasOneUsePlusDeclare(&AI) && - CastElTyAlign == AllocElTyAlign) return 0; + // same, we open the door to infinite loops of various kinds. + if (!AI.hasOneUse() && CastElTyAlign == AllocElTyAlign) return 0; uint64_t AllocElTySize = TD->getTypeAllocSize(AllocElTy); uint64_t CastElTySize = TD->getTypeAllocSize(CastElTy); @@ -128,15 +126,10 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI, New->setAlignment(AI.getAlignment()); New->takeName(&AI); - // If the allocation has one real use plus a dbg.declare, just remove the - // declare. - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(&AI)) { - EraseInstFromFunction(*(Instruction*)DI); - } // If the allocation has multiple real uses, insert a cast and change all // things that used it to use the new cast. This will also hack on CI, but it // will die soon. - else if (!AI.hasOneUse()) { + if (!AI.hasOneUse()) { // New is the allocation instruction, pointer typed. AI is the original // allocation instruction, also pointer typed. Thus, cast to use is BitCast. 
Value *NewCast = AllocaBuilder.CreateBitCast(New, AI.getType(), "tmpcast"); @@ -203,7 +196,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty, } case Instruction::PHI: { PHINode *OPN = cast(I); - PHINode *NPN = PHINode::Create(Ty); + PHINode *NPN = PHINode::Create(Ty, OPN->getNumIncomingValues()); for (unsigned i = 0, e = OPN->getNumIncomingValues(); i != e; ++i) { Value *V =EvaluateInDifferentType(OPN->getIncomingValue(i), Ty, isSigned); NPN->addIncoming(V, OPN->getIncomingBlock(i)); @@ -883,6 +876,102 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) { return 0; } +/// transformSExtICmp - Transform (sext icmp) to bitwise / integer operations +/// in order to eliminate the icmp. +Instruction *InstCombiner::transformSExtICmp(ICmpInst *ICI, Instruction &CI) { + Value *Op0 = ICI->getOperand(0), *Op1 = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + + if (ConstantInt *Op1C = dyn_cast(Op1)) { + // (x ashr x, 31 -> all ones if negative + // (x >s -1) ? -1 : 0 -> not (ashr x, 31) -> all ones if positive + if ((Pred == ICmpInst::ICMP_SLT && Op1C->isZero()) || + (Pred == ICmpInst::ICMP_SGT && Op1C->isAllOnesValue())) { + + Value *Sh = ConstantInt::get(Op0->getType(), + Op0->getType()->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, Sh, Op0->getName()+".lobit"); + if (In->getType() != CI.getType()) + In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); + + if (Pred == ICmpInst::ICMP_SGT) + In = Builder->CreateNot(In, In->getName()+".not"); + return ReplaceInstUsesWith(CI, In); + } + + // If we know that only one bit of the LHS of the icmp can be set and we + // have an equality comparison with zero or a power of 2, we can transform + // the icmp and sext into bitwise/integer operations. 
+ if (ICI->hasOneUse() && + ICI->isEquality() && (Op1C->isZero() || Op1C->getValue().isPowerOf2())){ + unsigned BitWidth = Op1C->getType()->getBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + APInt TypeMask(APInt::getAllOnesValue(BitWidth)); + ComputeMaskedBits(Op0, TypeMask, KnownZero, KnownOne); + + APInt KnownZeroMask(~KnownZero); + if (KnownZeroMask.isPowerOf2()) { + Value *In = ICI->getOperand(0); + + // If the icmp tests for a known zero bit we can constant fold it. + if (!Op1C->isZero() && Op1C->getValue() != KnownZeroMask) { + Value *V = Pred == ICmpInst::ICMP_NE ? + ConstantInt::getAllOnesValue(CI.getType()) : + ConstantInt::getNullValue(CI.getType()); + return ReplaceInstUsesWith(CI, V); + } + + if (!Op1C->isZero() == (Pred == ICmpInst::ICMP_NE)) { + // sext ((x & 2^n) == 0) -> (x >> n) - 1 + // sext ((x & 2^n) != 2^n) -> (x >> n) - 1 + unsigned ShiftAmt = KnownZeroMask.countTrailingZeros(); + // Perform a right shift to place the desired bit in the LSB. + if (ShiftAmt) + In = Builder->CreateLShr(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // At this point "In" is either 1 or 0. Subtract 1 to turn + // {1, 0} -> {0, -1}. + In = Builder->CreateAdd(In, + ConstantInt::getAllOnesValue(In->getType()), + "sext"); + } else { + // sext ((x & 2^n) != 0) -> (x << bitwidth-n) a>> bitwidth-1 + // sext ((x & 2^n) == 2^n) -> (x << bitwidth-n) a>> bitwidth-1 + unsigned ShiftAmt = KnownZeroMask.countLeadingZeros(); + // Perform a left shift to place the desired bit in the MSB. + if (ShiftAmt) + In = Builder->CreateShl(In, + ConstantInt::get(In->getType(), ShiftAmt)); + + // Distribute the bit over the whole bit width. + In = Builder->CreateAShr(In, ConstantInt::get(In->getType(), + BitWidth - 1), "sext"); + } + + if (CI.getType() == In->getType()) + return ReplaceInstUsesWith(CI, In); + return CastInst::CreateIntegerCast(In, CI.getType(), true/*SExt*/); + } + } + } + + // vector (x ashr x, 31 -> all ones if signed. 
+ if (const VectorType *VTy = dyn_cast(CI.getType())) { + if (Pred == ICmpInst::ICMP_SLT && match(Op1, m_Zero()) && + Op0->getType() == CI.getType()) { + const Type *EltTy = VTy->getElementType(); + + // splat the shift constant to a constant vector. + Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); + Value *In = Builder->CreateAShr(Op0, VSh, Op0->getName()+".lobit"); + return ReplaceInstUsesWith(CI, In); + } + } + + return 0; +} + /// CanEvaluateSExtd - Return true if we can take the specified value /// and return it as type Ty without inserting any new casts and without /// changing the value of the common low bits. This is used by code that tries @@ -1006,44 +1095,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) { Value *Res = Builder->CreateShl(TI->getOperand(0), ShAmt, "sext"); return BinaryOperator::CreateAShr(Res, ShAmt); } - - - // (x ashr x, 31 -> all ones if signed - // (x >s -1) ? -1 : 0 -> ashr x, 31 -> all ones if not signed - { - ICmpInst::Predicate Pred; Value *CmpLHS; ConstantInt *CmpRHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_ConstantInt(CmpRHS)))) { - // sext (x x>>s31 true if signbit set. - // sext (x >s -1) to i32 --> (x>>s31)^-1 true if signbit clear. - if ((Pred == ICmpInst::ICMP_SLT && CmpRHS->isZero()) || - (Pred == ICmpInst::ICMP_SGT && CmpRHS->isAllOnesValue())) { - Value *Sh = ConstantInt::get(CmpLHS->getType(), - CmpLHS->getType()->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, Sh, CmpLHS->getName()+".lobit"); - if (In->getType() != CI.getType()) - In = Builder->CreateIntCast(In, CI.getType(), true/*SExt*/, "tmp"); - - if (Pred == ICmpInst::ICMP_SGT) - In = Builder->CreateNot(In, In->getName()+".not"); - return ReplaceInstUsesWith(CI, In); - } - } - } - // vector (x ashr x, 31 -> all ones if signed. 
- if (const VectorType *VTy = dyn_cast(DestTy)) { - ICmpInst::Predicate Pred; Value *CmpLHS; - if (match(Src, m_ICmp(Pred, m_Value(CmpLHS), m_Zero()))) { - if (Pred == ICmpInst::ICMP_SLT && CmpLHS->getType() == DestTy) { - const Type *EltTy = VTy->getElementType(); - - // splat the shift constant to a constant vector. - Constant *VSh = ConstantInt::get(VTy, EltTy->getScalarSizeInBits()-1); - Value *In = Builder->CreateAShr(CmpLHS, VSh,CmpLHS->getName()+".lobit"); - return ReplaceInstUsesWith(CI, In); - } - } - } + if (ICmpInst *ICI = dyn_cast(Src)) + return transformSExtICmp(ICI, CI); // If the input is a shl/ashr pair of a same constant, then this is a sign // extension from a smaller value. If we could trust arbitrary bitwidth diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 999de3409750..bb9b88bfe6a7 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -699,7 +699,7 @@ Instruction *InstCombiner::FoldICmpAddOpCst(ICmpInst &ICI, return ReplaceInstUsesWith(ICI, ConstantInt::getTrue(X->getContext())); // From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0, - // so the values can never be equal. Similiarly for all other "or equals" + // so the values can never be equal. Similarly for all other "or equals" // operators. // (X+1) X >u (MAXUINT-1) --> X == 255 @@ -1289,13 +1289,21 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, } case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI) - case Instruction::AShr: - // Only handle equality comparisons of shift-by-constant. - if (ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1))) - if (Instruction *Res = FoldICmpShrCst(ICI, cast(LHSI), - ShAmt)) + case Instruction::AShr: { + // Handle equality comparisons of shift-by-constant. 
+ BinaryOperator *BO = cast(LHSI); + if (ConstantInt *ShAmt = dyn_cast(LHSI->getOperand(1))) { + if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt)) return Res; + } + + // Handle exact shr's. + if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) { + if (RHSV.isMinValue()) + return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS); + } break; + } case Instruction::SDiv: case Instruction::UDiv: @@ -1376,9 +1384,9 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI, if (Value *NegVal = dyn_castNegVal(BOp1)) return new ICmpInst(ICI.getPredicate(), BOp0, NegVal); - else if (Value *NegVal = dyn_castNegVal(BOp0)) + if (Value *NegVal = dyn_castNegVal(BOp0)) return new ICmpInst(ICI.getPredicate(), NegVal, BOp1); - else if (BO->hasOneUse()) { + if (BO->hasOneUse()) { Value *Neg = Builder->CreateNeg(BOp1); Neg->takeName(BO); return new ICmpInst(ICI.getPredicate(), BOp0, Neg); @@ -1855,11 +1863,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(ICmpInst::ICMP_SLT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()+1)); case ICmpInst::ICMP_UGE: - assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE + assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_UGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); case ICmpInst::ICMP_SGE: - assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE + assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE return new ICmpInst(ICmpInst::ICMP_SGT, Op0, ConstantInt::get(CI->getContext(), CI->getValue()-1)); } @@ -1907,18 +1915,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // that code below can assume that Min != Max. 
if (!isa(Op0) && Op0Min == Op0Max) return new ICmpInst(I.getPredicate(), - ConstantInt::get(I.getContext(), Op0Min), Op1); + ConstantInt::get(Op0->getType(), Op0Min), Op1); if (!isa(Op1) && Op1Min == Op1Max) return new ICmpInst(I.getPredicate(), Op0, - ConstantInt::get(I.getContext(), Op1Min)); + ConstantInt::get(Op1->getType(), Op1Min)); // Based on the range information we know about the LHS, see if we can - // simplify this comparison. For example, (x&4) < 8 is always true. + // simplify this comparison. For example, (x&4) < 8 is always true. switch (I.getPredicate()) { default: llvm_unreachable("Unknown icmp opcode!"); case ICmpInst::ICMP_EQ: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); // If all bits are known zero except for one, then we know at most one // bit is set. If the comparison is against zero, then this is a check @@ -1955,7 +1963,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case ICmpInst::ICMP_NE: { if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); // If all bits are known zero except for one, then we know at most one // bit is set. 
If the comparison is against zero, then this is a check @@ -1992,9 +2000,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } case ICmpInst::ICMP_ULT: if (Op0Max.ult(Op1Min)) // A true if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.uge(Op1Max)) // A false if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { @@ -2010,9 +2018,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_UGT: if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -2029,9 +2037,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SLT: if (Op0Max.slt(Op1Min)) // A true if max(A) < min(C) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sge(Op1Max)) // A false if min(A) >= max(C) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Min == Op0Max) // A A != B if max(A) == min(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); if (ConstantInt *CI = dyn_cast(Op1)) { @@ -2042,9 +2050,9 @@ Instruction 
*InstCombiner::visitICmpInst(ICmpInst &I) { break; case ICmpInst::ICMP_SGT: if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B) return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1); @@ -2057,30 +2065,30 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { case ICmpInst::ICMP_SGE: assert(!isa(Op1) && "ICMP_SGE with ConstantInt not folded!"); if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_SLE: assert(!isa(Op1) && "ICMP_SLE with ConstantInt not folded!"); if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_UGE: assert(!isa(Op1) && "ICMP_UGE with ConstantInt not folded!"); if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Max.ult(Op1Min)) // A >=u B -> 
false if max(A) < min(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; case ICmpInst::ICMP_ULE: assert(!isa(Op1) && "ICMP_ULE with ConstantInt not folded!"); if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B) - return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B) - return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getContext())); + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); break; } @@ -2306,6 +2314,35 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { BO0->hasOneUse() && BO1->hasOneUse()) return new ICmpInst(Pred, D, B); + BinaryOperator *SRem = NULL; + // icmp (srem X, Y), Y + if (BO0 && BO0->getOpcode() == Instruction::SRem && + Op1 == BO0->getOperand(1)) + SRem = BO0; + // icmp Y, (srem X, Y) + else if (BO1 && BO1->getOpcode() == Instruction::SRem && + Op0 == BO1->getOperand(1)) + SRem = BO1; + if (SRem) { + // We don't check hasOneUse to avoid increasing register pressure because + // the value we use is the same value this instruction was already using. + switch (SRem == BO0 ? 
ICmpInst::getSwappedPredicate(Pred) : Pred) { + default: break; + case ICmpInst::ICMP_EQ: + return ReplaceInstUsesWith(I, ConstantInt::getFalse(I.getType())); + case ICmpInst::ICMP_NE: + return ReplaceInstUsesWith(I, ConstantInt::getTrue(I.getType())); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: + return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1), + Constant::getAllOnesValue(SRem->getType())); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: + return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1), + Constant::getNullValue(SRem->getType())); + } + } + if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() && BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) { @@ -2356,6 +2393,27 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } } break; + case Instruction::UDiv: + case Instruction::LShr: + if (I.isSigned()) + break; + // fall-through + case Instruction::SDiv: + case Instruction::AShr: + if (!BO0->isExact() && !BO1->isExact()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + case Instruction::Shl: { + bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap(); + bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap(); + if (!NUW && !NSW) + break; + if (!NSW && I.isSigned()) + break; + return new ICmpInst(I.getPredicate(), BO0->getOperand(0), + BO1->getOperand(0)); + } } } } @@ -2425,9 +2483,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { } // (X&Z) == (Y&Z) -> (X^Y) & Z == 0 - if (Op0->hasOneUse() && Op1->hasOneUse() && - match(Op0, m_And(m_Value(A), m_Value(B))) && - match(Op1, m_And(m_Value(C), m_Value(D)))) { + if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) && + match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) { Value *X = 0, *Y = 0, *Z = 0; if (A == C) { @@ -2448,6 +2505,32 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return &I; } } + + // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to + // 
"icmp (and X, mask), cst" + uint64_t ShAmt = 0; + ConstantInt *Cst1; + if (Op0->hasOneUse() && + match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), + m_ConstantInt(ShAmt))))) && + match(Op1, m_ConstantInt(Cst1)) && + // Only do this when A has multiple uses. This is most important to do + // when it exposes other optimizations. + !A->hasOneUse()) { + unsigned ASize =cast(A->getType())->getPrimitiveSizeInBits(); + + if (ShAmt < ASize) { + APInt MaskV = + APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits()); + MaskV <<= ShAmt; + + APInt CmpV = Cst1->getValue().zext(ASize); + CmpV <<= ShAmt; + + Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV)); + return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV)); + } + } } { @@ -2704,6 +2787,42 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { if (Constant *RHSC = dyn_cast(Op1)) { if (Instruction *LHSI = dyn_cast(Op0)) switch (LHSI->getOpcode()) { + case Instruction::FPExt: { + // fcmp (fpext x), C -> fcmp x, (fptrunc C) if fptrunc is lossless + FPExtInst *LHSExt = cast(LHSI); + ConstantFP *RHSF = dyn_cast(RHSC); + if (!RHSF) + break; + + // We can't convert a PPC double double. + if (RHSF->getType()->isPPC_FP128Ty()) + break; + + const fltSemantics *Sem; + // FIXME: This shouldn't be here. + if (LHSExt->getSrcTy()->isFloatTy()) + Sem = &APFloat::IEEEsingle; + else if (LHSExt->getSrcTy()->isDoubleTy()) + Sem = &APFloat::IEEEdouble; + else if (LHSExt->getSrcTy()->isFP128Ty()) + Sem = &APFloat::IEEEquad; + else if (LHSExt->getSrcTy()->isX86_FP80Ty()) + Sem = &APFloat::x87DoubleExtended; + else + break; + + bool Lossy; + APFloat F = RHSF->getValueAPF(); + F.convert(*Sem, APFloat::rmNearestTiesToEven, &Lossy); + + // Avoid lossy conversions and denormals. 
+ if (!Lossy && + F.compare(APFloat::getSmallestNormalized(*Sem)) != + APFloat::cmpLessThan) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + ConstantFP::get(RHSC->getContext(), F)); + break; + } case Instruction::PHI: // Only fold fcmp into the PHI if the phi and fcmp are in the same // block. If in the same block, we're encouraging jump threading. If @@ -2742,6 +2861,14 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { return SelectInst::Create(LHSI->getOperand(0), Op1, Op2); break; } + case Instruction::FSub: { + // fcmp pred (fneg x), C -> fcmp swap(pred) x, -C + Value *Op; + if (match(LHSI, m_FNeg(m_Value(Op)))) + return new FCmpInst(I.getSwappedPredicate(), Op, + ConstantExpr::getFNeg(RHSC)); + break; + } case Instruction::Load: if (GetElementPtrInst *GEP = dyn_cast(LHSI->getOperand(0))) { @@ -2755,5 +2882,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) { } } + // fcmp pred (fneg x), (fneg y) -> fcmp swap(pred) x, y + Value *X, *Y; + if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_FNeg(m_Value(Y)))) + return new FCmpInst(I.getSwappedPredicate(), X, Y); + + // fcmp (fpext x), (fpext y) -> fcmp x, y + if (FPExtInst *LHSExt = dyn_cast(Op0)) + if (FPExtInst *RHSExt = dyn_cast(Op1)) + if (LHSExt->getSrcTy() == RHSExt->getSrcTy()) + return new FCmpInst(I.getPredicate(), LHSExt->getOperand(0), + RHSExt->getOperand(0)); + return Changed ? &I : 0; } diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 78ff7346abe4..432adc9d046d 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -364,34 +364,12 @@ static bool equivalentAddressValues(Value *A, Value *B) { return false; } -// If this instruction has two uses, one of which is a llvm.dbg.declare, -// return the llvm.dbg.declare. 
-DbgDeclareInst *InstCombiner::hasOneUsePlusDeclare(Value *V) { - if (!V->hasNUses(2)) - return 0; - for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); - UI != E; ++UI) { - User *U = *UI; - if (DbgDeclareInst *DI = dyn_cast(U)) - return DI; - if (isa(U) && U->hasOneUse()) { - if (DbgDeclareInst *DI = dyn_cast(*U->use_begin())) - return DI; - } - } - return 0; -} - Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { Value *Val = SI.getOperand(0); Value *Ptr = SI.getOperand(1); // If the RHS is an alloca with a single use, zapify the store, making the // alloca dead. - // If the RHS is an alloca with a two uses, the other one being a - // llvm.dbg.declare, zapify the store and the declare, making the - // alloca dead. We must do this to prevent declares from affecting - // codegen. if (!SI.isVolatile()) { if (Ptr->hasOneUse()) { if (isa(Ptr)) @@ -400,17 +378,9 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (isa(GEP->getOperand(0))) { if (GEP->getOperand(0)->hasOneUse()) return EraseInstFromFunction(SI); - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(GEP->getOperand(0))) { - EraseInstFromFunction(*DI); - return EraseInstFromFunction(SI); - } } } } - if (DbgDeclareInst *DI = hasOneUsePlusDeclare(Ptr)) { - EraseInstFromFunction(*DI); - return EraseInstFromFunction(SI); - } } // Attempt to improve the alignment. @@ -621,8 +591,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) { // Insert a PHI node now if we need it. 
Value *MergedVal = OtherStore->getOperand(0); if (MergedVal != SI.getOperand(0)) { - PHINode *PN = PHINode::Create(MergedVal->getType(), "storemerge"); - PN->reserveOperandSpace(2); + PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge"); PN->addIncoming(SI.getOperand(0), SI.getParent()); PN->addIncoming(OtherStore->getOperand(0), OtherBB); MergedVal = InsertNewInstBefore(PN, DestBB->front()); diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d1a1fd6ddfac..57fb08aca266 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -320,6 +320,10 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { } } + // See if we can fold away this div instruction. + if (SimplifyDemandedInstructionBits(I)) + return &I; + // (X - (X rem Y)) / Y -> X / Y; usually originates as ((X / Y) * Y) / Y Value *X = 0, *Z = 0; if (match(Op0, m_Sub(m_Value(X), m_Value(Z)))) { // (X - Z) / Y; Y = Op1 @@ -332,6 +336,19 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) { return 0; } +/// dyn_castZExtVal - Checks if V is a zext or constant that can +/// be truncated to Ty without losing bits. 
+static Value *dyn_castZExtVal(Value *V, const Type *Ty) { + if (ZExtInst *Z = dyn_cast(V)) { + if (Z->getSrcTy() == Ty) + return Z->getOperand(0); + } else if (ConstantInt *C = dyn_cast(V)) { + if (C->getValue().getActiveBits() <= cast(Ty)->getBitWidth()) + return ConstantExpr::getTrunc(C, Ty); + } + return 0; +} + Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -390,6 +407,14 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { return SelectInst::Create(Cond, TSI, FSI); } } + + // (zext A) udiv (zext B) --> zext (A udiv B) + if (ZExtInst *ZOp0 = dyn_cast(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateUDiv(ZOp0->getOperand(0), ZOp1, "div", + I.isExact()), + I.getType()); + return 0; } @@ -452,27 +477,17 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) { if (Value *V = SimplifyFDivInst(Op0, Op1, TD)) return ReplaceInstUsesWith(I, V); - return 0; -} + if (ConstantFP *Op1C = dyn_cast(Op1)) { + const APFloat &Op1F = Op1C->getValueAPF(); -/// This function implements the transforms on rem instructions that work -/// regardless of the kind of rem instruction it is (urem, srem, or frem). It -/// is used by the visitors to those instructions. -/// @brief Transforms common to all three rem instructions -Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { - Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - - if (isa(Op0)) { // undef % X -> 0 - if (I.getType()->isFPOrFPVectorTy()) - return ReplaceInstUsesWith(I, Op0); // X % undef -> undef (could be SNaN) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // If the divisor has an exact multiplicative inverse we can turn the fdiv + // into a cheaper fmul. 
+ APFloat Reciprocal(Op1F.getSemantics()); + if (Op1F.getExactInverse(&Reciprocal)) { + ConstantFP *RFP = ConstantFP::get(Builder->getContext(), Reciprocal); + return BinaryOperator::CreateFMul(Op0, RFP); + } } - if (isa(Op1)) - return ReplaceInstUsesWith(I, Op1); // X % undef -> undef - - // Handle cases involving: rem X, (select Cond, Y, Z) - if (isa(Op1) && SimplifyDivRemOfSelect(I)) - return &I; return 0; } @@ -484,26 +499,11 @@ Instruction *InstCombiner::commonRemTransforms(BinaryOperator &I) { Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Instruction *common = commonRemTransforms(I)) - return common; - - // X % X == 0 - if (Op0 == Op1) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - - // 0 % X == 0 for integer, we don't need to preserve faults! - if (Constant *LHS = dyn_cast(Op0)) - if (LHS->isNullValue()) - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa(Op1) && SimplifyDivRemOfSelect(I)) + return &I; if (ConstantInt *RHS = dyn_cast(Op1)) { - // X % 0 == undef, we don't need to preserve faults! 
- if (RHS->equalsInt(0)) - return ReplaceInstUsesWith(I, UndefValue::get(I.getType())); - - if (RHS->equalsInt(1)) // X % 1 == 0 - return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); - if (Instruction *Op0I = dyn_cast(Op0)) { if (SelectInst *SI = dyn_cast(Op0I)) { if (Instruction *R = FoldOpIntoSelect(I, SI)) @@ -525,6 +525,9 @@ Instruction *InstCombiner::commonIRemTransforms(BinaryOperator &I) { Instruction *InstCombiner::visitURem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyURemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + if (Instruction *common = commonIRemTransforms(I)) return common; @@ -552,13 +555,22 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) { return SelectInst::Create(Cond, TrueAnd, FalseAnd); } } - + + // (zext A) urem (zext B) --> zext (A urem B) + if (ZExtInst *ZOp0 = dyn_cast(Op0)) + if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) + return new ZExtInst(Builder->CreateURem(ZOp0->getOperand(0), ZOp1), + I.getType()); + return 0; } Instruction *InstCombiner::visitSRem(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifySRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + // Handle the integer rem common cases if (Instruction *Common = commonIRemTransforms(I)) return Common; @@ -617,6 +629,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) { } Instruction *InstCombiner::visitFRem(BinaryOperator &I) { - return commonRemTransforms(I); -} + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + if (Value *V = SimplifyFRemInst(Op0, Op1, TD)) + return ReplaceInstUsesWith(I, V); + + // Handle cases involving: rem X, (select Cond, Y, Z) + if (isa(Op1) && SimplifyDivRemOfSelect(I)) + return &I; + + return 0; +} diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 297a18c40a97..abf61bbaf3a6 100644 --- 
a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -80,18 +80,16 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) { Value *InRHS = FirstInst->getOperand(1); PHINode *NewLHS = 0, *NewRHS = 0; if (LHSVal == 0) { - NewLHS = PHINode::Create(LHSType, + NewLHS = PHINode::Create(LHSType, PN.getNumIncomingValues(), FirstInst->getOperand(0)->getName() + ".pn"); - NewLHS->reserveOperandSpace(PN.getNumOperands()/2); NewLHS->addIncoming(InLHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewLHS, PN); LHSVal = NewLHS; } if (RHSVal == 0) { - NewRHS = PHINode::Create(RHSType, + NewRHS = PHINode::Create(RHSType, PN.getNumIncomingValues(), FirstInst->getOperand(1)->getName() + ".pn"); - NewRHS->reserveOperandSpace(PN.getNumOperands()/2); NewRHS->addIncoming(InRHS, PN.getIncomingBlock(0)); InsertNewInstBefore(NewRHS, PN); RHSVal = NewRHS; @@ -202,11 +200,10 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { for (unsigned i = 0, e = FixedOperands.size(); i != e; ++i) { if (FixedOperands[i]) continue; // operand doesn't need a phi. Value *FirstOp = FirstInst->getOperand(i); - PHINode *NewPN = PHINode::Create(FirstOp->getType(), + PHINode *NewPN = PHINode::Create(FirstOp->getType(), e, FirstOp->getName()+".pn"); InsertNewInstBefore(NewPN, PN); - NewPN->reserveOperandSpace(e); NewPN->addIncoming(FirstOp, PN.getIncomingBlock(0)); OperandPhis[i] = NewPN; FixedOperands[i] = NewPN; @@ -240,7 +237,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) { /// obvious the value of the load is not changed from the point of the load to /// the end of the block it is in. /// -/// Finally, it is safe, but not profitable, to sink a load targetting a +/// Finally, it is safe, but not profitable, to sink a load targeting a /// non-address-taken alloca. Doing so will cause us to not promote the alloca /// to a register. 
static bool isSafeAndProfitableToSinkLoad(LoadInst *L) { @@ -340,8 +337,8 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) { // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstLI->getOperand(0)->getType(), + PN.getNumIncomingValues(), PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); Value *InVal = FirstLI->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -446,8 +443,8 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) { // Okay, they are all the same operation. Create a new PHI node of the // correct type, and PHI together all of the LHS's of the instructions. PHINode *NewPN = PHINode::Create(FirstInst->getOperand(0)->getType(), + PN.getNumIncomingValues(), PN.getName()+".in"); - NewPN->reserveOperandSpace(PN.getNumOperands()/2); Value *InVal = FirstInst->getOperand(0); NewPN->addIncoming(InVal, PN.getIncomingBlock(0)); @@ -699,7 +696,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) { // Otherwise, Create the new PHI node for this user. 
- EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN); + EltPHI = PHINode::Create(Ty, PN->getNumIncomingValues(), + PN->getName()+".off"+Twine(Offset), PN); assert(EltPHI->getType() != PN->getType() && "Truncate didn't shrink phi?"); @@ -776,9 +774,6 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) { // PHINode simplification // Instruction *InstCombiner::visitPHINode(PHINode &PN) { - // If LCSSA is around, don't mess with Phi nodes - if (MustPreserveLCSSA) return 0; - if (Value *V = SimplifyInstruction(&PN, TD)) return ReplaceInstUsesWith(PN, V); @@ -826,18 +821,18 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // quick check to see if the PHI node only contains a single non-phi value, if // so, scan to see if the phi cycle is actually equal to that value. { - unsigned InValNo = 0, NumOperandVals = PN.getNumIncomingValues(); + unsigned InValNo = 0, NumIncomingVals = PN.getNumIncomingValues(); // Scan for the first non-phi operand. - while (InValNo != NumOperandVals && + while (InValNo != NumIncomingVals && isa(PN.getIncomingValue(InValNo))) ++InValNo; - if (InValNo != NumOperandVals) { - Value *NonPhiInVal = PN.getOperand(InValNo); + if (InValNo != NumIncomingVals) { + Value *NonPhiInVal = PN.getIncomingValue(InValNo); // Scan the rest of the operands to see if there are any conflicts, if so // there is no need to recursively scan other phis. - for (++InValNo; InValNo != NumOperandVals; ++InValNo) { + for (++InValNo; InValNo != NumIncomingVals; ++InValNo) { Value *OpVal = PN.getIncomingValue(InValNo); if (OpVal != NonPhiInVal && !isa(OpVal)) break; @@ -846,7 +841,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) { // If we scanned over all operands, then we have one unique value plus // phi values. Scan PHI nodes to see if they all merge in each other or // the value. 
- if (InValNo == NumOperandVals) { + if (InValNo == NumIncomingVals) { SmallPtrSet ValueEqualPHIs; if (PHIsEqualValue(&PN, NonPhiInVal, ValueEqualPHIs)) return ReplaceInstUsesWith(PN, NonPhiInVal); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 97abc769ae5f..61a433a9c00c 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -214,7 +214,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, unsigned OpToFold = 0; if ((SFO & 1) && FalseVal == TVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { + } else if ((SFO & 2) && FalseVal == TVI->getOperand(1)) { OpToFold = 2; } @@ -227,9 +227,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Instruction *NewSel = SelectInst::Create(SI.getCondition(), OOp, C); InsertNewInstBefore(NewSel, SI); NewSel->takeName(TVI); - if (BinaryOperator *BO = dyn_cast(TVI)) - return BinaryOperator::Create(BO->getOpcode(), FalseVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *TVI_BO = cast(TVI); + BinaryOperator *BO = BinaryOperator::Create(TVI_BO->getOpcode(), + FalseVal, NewSel); + if (isa(BO)) + BO->setIsExact(TVI_BO->isExact()); + if (isa(BO)) { + BO->setHasNoUnsignedWrap(TVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(TVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -243,7 +250,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, unsigned OpToFold = 0; if ((SFO & 1) && TrueVal == FVI->getOperand(0)) { OpToFold = 1; - } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { + } else if ((SFO & 2) && TrueVal == FVI->getOperand(1)) { OpToFold = 2; } @@ -256,9 +263,16 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal, Instruction *NewSel = SelectInst::Create(SI.getCondition(), C, OOp); InsertNewInstBefore(NewSel, 
SI); NewSel->takeName(FVI); - if (BinaryOperator *BO = dyn_cast(FVI)) - return BinaryOperator::Create(BO->getOpcode(), TrueVal, NewSel); - llvm_unreachable("Unknown instruction!!"); + BinaryOperator *FVI_BO = cast(FVI); + BinaryOperator *BO = BinaryOperator::Create(FVI_BO->getOpcode(), + TrueVal, NewSel); + if (isa(BO)) + BO->setIsExact(FVI_BO->isExact()); + if (isa(BO)) { + BO->setHasNoUnsignedWrap(FVI_BO->hasNoUnsignedWrap()); + BO->setHasNoSignedWrap(FVI_BO->hasNoSignedWrap()); + } + return BO; } } } @@ -424,6 +438,19 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI, return ReplaceInstUsesWith(SI, TrueVal); /// NOTE: if we wanted to, this is where to detect integer MIN/MAX } + + if (isa(CmpRHS)) { + if (CmpLHS == TrueVal && Pred == ICmpInst::ICMP_EQ) { + // Transform (X == C) ? X : Y -> (X == C) ? C : Y + SI.setOperand(1, CmpRHS); + Changed = true; + } else if (CmpLHS == FalseVal && Pred == ICmpInst::ICMP_NE) { + // Transform (X != C) ? Y : X -> (X != C) ? Y : C + SI.setOperand(2, CmpRHS); + Changed = true; + } + } + return Changed ? 
&SI : 0; } @@ -503,9 +530,8 @@ static Value *foldSelectICmpAnd(const SelectInst &SI, ConstantInt *TrueVal, if (!IC || !IC->isEquality()) return 0; - if (ConstantInt *C = dyn_cast(IC->getOperand(1))) - if (!C->isZero()) - return 0; + if (!match(IC->getOperand(1), m_Zero())) + return 0; ConstantInt *AndRHS; Value *LHS = IC->getOperand(0); diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index a7f800587bb6..811f94976f68 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -644,7 +644,14 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) { return &I; } } - + + // (C1 << A) << C2 -> (C1 << C2) << A + Constant *C1, *C2; + Value *A; + if (match(I.getOperand(0), m_OneUse(m_Shl(m_Constant(C1), m_Value(A)))) && + match(I.getOperand(1), m_Constant(C2))) + return BinaryOperator::CreateShl(ConstantExpr::getShl(C1, C2), A); + return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index bda8cea4e41f..6e727ce6e35c 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -684,6 +684,10 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, break; case Instruction::SRem: if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { + // X % -1 demands all the bits because we don't want to introduce + // INT_MIN % -1 (== undef) by accident. + if (Rem->isAllOnesValue()) + break; APInt RA = Rem->getValue().abs(); if (RA.isPowerOf2()) { if (DemandedMask.ult(RA)) // srem won't affect demanded bits @@ -712,6 +716,18 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask, assert(!(KnownZero & KnownOne) && "Bits known to be one AND zero?"); } } + + // The sign bit is the LHS's sign bit, except when the result of the + // remainder is zero. 
+ if (DemandedMask.isNegative() && KnownZero.isNonNegative()) { + APInt Mask2 = APInt::getSignBit(BitWidth); + APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); + ComputeMaskedBits(I->getOperand(0), Mask2, LHSKnownZero, LHSKnownOne, + Depth+1); + // If it's known zero, our sign bit is also zero. + if (LHSKnownZero.isNegative()) + KnownZero |= LHSKnownZero; + } break; case Instruction::URem: { APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 5caa12dfdfa5..ad6a8d054ee7 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -230,8 +230,16 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { ConstantInt::get(Int32Ty, SrcIdx, false)); } + } else if (CastInst *CI = dyn_cast(I)) { + // Canonicalize extractelement(cast) -> cast(extractelement) + // bitcasts can change the number of vector elements and they cost nothing + if (CI->hasOneUse() && EI.hasOneUse() && + (CI->getOpcode() != Instruction::BitCast)) { + Value *EE = Builder->CreateExtractElement(CI->getOperand(0), + EI.getIndexOperand()); + return CastInst::Create(CI->getOpcode(), EE, EI.getType()); + } } - // FIXME: Canonicalize extractelement(bitcast) -> bitcast(extractelement) } return 0; } diff --git a/lib/Transforms/InstCombine/InstCombineWorklist.h b/lib/Transforms/InstCombine/InstCombineWorklist.h index 9100a851f16e..32009c39ec25 100644 --- a/lib/Transforms/InstCombine/InstCombineWorklist.h +++ b/lib/Transforms/InstCombine/InstCombineWorklist.h @@ -53,6 +53,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombineWorklist { void AddInitialGroup(Instruction *const *List, unsigned NumEntries) { assert(Worklist.empty() && "Worklist must be empty to add initial group"); Worklist.reserve(NumEntries+16); + WorklistMap.resize(NumEntries); DEBUG(errs() << "IC: ADDING: " << NumEntries << " instrs 
to worklist\n"); for (; NumEntries; --NumEntries) { Instruction *I = List[NumEntries-1]; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 37123d0621eb..7a84598c3a0d 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -76,7 +76,6 @@ INITIALIZE_PASS(InstCombiner, "instcombine", "Combine redundant instructions", false, false) void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreservedID(LCSSAID); AU.setPreservesCFG(); } @@ -600,8 +599,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) { } // Okay, we can do the transformation: create the new PHI node. - PHINode *NewPN = PHINode::Create(I.getType(), ""); - NewPN->reserveOperandSpace(PN->getNumOperands()/2); + PHINode *NewPN = PHINode::Create(I.getType(), PN->getNumIncomingValues(), ""); InsertNewInstBefore(NewPN, *PN); NewPN->takeName(PN); @@ -850,22 +848,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { GetElementPtrInst::Create(Src->getOperand(0), Indices.begin(), Indices.end(), GEP.getName()); } - + // Handle gep(bitcast x) and gep(gep x, 0, 0, 0). Value *StrippedPtr = PtrOp->stripPointerCasts(); - if (StrippedPtr != PtrOp) { - const PointerType *StrippedPtrTy =cast(StrippedPtr->getType()); + const PointerType *StrippedPtrTy =cast(StrippedPtr->getType()); + if (StrippedPtr != PtrOp && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { bool HasZeroPointerIndex = false; if (ConstantInt *C = dyn_cast(GEP.getOperand(1))) HasZeroPointerIndex = C->isZero(); - + // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... // into : GEP [10 x i8]* X, i32 0, ... // // Likewise, transform: GEP (bitcast i8* X to [0 x i8]*), i32 0, ... // into : GEP i8* X, ... 
- // + // // This occurs when the program declares an array extern like "int X[];" if (HasZeroPointerIndex) { const PointerType *CPTy = cast(PtrOp->getType()); @@ -976,7 +975,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { } } } - + /// See if we can simplify: /// X = bitcast A* to B* /// Y = gep X, <...constant indices...> @@ -984,12 +983,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged. if (BitCastInst *BCI = dyn_cast(PtrOp)) { if (TD && - !isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices()) { + !isa(BCI->getOperand(0)) && GEP.hasAllConstantIndices() && + StrippedPtrTy->getAddressSpace() == GEP.getPointerAddressSpace()) { + // Determine how much the GEP moves the pointer. We are guaranteed to get // a constant back from EmitGEPOffset. ConstantInt *OffsetV = cast(EmitGEPOffset(&GEP)); int64_t Offset = OffsetV->getSExtValue(); - + // If this GEP instruction doesn't move the pointer, just replace the GEP // with a bitcast of the real input to the dest type. if (Offset == 0) { @@ -1635,7 +1636,6 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { bool InstCombiner::runOnFunction(Function &F) { - MustPreserveLCSSA = mustPreserveAnalysisID(LCSSAID); TD = getAnalysisIfAvailable(); @@ -1648,6 +1648,10 @@ bool InstCombiner::runOnFunction(Function &F) { bool EverMadeChange = false; + // Lower dbg.declare intrinsics otherwise their value may be clobbered + // by instcombiner. + EverMadeChange = LowerDbgDeclare(F); + // Iterate while there is work to do. 
unsigned Iteration = 0; while (DoOneIteration(F, Iteration++)) diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 0ac1cb09bce7..5700ac87f659 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -1,5 +1,6 @@ add_llvm_library(LLVMInstrumentation EdgeProfiling.cpp + GCOVProfiling.cpp Instrumentation.cpp OptimalEdgeProfiling.cpp PathProfiling.cpp diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp new file mode 100644 index 000000000000..2425342f7e6c --- /dev/null +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -0,0 +1,638 @@ +//===- GCOVProfiling.cpp - Insert edge counters for gcov profiling --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements GCOV-style profiling. When this pass is run it emits +// "gcno" files next to the existing source, and instruments the code that runs +// to records the edges between blocks that run and emit a complementary "gcda" +// file on exit. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "insert-gcov-profiling" + +#include "ProfilingUtils.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Instructions.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/IRBuilder.h" +#include "llvm/Support/PathV2.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/UniqueVector.h" +#include +#include +using namespace llvm; + +namespace { + class GCOVProfiler : public ModulePass { + bool runOnModule(Module &M); + public: + static char ID; + GCOVProfiler() + : ModulePass(ID), EmitNotes(true), EmitData(true) { + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + GCOVProfiler(bool EmitNotes, bool EmitData) + : ModulePass(ID), EmitNotes(EmitNotes), EmitData(EmitData) { + assert((EmitNotes || EmitData) && "GCOVProfiler asked to do nothing?"); + initializeGCOVProfilerPass(*PassRegistry::getPassRegistry()); + } + virtual const char *getPassName() const { + return "GCOV Profiler"; + } + + private: + // Create the GCNO files for the Module based on DebugInfo. + void emitGCNO(DebugInfoFinder &DIF); + + // Modify the program to track transitions along edges and call into the + // profiling runtime to emit .gcda files when run. + bool emitProfileArcs(DebugInfoFinder &DIF); + + // Get pointers to the functions in the runtime library. 
+ Constant *getStartFileFunc(); + Constant *getIncrementIndirectCounterFunc(); + Constant *getEmitFunctionFunc(); + Constant *getEmitArcsFunc(); + Constant *getEndFileFunc(); + + // Create or retrieve an i32 state value that is used to represent the + // pred block number for certain non-trivial edges. + GlobalVariable *getEdgeStateValue(); + + // Produce a table of pointers to counters, by predecessor and successor + // block number. + GlobalVariable *buildEdgeLookupTable(Function *F, + GlobalVariable *Counter, + const UniqueVector &Preds, + const UniqueVector &Succs); + + // Add the function to write out all our counters to the global destructor + // list. + void insertCounterWriteout(DebugInfoFinder &, + SmallVector, 8> &); + + bool EmitNotes; + bool EmitData; + + Module *M; + LLVMContext *Ctx; + }; +} + +char GCOVProfiler::ID = 0; +INITIALIZE_PASS(GCOVProfiler, "insert-gcov-profiling", + "Insert instrumentation for GCOV profiling", false, false) + +ModulePass *llvm::createGCOVProfilerPass(bool EmitNotes, bool EmitData) { + return new GCOVProfiler(EmitNotes, EmitData); +} + +static DISubprogram findSubprogram(DIScope Scope) { + while (!Scope.isSubprogram()) { + assert(Scope.isLexicalBlock() && + "Debug location not lexical block or subprogram"); + Scope = DILexicalBlock(Scope).getContext(); + } + return DISubprogram(Scope); +} + +namespace { + class GCOVRecord { + protected: + static const char *LinesTag; + static const char *FunctionTag; + static const char *BlockTag; + static const char *EdgeTag; + + GCOVRecord() {} + + void writeBytes(const char *Bytes, int Size) { + os->write(Bytes, Size); + } + + void write(uint32_t i) { + writeBytes(reinterpret_cast(&i), 4); + } + + // Returns the length measured in 4-byte blocks that will be used to + // represent this string in a GCOV file + unsigned lengthOfGCOVString(StringRef s) { + // A GCOV string is a length, followed by a NUL, then between 0 and 3 NULs + // padding out to the next 4-byte word. 
The length is measured in 4-byte + // words including padding, not bytes of actual string. + return (s.size() + 5) / 4; + } + + void writeGCOVString(StringRef s) { + uint32_t Len = lengthOfGCOVString(s); + write(Len); + writeBytes(s.data(), s.size()); + + // Write 1 to 4 bytes of NUL padding. + assert((unsigned)(4 - (s.size() % 4)) > 0); + assert((unsigned)(4 - (s.size() % 4)) <= 4); + writeBytes("\0\0\0\0", 4 - (s.size() % 4)); + } + + raw_ostream *os; + }; + const char *GCOVRecord::LinesTag = "\0\0\x45\x01"; + const char *GCOVRecord::FunctionTag = "\0\0\0\1"; + const char *GCOVRecord::BlockTag = "\0\0\x41\x01"; + const char *GCOVRecord::EdgeTag = "\0\0\x43\x01"; + + class GCOVFunction; + class GCOVBlock; + + // Constructed only by requesting it from a GCOVBlock, this object stores a + // list of line numbers and a single filename, representing lines that belong + // to the block. + class GCOVLines : public GCOVRecord { + public: + void addLine(uint32_t Line) { + Lines.push_back(Line); + } + + uint32_t length() { + return lengthOfGCOVString(Filename) + 2 + Lines.size(); + } + + private: + friend class GCOVBlock; + + GCOVLines(std::string Filename, raw_ostream *os) + : Filename(Filename) { + this->os = os; + } + + std::string Filename; + SmallVector Lines; + }; + + // Represent a basic block in GCOV. Each block has a unique number in the + // function, number of lines belonging to each block, and a set of edges to + // other blocks. 
+ class GCOVBlock : public GCOVRecord { + public: + GCOVLines &getFile(std::string Filename) { + GCOVLines *&Lines = LinesByFile[Filename]; + if (!Lines) { + Lines = new GCOVLines(Filename, os); + } + return *Lines; + } + + void addEdge(GCOVBlock &Successor) { + OutEdges.push_back(&Successor); + } + + void writeOut() { + uint32_t Len = 3; + for (StringMap::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) { + Len += I->second->length(); + } + + writeBytes(LinesTag, 4); + write(Len); + write(Number); + for (StringMap::iterator I = LinesByFile.begin(), + E = LinesByFile.end(); I != E; ++I) { + write(0); + writeGCOVString(I->second->Filename); + for (int i = 0, e = I->second->Lines.size(); i != e; ++i) { + write(I->second->Lines[i]); + } + } + write(0); + write(0); + } + + ~GCOVBlock() { + DeleteContainerSeconds(LinesByFile); + } + + private: + friend class GCOVFunction; + + GCOVBlock(uint32_t Number, raw_ostream *os) + : Number(Number) { + this->os = os; + } + + uint32_t Number; + StringMap LinesByFile; + SmallVector OutEdges; + }; + + // A function has a unique identifier, a checksum (we leave as zero) and a + // set of blocks and a map of edges between blocks. This is the only GCOV + // object users can construct, the blocks and lines will be rooted here. 
+ class GCOVFunction : public GCOVRecord { + public: + GCOVFunction(DISubprogram SP, raw_ostream *os) { + this->os = os; + + Function *F = SP.getFunction(); + uint32_t i = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + Blocks[BB] = new GCOVBlock(i++, os); + } + ReturnBlock = new GCOVBlock(i++, os); + + writeBytes(FunctionTag, 4); + uint32_t BlockLen = 1 + 1 + 1 + lengthOfGCOVString(SP.getName()) + + 1 + lengthOfGCOVString(SP.getFilename()) + 1; + write(BlockLen); + uint32_t Ident = reinterpret_cast((MDNode*)SP); + write(Ident); + write(0); // checksum + writeGCOVString(SP.getName()); + writeGCOVString(SP.getFilename()); + write(SP.getLineNumber()); + } + + ~GCOVFunction() { + DeleteContainerSeconds(Blocks); + delete ReturnBlock; + } + + GCOVBlock &getBlock(BasicBlock *BB) { + return *Blocks[BB]; + } + + GCOVBlock &getReturnBlock() { + return *ReturnBlock; + } + + void writeOut() { + // Emit count of blocks. + writeBytes(BlockTag, 4); + write(Blocks.size() + 1); + for (int i = 0, e = Blocks.size() + 1; i != e; ++i) { + write(0); // No flags on our blocks. + } + + // Emit edges between blocks. + for (DenseMap::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + GCOVBlock &Block = *I->second; + if (Block.OutEdges.empty()) continue; + + writeBytes(EdgeTag, 4); + write(Block.OutEdges.size() * 2 + 1); + write(Block.Number); + for (int i = 0, e = Block.OutEdges.size(); i != e; ++i) { + write(Block.OutEdges[i]->Number); + write(0); // no flags + } + } + + // Emit lines for each block. + for (DenseMap::iterator I = Blocks.begin(), + E = Blocks.end(); I != E; ++I) { + I->second->writeOut(); + } + } + + private: + DenseMap Blocks; + GCOVBlock *ReturnBlock; + }; +} + +// Replace the stem of a file, or add one if missing. +static std::string replaceStem(std::string OrigFilename, std::string NewStem) { + return (sys::path::stem(OrigFilename) + "." 
+ NewStem).str(); +} + +bool GCOVProfiler::runOnModule(Module &M) { + this->M = &M; + Ctx = &M.getContext(); + + DebugInfoFinder DIF; + DIF.processModule(M); + + if (EmitNotes) emitGCNO(DIF); + if (EmitData) return emitProfileArcs(DIF); + return false; +} + +void GCOVProfiler::emitGCNO(DebugInfoFinder &DIF) { + DenseMap GcnoFiles; + for (DebugInfoFinder::iterator I = DIF.compile_unit_begin(), + E = DIF.compile_unit_end(); I != E; ++I) { + // Each compile unit gets its own .gcno file. This means that whether we run + // this pass over the original .o's as they're produced, or run it after + // LTO, we'll generate the same .gcno files. + + DICompileUnit CU(*I); + raw_fd_ostream *&out = GcnoFiles[CU]; + std::string ErrorInfo; + out = new raw_fd_ostream(replaceStem(CU.getFilename(), "gcno").c_str(), + ErrorInfo, raw_fd_ostream::F_Binary); + out->write("oncg*404MVLL", 12); + } + + for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), + SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { + DISubprogram SP(*SPI); + raw_fd_ostream *&os = GcnoFiles[SP.getCompileUnit()]; + + Function *F = SP.getFunction(); + if (!F) continue; + GCOVFunction Func(SP, os); + + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + GCOVBlock &Block = Func.getBlock(BB); + TerminatorInst *TI = BB->getTerminator(); + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + Block.addEdge(Func.getBlock(TI->getSuccessor(i))); + } + } else if (isa(TI)) { + Block.addEdge(Func.getReturnBlock()); + } + + uint32_t Line = 0; + for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + const DebugLoc &Loc = I->getDebugLoc(); + if (Loc.isUnknown()) continue; + if (Line == Loc.getLine()) continue; + Line = Loc.getLine(); + if (SP != findSubprogram(DIScope(Loc.getScope(*Ctx)))) continue; + + GCOVLines &Lines = Block.getFile(SP.getFilename()); + Lines.addLine(Loc.getLine()); + } + } + Func.writeOut(); + } + + for (DenseMap::iterator + I 
= GcnoFiles.begin(), E = GcnoFiles.end(); I != E; ++I) { + raw_fd_ostream *&out = I->second; + out->write("\0\0\0\0\0\0\0\0", 8); // EOF + out->close(); + delete out; + } +} + +bool GCOVProfiler::emitProfileArcs(DebugInfoFinder &DIF) { + if (DIF.subprogram_begin() == DIF.subprogram_end()) + return false; + + SmallVector, 8> CountersByIdent; + for (DebugInfoFinder::iterator SPI = DIF.subprogram_begin(), + SPE = DIF.subprogram_end(); SPI != SPE; ++SPI) { + DISubprogram SP(*SPI); + Function *F = SP.getFunction(); + if (!F) continue; + + unsigned Edges = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + if (isa(TI)) + ++Edges; + else + Edges += TI->getNumSuccessors(); + } + + const ArrayType *CounterTy = + ArrayType::get(Type::getInt64Ty(*Ctx), Edges); + GlobalVariable *Counters = + new GlobalVariable(*M, CounterTy, false, + GlobalValue::InternalLinkage, + Constant::getNullValue(CounterTy), + "__llvm_gcov_ctr", 0, false, 0); + CountersByIdent.push_back( + std::make_pair(Counters, reinterpret_cast((MDNode*)SP))); + + UniqueVector ComplexEdgePreds; + UniqueVector ComplexEdgeSuccs; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors) { + IRBuilder<> Builder(TI); + + if (Successors == 1) { + Value *Counter = Builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else if (BranchInst *BI = dyn_cast(TI)) { + Value *Sel = Builder.CreateSelect( + BI->getCondition(), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge), + ConstantInt::get(Type::getInt64Ty(*Ctx), Edge + 1)); + SmallVector Idx; + Idx.push_back(Constant::getNullValue(Type::getInt64Ty(*Ctx))); + Idx.push_back(Sel); + Value *Counter = Builder.CreateInBoundsGEP(Counters, + Idx.begin(), Idx.end()); + Value *Count = Builder.CreateLoad(Counter); + Count = Builder.CreateAdd(Count, + ConstantInt::get(Type::getInt64Ty(*Ctx),1)); + Builder.CreateStore(Count, Counter); + } else { + ComplexEdgePreds.insert(BB); + for (int i = 0; i != Successors; ++i) + ComplexEdgeSuccs.insert(TI->getSuccessor(i)); + } + Edge += Successors; + } + } + + if (!ComplexEdgePreds.empty()) { + GlobalVariable *EdgeTable = + buildEdgeLookupTable(F, Counters, + ComplexEdgePreds, ComplexEdgeSuccs); + GlobalVariable *EdgeState = getEdgeStateValue(); + + const Type *Int32Ty = Type::getInt32Ty(*Ctx); + for (int i = 0, e = ComplexEdgePreds.size(); i != e; ++i) { + IRBuilder<> Builder(ComplexEdgePreds[i+1]->getTerminator()); + Builder.CreateStore(ConstantInt::get(Int32Ty, i), EdgeState); + } + for (int i = 0, e = ComplexEdgeSuccs.size(); i != e; ++i) { + // call runtime to perform increment + IRBuilder<> Builder(ComplexEdgeSuccs[i+1]->getFirstNonPHI()); + Value *CounterPtrArray = + Builder.CreateConstInBoundsGEP2_64(EdgeTable, 0, + i * ComplexEdgePreds.size()); + Builder.CreateCall2(getIncrementIndirectCounterFunc(), + EdgeState, CounterPtrArray); + // clear the predecessor number + Builder.CreateStore(ConstantInt::get(Int32Ty, 0xffffffff), EdgeState); + } + } + } + + 
insertCounterWriteout(DIF, CountersByIdent); + + return true; +} + +// All edges with successors that aren't branches are "complex", because it +// requires complex logic to pick which counter to update. +GlobalVariable *GCOVProfiler::buildEdgeLookupTable( + Function *F, + GlobalVariable *Counters, + const UniqueVector &Preds, + const UniqueVector &Succs) { + // TODO: support invoke, threads. We rely on the fact that nothing can modify + // the whole-Module pred edge# between the time we set it and the time we next + // read it. Threads and invoke make this untrue. + + // emit [(succs * preds) x i64*], logically [succ x [pred x i64*]]. + const Type *Int64PtrTy = Type::getInt64PtrTy(*Ctx); + const ArrayType *EdgeTableTy = ArrayType::get( + Int64PtrTy, Succs.size() * Preds.size()); + + Constant **EdgeTable = new Constant*[Succs.size() * Preds.size()]; + Constant *NullValue = Constant::getNullValue(Int64PtrTy); + for (int i = 0, ie = Succs.size() * Preds.size(); i != ie; ++i) + EdgeTable[i] = NullValue; + + unsigned Edge = 0; + for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + int Successors = isa(TI) ? 
1 : TI->getNumSuccessors(); + if (Successors > 1 && !isa(TI) && !isa(TI)) { + for (int i = 0; i != Successors; ++i) { + BasicBlock *Succ = TI->getSuccessor(i); + IRBuilder<> builder(Succ); + Value *Counter = builder.CreateConstInBoundsGEP2_64(Counters, 0, + Edge + i); + EdgeTable[((Succs.idFor(Succ)-1) * Preds.size()) + + (Preds.idFor(BB)-1)] = cast(Counter); + } + } + Edge += Successors; + } + + GlobalVariable *EdgeTableGV = + new GlobalVariable( + *M, EdgeTableTy, true, GlobalValue::InternalLinkage, + ConstantArray::get(EdgeTableTy, + &EdgeTable[0], Succs.size() * Preds.size()), + "__llvm_gcda_edge_table"); + EdgeTableGV->setUnnamedAddr(true); + return EdgeTableGV; +} + +Constant *GCOVProfiler::getStartFileFunc() { + const Type *Args[] = { Type::getInt8PtrTy(*Ctx) }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_start_file", FTy); +} + +Constant *GCOVProfiler::getIncrementIndirectCounterFunc() { + const Type *Args[] = { + Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor + Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row + }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_increment_indirect_counter", FTy); +} + +Constant *GCOVProfiler::getEmitFunctionFunc() { + const Type *Args[] = { Type::getInt32Ty(*Ctx) }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_function", FTy); +} + +Constant *GCOVProfiler::getEmitArcsFunc() { + const Type *Args[] = { + Type::getInt32Ty(*Ctx), // uint32_t num_counters + Type::getInt64PtrTy(*Ctx), // uint64_t *counters + }; + const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), + Args, false); + return M->getOrInsertFunction("llvm_gcda_emit_arcs", FTy); +} + +Constant *GCOVProfiler::getEndFileFunc() { + const FunctionType *FTy = 
FunctionType::get(Type::getVoidTy(*Ctx), false); + return M->getOrInsertFunction("llvm_gcda_end_file", FTy); +} + +GlobalVariable *GCOVProfiler::getEdgeStateValue() { + GlobalVariable *GV = M->getGlobalVariable("__llvm_gcov_global_state_pred"); + if (!GV) { + GV = new GlobalVariable(*M, Type::getInt32Ty(*Ctx), false, + GlobalValue::InternalLinkage, + ConstantInt::get(Type::getInt32Ty(*Ctx), + 0xffffffff), + "__llvm_gcov_global_state_pred"); + GV->setUnnamedAddr(true); + } + return GV; +} + +void GCOVProfiler::insertCounterWriteout( + DebugInfoFinder &DIF, + SmallVector, 8> &CountersByIdent) { + const FunctionType *WriteoutFTy = + FunctionType::get(Type::getVoidTy(*Ctx), false); + Function *WriteoutF = Function::Create(WriteoutFTy, + GlobalValue::InternalLinkage, + "__llvm_gcov_writeout", M); + WriteoutF->setUnnamedAddr(true); + BasicBlock *BB = BasicBlock::Create(*Ctx, "", WriteoutF); + IRBuilder<> Builder(BB); + + Constant *StartFile = getStartFileFunc(); + Constant *EmitFunction = getEmitFunctionFunc(); + Constant *EmitArcs = getEmitArcsFunc(); + Constant *EndFile = getEndFileFunc(); + + for (DebugInfoFinder::iterator CUI = DIF.compile_unit_begin(), + CUE = DIF.compile_unit_end(); CUI != CUE; ++CUI) { + DICompileUnit compile_unit(*CUI); + std::string FilenameGcda = replaceStem(compile_unit.getFilename(), "gcda"); + Builder.CreateCall(StartFile, + Builder.CreateGlobalStringPtr(FilenameGcda)); + for (SmallVector, 8>::iterator + I = CountersByIdent.begin(), E = CountersByIdent.end(); + I != E; ++I) { + Builder.CreateCall(EmitFunction, ConstantInt::get(Type::getInt32Ty(*Ctx), + I->second)); + GlobalVariable *GV = I->first; + unsigned Arcs = + cast(GV->getType()->getElementType())->getNumElements(); + Builder.CreateCall2(EmitArcs, + ConstantInt::get(Type::getInt32Ty(*Ctx), Arcs), + Builder.CreateConstGEP2_64(GV, 0, 0)); + } + Builder.CreateCall(EndFile); + } + Builder.CreateRetVoid(); + + InsertProfilingShutdownCall(WriteoutF, M); +} diff --git 
a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index 96ed4fa5c0fe..71adc1ec6de0 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -23,6 +23,7 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeEdgeProfilerPass(Registry); initializeOptimalEdgeProfilerPass(Registry); initializePathProfilerPass(Registry); + initializeGCOVProfilerPass(Registry); } /// LLVMInitializeInstrumentation - C binding for diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h index 829da6b295de..f76c77e1bdbf 100644 --- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h +++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This module privides means for calculating a maximum spanning tree for a +// This module provides means for calculating a maximum spanning tree for a // given set of weighted edges. The type parameter T is the type of a node. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp index c85a1a9391d4..e09f882aa323 100644 --- a/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp +++ b/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "insert-optimal-edge-profiling" #include "ProfilingUtils.h" +#include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" @@ -26,7 +27,6 @@ #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Statistic.h" #include "MaximumSpanningTree.h" -#include using namespace llvm; STATISTIC(NumEdgesInserted, "The # of edges inserted."); @@ -120,14 +120,14 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { NumEdgesInserted = 0; std::vector Initializer(NumEdges); - Constant* Zero = ConstantInt::get(Int32, 0); - Constant* Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); + Constant *Zero = ConstantInt::get(Int32, 0); + Constant *Uncounted = ConstantInt::get(Int32, ProfileInfoLoader::Uncounted); // Instrument all of the edges not in MST... unsigned i = 0; for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - DEBUG(dbgs()<<"Working on "<getNameStr()<<"\n"); + DEBUG(dbgs() << "Working on " << F->getNameStr() << "\n"); // Calculate a Maximum Spanning Tree with the edge weights determined by // ProfileEstimator. 
ProfileEstimator also assign weights to the virtual @@ -139,17 +139,17 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { ProfileInfo::EdgeWeights ECs = getAnalysis(*F).getEdgeWeights(F); std::vector EdgeVector(ECs.begin(), ECs.end()); - MaximumSpanningTree MST (EdgeVector); - std::stable_sort(MST.begin(),MST.end()); + MaximumSpanningTree MST(EdgeVector); + std::stable_sort(MST.begin(), MST.end()); // Check if (0,entry) not in the MST. If not, instrument edge // (IncrementCounterInBlock()) and set the counter initially to zero, if // the edge is in the MST the counter is initialised to -1. BasicBlock *entry = &(F->getEntryBlock()); - ProfileInfo::Edge edge = ProfileInfo::getEdge(0,entry); + ProfileInfo::Edge edge = ProfileInfo::getEdge(0, entry); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,entry,i); + printEdgeCounter(edge, entry, i); IncrementCounterInBlock(entry, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ @@ -170,9 +170,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // has no successors, the virtual edge (BB,0) is processed. TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 0) { - ProfileInfo::Edge edge = ProfileInfo::getEdge(BB,0); + ProfileInfo::Edge edge = ProfileInfo::getEdge(BB, 0); if (!std::binary_search(MST.begin(), MST.end(), edge)) { - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; Initializer[i++] = (Zero); } else{ @@ -195,11 +195,11 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // otherwise insert it in the successor block. 
if (TI->getNumSuccessors() == 1) { // Insert counter at the start of the block - printEdgeCounter(edge,BB,i); + printEdgeCounter(edge, BB, i); IncrementCounterInBlock(BB, i, Counters); ++NumEdgesInserted; } else { // Insert counter at the start of the block - printEdgeCounter(edge,Succ,i); + printEdgeCounter(edge, Succ, i); IncrementCounterInBlock(Succ, i, Counters); ++NumEdgesInserted; } Initializer[i++] = (Zero); @@ -212,9 +212,9 @@ bool OptimalEdgeProfiler::runOnModule(Module &M) { // Check if the number of edges counted at first was the number of edges we // considered for instrumentation. - assert(i==NumEdges && "the number of edges in counting array is wrong"); + assert(i == NumEdges && "the number of edges in counting array is wrong"); - // Assing the now completely defined initialiser to the array. + // Assign the now completely defined initialiser to the array. Constant *init = ConstantArray::get(ATy, Initializer); Counters->setInitializer(init); diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp index 6449b39cfc9d..6b3f12dcbc84 100644 --- a/lib/Transforms/Instrumentation/PathProfiling.cpp +++ b/lib/Transforms/Instrumentation/PathProfiling.cpp @@ -63,7 +63,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Instrumentation.h" -#include #include #define HASH_THRESHHOLD 100000 @@ -259,7 +258,7 @@ class BLInstrumentationDag : public BallLarusDag { }; // --------------------------------------------------------------------------- -// PathProfiler is a module pass which intruments path profiling instructions +// PathProfiler is a module pass which instruments path profiling instructions // --------------------------------------------------------------------------- class PathProfiler : public ModulePass { private: @@ -388,6 +387,9 @@ namespace llvm { ftEntryTypeBuilder; // BallLarusEdge << operator overloading + raw_ostream& 
operator<<(raw_ostream& os, + const BLInstrumentationEdge& edge) + LLVM_ATTRIBUTE_USED; raw_ostream& operator<<(raw_ostream& os, const BLInstrumentationEdge& edge) { os << "[" << edge.getSource()->getName() << " -> " @@ -929,14 +931,16 @@ BasicBlock::iterator PathProfiler::getInsertionPoint(BasicBlock* block, Value* void PathProfiler::preparePHI(BLInstrumentationNode* node) { BasicBlock* block = node->getBlock(); BasicBlock::iterator insertPoint = block->getFirstNonPHI(); - PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), "pathNumber", + pred_iterator PB = pred_begin(node->getBlock()), + PE = pred_end(node->getBlock()); + PHINode* phi = PHINode::Create(Type::getInt32Ty(*Context), + std::distance(PB, PE), "pathNumber", insertPoint ); node->setPathPHI(phi); node->setStartingPathNumber(phi); node->setEndingPathNumber(phi); - for(pred_iterator predIt = pred_begin(node->getBlock()), - end = pred_end(node->getBlock()); predIt != end; predIt++) { + for(pred_iterator predIt = PB; predIt != PE; predIt++) { BasicBlock* pred = (*predIt); if(pred != NULL) diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp index b57bbf60a07a..7435bc37fbe1 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp +++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp @@ -110,7 +110,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning) { // Insert the increment after any alloca or PHI instructions... BasicBlock::iterator InsertPos = beginning ? 
BB->getFirstNonPHI() : - BB->getTerminator(); + BB->getTerminator(); while (isa(InsertPos)) ++InsertPos; @@ -121,8 +121,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, Indices[0] = Constant::getNullValue(Type::getInt32Ty(Context)); Indices[1] = ConstantInt::get(Type::getInt32Ty(Context), CounterNum); Constant *ElementPtr = - ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], - Indices.size()); + ConstantExpr::getGetElementPtr(CounterArray, &Indices[0], Indices.size()); // Load, increment and store the value back. Value *OldVal = new LoadInst(ElementPtr, "OldFuncCounter", InsertPos); @@ -131,3 +130,41 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, "NewFuncCounter", InsertPos); new StoreInst(NewVal, ElementPtr, InsertPos); } + +void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) { + // llvm.global_dtors is an array of type { i32, void ()* }. Prepare those + // types. + const Type *GlobalDtorElems[2] = { + Type::getInt32Ty(Mod->getContext()), + FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo() + }; + const StructType *GlobalDtorElemTy = + StructType::get(Mod->getContext(), GlobalDtorElems, false); + + // Construct the new element we'll be adding. + Constant *Elem[2] = { + ConstantInt::get(Type::getInt32Ty(Mod->getContext()), 65535), + ConstantExpr::getBitCast(Callee, GlobalDtorElems[1]) + }; + + // If llvm.global_dtors exists, make a copy of the things in its list and + // delete it, to replace it with one that has a larger array type. + std::vector dtors; + if (GlobalVariable *GlobalDtors = Mod->getNamedGlobal("llvm.global_dtors")) { + if (ConstantArray *InitList = + dyn_cast(GlobalDtors->getInitializer())) { + for (unsigned i = 0, e = InitList->getType()->getNumElements(); + i != e; ++i) + dtors.push_back(cast(InitList->getOperand(i))); + } + GlobalDtors->eraseFromParent(); + } + + // Build up llvm.global_dtors with our new item in it. 
+ GlobalVariable *GlobalDtors = new GlobalVariable( + *Mod, ArrayType::get(GlobalDtorElemTy, 1), false, + GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors"); + dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false)); + GlobalDtors->setInitializer(ConstantArray::get( + cast(GlobalDtors->getType()->getElementType()), dtors)); +} diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.h b/lib/Transforms/Instrumentation/ProfilingUtils.h index a76e3576e1ca..09b22171ff04 100644 --- a/lib/Transforms/Instrumentation/ProfilingUtils.h +++ b/lib/Transforms/Instrumentation/ProfilingUtils.h @@ -18,9 +18,10 @@ #define PROFILINGUTILS_H namespace llvm { + class BasicBlock; class Function; class GlobalValue; - class BasicBlock; + class Module; class PointerType; void InsertProfilingInitCall(Function *MainFn, const char *FnName, @@ -29,6 +30,7 @@ namespace llvm { void IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum, GlobalValue *CounterArray, bool beginning = true); + void InsertProfilingShutdownCall(Function *Callee, Module *Mod); } #endif diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 106fb8f3c833..fcf914f8baa0 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -7,7 +7,6 @@ add_llvm_library(LLVMScalarOpts DCE.cpp DeadStoreElimination.cpp EarlyCSE.cpp - GEPSplitter.cpp GVN.cpp IndVarSimplify.cpp JumpThreading.cpp @@ -27,7 +26,6 @@ add_llvm_library(LLVMScalarOpts Scalar.cpp ScalarReplAggregates.cpp SimplifyCFGPass.cpp - SimplifyHalfPowrLibCalls.cpp SimplifyLibCalls.cpp Sink.cpp TailDuplication.cpp diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp index 9536939ba2d4..018439018553 100644 --- a/lib/Transforms/Scalar/CodeGenPrepare.cpp +++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp @@ -47,21 +47,21 @@ using namespace llvm; using namespace llvm::PatternMatch; STATISTIC(NumBlocksElim, "Number of blocks 
eliminated"); -STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); -STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); +STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated"); +STATISTIC(NumGEPsElim, "Number of GEPs converted to casts"); STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps"); STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses " "of sunken Casts"); STATISTIC(NumMemoryInsts, "Number of memory instructions whose address " "computations were sunk"); -STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); -STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads"); +STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); +STATISTIC(NumRetsDup, "Number of return instructions duplicated"); -static cl::opt -CriticalEdgeSplit("cgp-critical-edge-splitting", - cl::desc("Split critical edges during codegen prepare"), - cl::init(false), cl::Hidden); +static cl::opt DisableBranchOpts( + "disable-cgp-branch-opts", cl::Hidden, cl::init(false), + cl::desc("Disable branch optimizations in CodeGenPrepare")); namespace { class CodeGenPrepare : public FunctionPass { @@ -76,15 +76,15 @@ namespace { /// update it. BasicBlock::iterator CurInstIterator; - /// BackEdges - Keep a set of all the loop back edges. - /// - SmallSet, 8> BackEdges; - - // Keeps track of non-local addresses that have been sunk into a block. This - // allows us to avoid inserting duplicate code for blocks with multiple - // load/stores of the same address. + /// Keeps track of non-local addresses that have been sunk into a block. + /// This allows us to avoid inserting duplicate code for blocks with + /// multiple load/stores of the same address. DenseMap SunkAddrs; + /// ModifiedDT - If CFG is modified in anyway, dominator tree may need to + /// be updated. 
+ bool ModifiedDT; + public: static char ID; // Pass identification, replacement for typeid explicit CodeGenPrepare(const TargetLowering *tli = 0) @@ -98,10 +98,6 @@ namespace { AU.addPreserved(); } - virtual void releaseMemory() { - BackEdges.clear(); - } - private: bool EliminateMostlyEmptyBlocks(Function &F); bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const; @@ -113,7 +109,7 @@ namespace { bool OptimizeCallInst(CallInst *CI); bool MoveExtToFormExtLoad(Instruction *I); bool OptimizeExtUses(Instruction *I); - void findLoopBackEdges(const Function &F); + bool DupRetToEnableTailCallOpts(ReturnInst *RI); }; } @@ -125,40 +121,42 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) { return new CodeGenPrepare(TLI); } -/// findLoopBackEdges - Do a DFS walk to find loop back edges. -/// -void CodeGenPrepare::findLoopBackEdges(const Function &F) { - SmallVector, 32> Edges; - FindFunctionBackedges(F, Edges); - - BackEdges.insert(Edges.begin(), Edges.end()); -} - - bool CodeGenPrepare::runOnFunction(Function &F) { bool EverMadeChange = false; + ModifiedDT = false; DT = getAnalysisIfAvailable(); PFI = getAnalysisIfAvailable(); + // First pass, eliminate blocks that contain only PHI nodes and an // unconditional branch. EverMadeChange |= EliminateMostlyEmptyBlocks(F); - // Now find loop back edges, but only if they are being used to decide which - // critical edges to split. 
- if (CriticalEdgeSplit) - findLoopBackEdges(F); - bool MadeChange = true; while (MadeChange) { MadeChange = false; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; MadeChange |= OptimizeBlock(*BB); + } EverMadeChange |= MadeChange; } SunkAddrs.clear(); + if (!DisableBranchOpts) { + MadeChange = false; + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + MadeChange |= ConstantFoldTerminator(BB); + + if (MadeChange) + ModifiedDT = true; + EverMadeChange |= MadeChange; + } + + if (ModifiedDT && DT) + DT->DT->recalculate(F); + return EverMadeChange; } @@ -333,7 +331,7 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { // The PHIs are now updated, change everything that refers to BB to use // DestBB and remove BB. BB->replaceAllUsesWith(DestBB); - if (DT) { + if (DT && !ModifiedDT) { BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock(); BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock(); BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom); @@ -350,110 +348,6 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) { DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"); } -/// FindReusablePredBB - Check all of the predecessors of the block DestPHI -/// lives in to see if there is a block that we can reuse as a critical edge -/// from TIBB. -static BasicBlock *FindReusablePredBB(PHINode *DestPHI, BasicBlock *TIBB) { - BasicBlock *Dest = DestPHI->getParent(); - - /// TIPHIValues - This array is lazily computed to determine the values of - /// PHIs in Dest that TI would provide. - SmallVector TIPHIValues; - - /// TIBBEntryNo - This is a cache to speed up pred queries for TIBB. - unsigned TIBBEntryNo = 0; - - // Check to see if Dest has any blocks that can be used as a split edge for - // this terminator. 
- for (unsigned pi = 0, e = DestPHI->getNumIncomingValues(); pi != e; ++pi) { - BasicBlock *Pred = DestPHI->getIncomingBlock(pi); - // To be usable, the pred has to end with an uncond branch to the dest. - BranchInst *PredBr = dyn_cast(Pred->getTerminator()); - if (!PredBr || !PredBr->isUnconditional()) - continue; - // Must be empty other than the branch and debug info. - BasicBlock::iterator I = Pred->begin(); - while (isa(I)) - I++; - if (&*I != PredBr) - continue; - // Cannot be the entry block; its label does not get emitted. - if (Pred == &Dest->getParent()->getEntryBlock()) - continue; - - // Finally, since we know that Dest has phi nodes in it, we have to make - // sure that jumping to Pred will have the same effect as going to Dest in - // terms of PHI values. - PHINode *PN; - unsigned PHINo = 0; - unsigned PredEntryNo = pi; - - bool FoundMatch = true; - for (BasicBlock::iterator I = Dest->begin(); - (PN = dyn_cast(I)); ++I, ++PHINo) { - if (PHINo == TIPHIValues.size()) { - if (PN->getIncomingBlock(TIBBEntryNo) != TIBB) - TIBBEntryNo = PN->getBasicBlockIndex(TIBB); - TIPHIValues.push_back(PN->getIncomingValue(TIBBEntryNo)); - } - - // If the PHI entry doesn't work, we can't use this pred. - if (PN->getIncomingBlock(PredEntryNo) != Pred) - PredEntryNo = PN->getBasicBlockIndex(Pred); - - if (TIPHIValues[PHINo] != PN->getIncomingValue(PredEntryNo)) { - FoundMatch = false; - break; - } - } - - // If we found a workable predecessor, change TI to branch to Succ. - if (FoundMatch) - return Pred; - } - return 0; -} - - -/// SplitEdgeNicely - Split the critical edge from TI to its specified -/// successor if it will improve codegen. We only do this if the successor has -/// phi nodes (otherwise critical edges are ok). If there is already another -/// predecessor of the succ that is empty (and thus has no phi nodes), use it -/// instead of introducing a new block. 
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, - SmallSet, 8> &BackEdges, - Pass *P) { - BasicBlock *TIBB = TI->getParent(); - BasicBlock *Dest = TI->getSuccessor(SuccNum); - assert(isa(Dest->begin()) && - "This should only be called if Dest has a PHI!"); - PHINode *DestPHI = cast(Dest->begin()); - - // Do not split edges to EH landing pads. - if (InvokeInst *Invoke = dyn_cast(TI)) - if (Invoke->getSuccessor(1) == Dest) - return; - - // As a hack, never split backedges of loops. Even though the copy for any - // PHIs inserted on the backedge would be dead for exits from the loop, we - // assume that the cost of *splitting* the backedge would be too high. - if (BackEdges.count(std::make_pair(TIBB, Dest))) - return; - - if (BasicBlock *ReuseBB = FindReusablePredBB(DestPHI, TIBB)) { - ProfileInfo *PFI = P->getAnalysisIfAvailable(); - if (PFI) - PFI->splitEdge(TIBB, Dest, ReuseBB); - Dest->removePredecessor(TIBB); - TI->setSuccessor(SuccNum, ReuseBB); - return; - } - - SplitCriticalEdge(TI, SuccNum, P, true); -} - - /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop /// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC), /// sink it into user blocks to reduce the number of virtual @@ -640,7 +534,8 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { // happens. WeakVH IterHandle(CurInstIterator); - ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, DT); + ReplaceAndSimplifyAllUses(CI, RetVal, TLI ? TLI->getTargetData() : 0, + ModifiedDT ? 0 : DT); // If the iterator instruction was recursively deleted, start over at the // start of the block. @@ -666,6 +561,129 @@ bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) { return Simplifier.fold(CI, TD); } +/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return +/// instructions to the predecessor to enable tail call optimizations. 
The +/// case it is currently looking for is: +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// br label %return +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// br label %return +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// br label %return +/// return: +/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] +/// ret i32 %retval +/// +/// => +/// +/// bb0: +/// %tmp0 = tail call i32 @f0() +/// ret i32 %tmp0 +/// bb1: +/// %tmp1 = tail call i32 @f1() +/// ret i32 %tmp1 +/// bb2: +/// %tmp2 = tail call i32 @f2() +/// ret i32 %tmp2 +/// +bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) { + if (!TLI) + return false; + + Value *V = RI->getReturnValue(); + PHINode *PN = V ? dyn_cast(V) : NULL; + if (V && !PN) + return false; + + BasicBlock *BB = RI->getParent(); + if (PN && PN->getParent() != BB) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + // See llvm::isInTailCallPosition(). + const Function *F = BB->getParent(); + unsigned CallerRetAttr = F->getAttributes().getRetAttributes(); + if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt)) + return false; + + // Make sure there are no instructions between the PHI and return, or that the + // return is the first instruction in the block. + if (PN) { + BasicBlock::iterator BI = BB->begin(); + do { ++BI; } while (isa(BI)); + if (&*BI != RI) + return false; + } else { + BasicBlock::iterator BI = BB->begin(); + while (isa(BI)) ++BI; + if (&*BI != RI) + return false; + } + + /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail + /// call. + SmallVector TailCalls; + if (PN) { + for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) { + CallInst *CI = dyn_cast(PN->getIncomingValue(I)); + // Make sure the phi value is indeed produced by the tail call. 
+ if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && + TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } else { + SmallPtrSet VisitedBBs; + for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) { + if (!VisitedBBs.insert(*PI)) + continue; + + BasicBlock::InstListType &InstList = (*PI)->getInstList(); + BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin(); + BasicBlock::InstListType::reverse_iterator RE = InstList.rend(); + do { ++RI; } while (RI != RE && isa(&*RI)); + if (RI == RE) + continue; + + CallInst *CI = dyn_cast(&*RI); + if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI)) + TailCalls.push_back(CI); + } + } + + bool Changed = false; + for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { + CallInst *CI = TailCalls[i]; + CallSite CS(CI); + + // Conservatively require the attributes of the call to match those of the + // return. Ignore noalias because it doesn't affect the call sequence. + unsigned CalleeRetAttr = CS.getAttributes().getRetAttributes(); + if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias) + continue; + + // Make sure the call instruction is followed by an unconditional branch to + // the return block. + BasicBlock *CallBB = CI->getParent(); + BranchInst *BI = dyn_cast(CallBB->getTerminator()); + if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB) + continue; + + // Duplicate the return into CallBB. + (void)FoldReturnIntoUncondBranch(RI, BB, CallBB); + ModifiedDT = Changed = true; + ++NumRetsDup; + } + + // If we eliminated all predecessors of the block, delete the block now. 
+ if (Changed && pred_begin(BB) == pred_end(BB)) + BB->eraseFromParent(); + + return Changed; +} + //===----------------------------------------------------------------------===// // Memory Optimization //===----------------------------------------------------------------------===// @@ -701,7 +719,8 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, // the addressing mode obtained from the non-PHI roots of the graph // are equivalent. Value *Consensus = 0; - unsigned NumUses = 0; + unsigned NumUsesConsensus = 0; + bool IsNumUsesConsensusValid = false; SmallVector AddrModeInsts; ExtAddrMode AddrMode; while (!worklist.empty()) { @@ -728,16 +747,31 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(V, AccessTy,MemoryInst, NewAddrModeInsts, *TLI); - - // Ensure that the obtained addressing mode is equivalent to that obtained - // for all other roots of the PHI traversal. Also, when choosing one - // such root as representative, select the one with the most uses in order - // to keep the cost modeling heuristics in AddressingModeMatcher applicable. - if (!Consensus || NewAddrMode == AddrMode) { - if (V->getNumUses() > NumUses) { + + // This check is broken into two cases with very similar code to avoid using + // getNumUses() as much as possible. Some values have a lot of uses, so + // calling getNumUses() unconditionally caused a significant compile-time + // regression. + if (!Consensus) { + Consensus = V; + AddrMode = NewAddrMode; + AddrModeInsts = NewAddrModeInsts; + continue; + } else if (NewAddrMode == AddrMode) { + if (!IsNumUsesConsensusValid) { + NumUsesConsensus = Consensus->getNumUses(); + IsNumUsesConsensusValid = true; + } + + // Ensure that the obtained addressing mode is equivalent to that obtained + // for all other roots of the PHI traversal. 
Also, when choosing one + // such root as representative, select the one with the most uses in order + // to keep the cost modeling heuristics in AddressingModeMatcher + // applicable. + unsigned NumUses = V->getNumUses(); + if (NumUses > NumUsesConsensus) { Consensus = V; - NumUses = V->getNumUses(); - AddrMode = NewAddrMode; + NumUsesConsensus = NumUses; AddrModeInsts = NewAddrModeInsts; } continue; @@ -855,11 +889,26 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr, MemoryInst->replaceUsesOfWith(Repl, SunkAddr); + // If we have no uses, recursively delete the value and all dead instructions + // using it. if (Repl->use_empty()) { + // This can cause recursive deletion, which can invalidate our iterator. + // Use a WeakVH to hold onto it in case this happens. + WeakVH IterHandle(CurInstIterator); + BasicBlock *BB = CurInstIterator->getParent(); + RecursivelyDeleteTriviallyDeadInstructions(Repl); - // This address is now available for reassignment, so erase the table entry; - // we don't want to match some completely different instruction. - SunkAddrs[Addr] = 0; + + if (IterHandle != CurInstIterator) { + // If the iterator instruction was recursively deleted, start over at the + // start of the block. + CurInstIterator = BB->begin(); + SunkAddrs.clear(); + } else { + // This address is now available for reassignment, so erase the table + // entry; we don't want to match some completely different instruction. + SunkAddrs[Addr] = 0; + } } ++NumMemoryInsts; return true; @@ -1073,6 +1122,9 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { if (CallInst *CI = dyn_cast(I)) return OptimizeCallInst(CI); + if (ReturnInst *RI = dyn_cast(I)) + return DupRetToEnableTailCallOpts(RI); + return false; } @@ -1080,21 +1132,8 @@ bool CodeGenPrepare::OptimizeInst(Instruction *I) { // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. 
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) { - bool MadeChange = false; - - // Split all critical edges where the dest block has a PHI. - if (CriticalEdgeSplit) { - TerminatorInst *BBTI = BB.getTerminator(); - if (BBTI->getNumSuccessors() > 1 && !isa(BBTI)) { - for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) { - BasicBlock *SuccBB = BBTI->getSuccessor(i); - if (isa(SuccBB->begin()) && isCriticalEdge(BBTI, i, true)) - SplitEdgeNicely(BBTI, i, BackEdges, this); - } - } - } - SunkAddrs.clear(); + bool MadeChange = false; CurInstIterator = BB.begin(); for (BasicBlock::iterator E = BB.end(); CurInstIterator != E; ) diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index be12973b645f..e275268fc4ea 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -13,6 +13,7 @@ #define DEBUG_TYPE "correlated-value-propagation" #include "llvm/Transforms/Scalar.h" +#include "llvm/Constants.h" #include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/Pass.h" diff --git a/lib/Transforms/Scalar/DCE.cpp b/lib/Transforms/Scalar/DCE.cpp index dbb68f3e0bd1..8dbcc23d7ec8 100644 --- a/lib/Transforms/Scalar/DCE.cpp +++ b/lib/Transforms/Scalar/DCE.cpp @@ -23,7 +23,6 @@ #include "llvm/Pass.h" #include "llvm/Support/InstIterator.h" #include "llvm/ADT/Statistic.h" -#include using namespace llvm; STATISTIC(DIEEliminated, "Number of insts removed by DIE pass"); diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 867a06ad202d..53e46400dca8 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -340,24 +340,35 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // Okay, we have stores to two completely different pointers. 
Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - int64_t Off1 = 0, Off2 = 0; - const Value *BP1 = GetPointerBaseWithConstantOffset(P1, Off1, TD); - const Value *BP2 = GetPointerBaseWithConstantOffset(P2, Off2, TD); + int64_t EarlierOff = 0, LaterOff = 0; + const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); + const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) return false; - - // Otherwise, we might have a situation like: - // store i16 -> P + 1 Byte - // store i32 -> P - // In this case, we see if the later store completely overlaps all bytes - // stored by the previous store. - if (Off1 < Off2 || // Earlier starts before Later. - Off1+Earlier.Size > Off2+Later.Size) // Earlier goes beyond Later. - return false; - // Otherwise, we have complete overlap. - return true; + + // The later store completely overlaps the earlier store if: + // + // 1. Both start at the same offset and the later one's size is greater than + // or equal to the earlier one's, or + // + // |--earlier--| + // |-- later --| + // + // 2. The earlier store has an offset greater than the later offset, but which + // still lies completely within the later store. + // + // |--earlier--| + // |----- later ------| + // + // We have to be careful here as *Off is signed while *.Size is unsigned. + if (EarlierOff >= LaterOff && + uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) + return true; + + // Otherwise, they don't completely overlap. + return false; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a @@ -474,7 +485,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // away the store and we bail out. However, if we depend on on something // that overwrites the memory location we *can* potentially optimize it. 
// - // Find out what memory location the dependant instruction stores. + // Find out what memory location the dependent instruction stores. Instruction *DepWrite = InstDep.getInst(); AliasAnalysis::Location DepLoc = getLocForWrite(DepWrite, *AA); // If we didn't get a useful location, or if it isn't a size, bail out. @@ -631,28 +642,15 @@ bool DSE::handleEndBlock(BasicBlock &BB) { if (AA->doesNotAccessMemory(CS)) continue; - unsigned NumModRef = 0, NumOther = 0; - // If the call might load from any of our allocas, then any store above // the call is live. SmallVector LiveAllocas; for (SmallPtrSet::iterator I = DeadStackObjects.begin(), E = DeadStackObjects.end(); I != E; ++I) { - // If we detect that our AA is imprecise, it's not worth it to scan the - // rest of the DeadPointers set. Just assume that the AA will return - // ModRef for everything, and go ahead and bail out. - if (NumModRef >= 16 && NumOther == 0) - return MadeChange; - // See if the call site touches it. AliasAnalysis::ModRefResult A = AA->getModRefInfo(CS, *I, getPointerSize(*I, *AA)); - if (A == AliasAnalysis::ModRef) - ++NumModRef; - else - ++NumOther; - if (A == AliasAnalysis::ModRef || A == AliasAnalysis::Ref) LiveAllocas.push_back(*I); } diff --git a/lib/Transforms/Scalar/GEPSplitter.cpp b/lib/Transforms/Scalar/GEPSplitter.cpp deleted file mode 100644 index 4c3d188a8afd..000000000000 --- a/lib/Transforms/Scalar/GEPSplitter.cpp +++ /dev/null @@ -1,83 +0,0 @@ -//===- GEPSplitter.cpp - Split complex GEPs into simple ones --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This function breaks GEPs with more than 2 non-zero operands into smaller -// GEPs each with no more than 2 non-zero operands. This exposes redundancy -// between GEPs with common initial operand sequences. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "split-geps" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { - class GEPSplitter : public FunctionPass { - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const; - public: - static char ID; // Pass identification, replacement for typeid - explicit GEPSplitter() : FunctionPass(ID) { - initializeGEPSplitterPass(*PassRegistry::getPassRegistry()); - } - }; -} - -char GEPSplitter::ID = 0; -INITIALIZE_PASS(GEPSplitter, "split-geps", - "split complex GEPs into simple GEPs", false, false) - -FunctionPass *llvm::createGEPSplitterPass() { - return new GEPSplitter(); -} - -bool GEPSplitter::runOnFunction(Function &F) { - bool Changed = false; - - // Visit each GEP instruction. - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - for (BasicBlock::iterator II = I->begin(), IE = I->end(); II != IE; ) - if (GetElementPtrInst *GEP = dyn_cast(II++)) { - unsigned NumOps = GEP->getNumOperands(); - // Ignore GEPs which are already simple. - if (NumOps <= 2) - continue; - bool FirstIndexIsZero = isa(GEP->getOperand(1)) && - cast(GEP->getOperand(1))->isZero(); - if (NumOps == 3 && FirstIndexIsZero) - continue; - // The first index is special and gets expanded with a 2-operand GEP - // (unless it's zero, in which case we can skip this). - Value *NewGEP = FirstIndexIsZero ? - GEP->getOperand(0) : - GetElementPtrInst::Create(GEP->getOperand(0), GEP->getOperand(1), - "tmp", GEP); - // All remaining indices get expanded with a 3-operand GEP with zero - // as the second operand. 
- Value *Idxs[2]; - Idxs[0] = ConstantInt::get(Type::getInt64Ty(F.getContext()), 0); - for (unsigned i = 2; i != NumOps; ++i) { - Idxs[1] = GEP->getOperand(i); - NewGEP = GetElementPtrInst::Create(NewGEP, Idxs, Idxs+2, "tmp", GEP); - } - GEP->replaceAllUsesWith(NewGEP); - GEP->eraseFromParent(); - Changed = true; - } - - return Changed; -} - -void GEPSplitter::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); -} diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index a0123f589816..efecb97de77d 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -63,50 +63,48 @@ static cl::opt EnableLoadPRE("enable-load-pre", cl::init(true)); namespace { struct Expression { uint32_t opcode; - const Type* type; + const Type *type; SmallVector varargs; - Expression() { } - Expression(uint32_t o) : opcode(o) { } + Expression(uint32_t o = ~2U) : opcode(o) { } bool operator==(const Expression &other) const { if (opcode != other.opcode) return false; - else if (opcode == ~0U || opcode == ~1U) + if (opcode == ~0U || opcode == ~1U) return true; - else if (type != other.type) + if (type != other.type) return false; - else if (varargs != other.varargs) + if (varargs != other.varargs) return false; return true; } }; class ValueTable { - private: - DenseMap valueNumbering; - DenseMap expressionNumbering; - AliasAnalysis* AA; - MemoryDependenceAnalysis* MD; - DominatorTree* DT; + DenseMap valueNumbering; + DenseMap expressionNumbering; + AliasAnalysis *AA; + MemoryDependenceAnalysis *MD; + DominatorTree *DT; - uint32_t nextValueNumber; + uint32_t nextValueNumber; - Expression create_expression(Instruction* I); - uint32_t lookup_or_add_call(CallInst* C); - public: - ValueTable() : nextValueNumber(1) { } - uint32_t lookup_or_add(Value *V); - uint32_t lookup(Value *V) const; - void add(Value *V, uint32_t num); - void clear(); - void erase(Value *v); - void setAliasAnalysis(AliasAnalysis* A) { AA = A; } - AliasAnalysis 
*getAliasAnalysis() const { return AA; } - void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } - void setDomTree(DominatorTree* D) { DT = D; } - uint32_t getNextUnusedValueNumber() { return nextValueNumber; } - void verifyRemoved(const Value *) const; + Expression create_expression(Instruction* I); + uint32_t lookup_or_add_call(CallInst* C); + public: + ValueTable() : nextValueNumber(1) { } + uint32_t lookup_or_add(Value *V); + uint32_t lookup(Value *V) const; + void add(Value *V, uint32_t num); + void clear(); + void erase(Value *v); + void setAliasAnalysis(AliasAnalysis* A) { AA = A; } + AliasAnalysis *getAliasAnalysis() const { return AA; } + void setMemDep(MemoryDependenceAnalysis* M) { MD = M; } + void setDomTree(DominatorTree* D) { DT = D; } + uint32_t getNextUnusedValueNumber() { return nextValueNumber; } + void verifyRemoved(const Value *) const; }; } @@ -364,14 +362,14 @@ uint32_t ValueTable::lookup(Value *V) const { return VI->second; } -/// clear - Remove all entries from the ValueTable +/// clear - Remove all entries from the ValueTable. void ValueTable::clear() { valueNumbering.clear(); expressionNumbering.clear(); nextValueNumber = 1; } -/// erase - Remove a value from the value numbering +/// erase - Remove a value from the value numbering. 
void ValueTable::erase(Value *V) { valueNumbering.erase(V); } @@ -392,20 +390,11 @@ void ValueTable::verifyRemoved(const Value *V) const { namespace { class GVN : public FunctionPass { - bool runOnFunction(Function &F); - public: - static char ID; // Pass identification, replacement for typeid - explicit GVN(bool noloads = false) - : FunctionPass(ID), NoLoads(noloads), MD(0) { - initializeGVNPass(*PassRegistry::getPassRegistry()); - } - - private: bool NoLoads; MemoryDependenceAnalysis *MD; DominatorTree *DT; - const TargetData* TD; - + const TargetData *TD; + ValueTable VN; /// LeaderTable - A mapping from value numbers to lists of Value*'s that @@ -418,17 +407,39 @@ namespace { DenseMap LeaderTable; BumpPtrAllocator TableAllocator; + SmallVector InstrsToErase; + public: + static char ID; // Pass identification, replacement for typeid + explicit GVN(bool noloads = false) + : FunctionPass(ID), NoLoads(noloads), MD(0) { + initializeGVNPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + + /// markInstructionForDeletion - This removes the specified instruction from + /// our various maps and marks it for deletion. + void markInstructionForDeletion(Instruction *I) { + VN.erase(I); + InstrsToErase.push_back(I); + } + + const TargetData *getTargetData() const { return TD; } + DominatorTree &getDominatorTree() const { return *DT; } + AliasAnalysis *getAliasAnalysis() const { return VN.getAliasAnalysis(); } + MemoryDependenceAnalysis &getMemDep() const { return *MD; } + private: /// addToLeaderTable - Push a new Value to the LeaderTable onto the list for /// its value number. 
void addToLeaderTable(uint32_t N, Value *V, BasicBlock *BB) { - LeaderTableEntry& Curr = LeaderTable[N]; + LeaderTableEntry &Curr = LeaderTable[N]; if (!Curr.Val) { Curr.Val = V; Curr.BB = BB; return; } - LeaderTableEntry* Node = TableAllocator.Allocate(); + LeaderTableEntry *Node = TableAllocator.Allocate(); Node->Val = V; Node->BB = BB; Node->Next = Curr.Next; @@ -474,19 +485,17 @@ namespace { AU.addPreserved(); AU.addPreserved(); } + // Helper fuctions // FIXME: eliminate or document these better - bool processLoad(LoadInst* L, - SmallVectorImpl &toErase); - bool processInstruction(Instruction *I, - SmallVectorImpl &toErase); - bool processNonLocalLoad(LoadInst* L, - SmallVectorImpl &toErase); + bool processLoad(LoadInst *L); + bool processInstruction(Instruction *I); + bool processNonLocalLoad(LoadInst *L); bool processBlock(BasicBlock *BB); - void dump(DenseMap& d); + void dump(DenseMap &d); bool iterateOnFunction(Function &F); - bool performPRE(Function& F); + bool performPRE(Function &F); Value *findLeader(BasicBlock *BB, uint32_t num); void cleanupGlobalSets(); void verifyRemoved(const Instruction *I) const; @@ -629,17 +638,17 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, if (!CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, TD)) return 0; + // If this is already the right type, just return it. const Type *StoredValTy = StoredVal->getType(); uint64_t StoreSize = TD.getTypeStoreSizeInBits(StoredValTy); - uint64_t LoadSize = TD.getTypeSizeInBits(LoadedTy); + uint64_t LoadSize = TD.getTypeStoreSizeInBits(LoadedTy); // If the store and reload are the same size, we can always reuse it. if (StoreSize == LoadSize) { - if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) { - // Pointer to Pointer -> use bitcast. + // Pointer to Pointer -> use bitcast. + if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) return new BitCastInst(StoredVal, LoadedTy, "", InsertPt); - } // Convert source pointers to integers, which can be bitcast. 
if (StoredValTy->isPointerTy()) { @@ -796,6 +805,36 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr, StorePtr, StoreSize, TD); } +/// AnalyzeLoadFromClobberingLoad - This function is called when we have a +/// memdep query of a load that ends up being clobbered by another load. See if +/// the other load can feed into the second load. +static int AnalyzeLoadFromClobberingLoad(const Type *LoadTy, Value *LoadPtr, + LoadInst *DepLI, const TargetData &TD){ + // Cannot handle reading from store of first-class aggregate yet. + if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy()) + return -1; + + Value *DepPtr = DepLI->getPointerOperand(); + uint64_t DepSize = TD.getTypeSizeInBits(DepLI->getType()); + int R = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, DepSize, TD); + if (R != -1) return R; + + // If we have a load/load clobber an DepLI can be widened to cover this load, + // then we should widen it! + int64_t LoadOffs = 0; + const Value *LoadBase = + GetPointerBaseWithConstantOffset(LoadPtr, LoadOffs, TD); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + + unsigned Size = MemoryDependenceAnalysis:: + getLoadLoadClobberFullWidthSize(LoadBase, LoadOffs, LoadSize, DepLI, TD); + if (Size == 0) return -1; + + return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, DepPtr, Size*8, TD); +} + + + static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, MemIntrinsic *MI, const TargetData &TD) { @@ -843,9 +882,9 @@ static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr, /// GetStoreValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering store. This means -/// that the store *may* provide bits used by the load but we can't be sure -/// because the pointers don't mustalias. Check this case to see if there is -/// anything more we can do before we give up. 
+/// that the store provides bits used by the load but we the pointers don't +/// mustalias. Check this case to see if there is anything more we can do +/// before we give up. static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, const Type *LoadTy, Instruction *InsertPt, const TargetData &TD){ @@ -881,6 +920,69 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset, return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD); } +/// GetStoreValueForLoad - This function is called when we have a +/// memdep query of a load that ends up being a clobbering load. This means +/// that the load *may* provide bits used by the load but we can't be sure +/// because the pointers don't mustalias. Check this case to see if there is +/// anything more we can do before we give up. +static Value *GetLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, + const Type *LoadTy, Instruction *InsertPt, + GVN &gvn) { + const TargetData &TD = *gvn.getTargetData(); + // If Offset+LoadTy exceeds the size of SrcVal, then we must be wanting to + // widen SrcVal out to a larger load. + unsigned SrcValSize = TD.getTypeStoreSize(SrcVal->getType()); + unsigned LoadSize = TD.getTypeStoreSize(LoadTy); + if (Offset+LoadSize > SrcValSize) { + assert(!SrcVal->isVolatile() && "Cannot widen volatile load!"); + assert(isa(SrcVal->getType())&&"Can't widen non-integer load"); + // If we have a load/load clobber an DepLI can be widened to cover this + // load, then we should widen it to the next power of 2 size big enough! + unsigned NewLoadSize = Offset+LoadSize; + if (!isPowerOf2_32(NewLoadSize)) + NewLoadSize = NextPowerOf2(NewLoadSize); + + Value *PtrVal = SrcVal->getPointerOperand(); + + // Insert the new load after the old load. This ensures that subsequent + // memdep queries will find the new load. We can't easily remove the old + // load completely because it is already in the value numbering table. 
+ IRBuilder<> Builder(SrcVal->getParent(), ++BasicBlock::iterator(SrcVal)); + const Type *DestPTy = + IntegerType::get(LoadTy->getContext(), NewLoadSize*8); + DestPTy = PointerType::get(DestPTy, + cast(PtrVal->getType())->getAddressSpace()); + + PtrVal = Builder.CreateBitCast(PtrVal, DestPTy); + LoadInst *NewLoad = Builder.CreateLoad(PtrVal); + NewLoad->takeName(SrcVal); + NewLoad->setAlignment(SrcVal->getAlignment()); + + DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); + DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); + + // Replace uses of the original load with the wider load. On a big endian + // system, we need to shift down to get the relevant bits. + Value *RV = NewLoad; + if (TD.isBigEndian()) + RV = Builder.CreateLShr(RV, + NewLoadSize*8-SrcVal->getType()->getPrimitiveSizeInBits()); + RV = Builder.CreateTrunc(RV, SrcVal->getType()); + SrcVal->replaceAllUsesWith(RV); + + // We would like to use gvn.markInstructionForDeletion here, but we can't + // because the load is already memoized into the leader map table that GVN + // tracks. It is potentially possible to remove the load from the table, + // but then there all of the operations based on it would need to be + // rehashed. Just leave the dead load around. + gvn.getMemDep().removeInstruction(SrcVal); + SrcVal = NewLoad; + } + + return GetStoreValueForLoad(SrcVal, Offset, LoadTy, InsertPt, TD); +} + + /// GetMemInstValueForLoad - This function is called when we have a /// memdep query of a load that ends up being a clobbering mem intrinsic. static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset, @@ -943,11 +1045,12 @@ struct AvailableValueInBlock { BasicBlock *BB; enum ValType { SimpleVal, // A simple offsetted value that is accessed. + LoadVal, // A value produced by a load. MemIntrin // A memory intrinsic which is loaded from. }; /// V - The value that is live out of the block. 
- PointerIntPair Val; + PointerIntPair Val; /// Offset - The byte offset in Val that is interesting for the load query. unsigned Offset; @@ -972,37 +1075,69 @@ struct AvailableValueInBlock { return Res; } + static AvailableValueInBlock getLoad(BasicBlock *BB, LoadInst *LI, + unsigned Offset = 0) { + AvailableValueInBlock Res; + Res.BB = BB; + Res.Val.setPointer(LI); + Res.Val.setInt(LoadVal); + Res.Offset = Offset; + return Res; + } + bool isSimpleValue() const { return Val.getInt() == SimpleVal; } + bool isCoercedLoadValue() const { return Val.getInt() == LoadVal; } + bool isMemIntrinValue() const { return Val.getInt() == MemIntrin; } + Value *getSimpleValue() const { assert(isSimpleValue() && "Wrong accessor"); return Val.getPointer(); } + LoadInst *getCoercedLoadValue() const { + assert(isCoercedLoadValue() && "Wrong accessor"); + return cast(Val.getPointer()); + } + MemIntrinsic *getMemIntrinValue() const { - assert(!isSimpleValue() && "Wrong accessor"); + assert(isMemIntrinValue() && "Wrong accessor"); return cast(Val.getPointer()); } /// MaterializeAdjustedValue - Emit code into this block to adjust the value /// defined here to the specified type. This handles various coercion cases. 
- Value *MaterializeAdjustedValue(const Type *LoadTy, - const TargetData *TD) const { + Value *MaterializeAdjustedValue(const Type *LoadTy, GVN &gvn) const { Value *Res; if (isSimpleValue()) { Res = getSimpleValue(); if (Res->getType() != LoadTy) { + const TargetData *TD = gvn.getTargetData(); assert(TD && "Need target data to handle type mismatch case"); Res = GetStoreValueForLoad(Res, Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " + DEBUG(dbgs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " " << *getSimpleValue() << '\n' << *Res << '\n' << "\n\n\n"); } + } else if (isCoercedLoadValue()) { + LoadInst *Load = getCoercedLoadValue(); + if (Load->getType() == LoadTy && Offset == 0) { + Res = Load; + } else { + Res = GetLoadValueForLoad(Load, Offset, LoadTy, BB->getTerminator(), + gvn); + + DEBUG(dbgs() << "GVN COERCED NONLOCAL LOAD:\nOffset: " << Offset << " " + << *getCoercedLoadValue() << '\n' + << *Res << '\n' << "\n\n\n"); + } } else { + const TargetData *TD = gvn.getTargetData(); + assert(TD && "Need target data to handle type mismatch case"); Res = GetMemInstValueForLoad(getMemIntrinValue(), Offset, LoadTy, BB->getTerminator(), *TD); - DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset + DEBUG(dbgs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset << " " << *getMemIntrinValue() << '\n' << *Res << '\n' << "\n\n\n"); } @@ -1010,21 +1145,20 @@ struct AvailableValueInBlock { } }; -} +} // end anonymous namespace /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock, /// construct SSA form, allowing us to eliminate LI. This returns the value /// that should be used at LI's definition site. static Value *ConstructSSAForLoadSet(LoadInst *LI, SmallVectorImpl &ValuesPerBlock, - const TargetData *TD, - const DominatorTree &DT, - AliasAnalysis *AA) { + GVN &gvn) { // Check for the fully redundant, dominating load case. 
In this case, we can // just use the dominating value directly. if (ValuesPerBlock.size() == 1 && - DT.properlyDominates(ValuesPerBlock[0].BB, LI->getParent())) - return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), TD); + gvn.getDominatorTree().properlyDominates(ValuesPerBlock[0].BB, + LI->getParent())) + return ValuesPerBlock[0].MaterializeAdjustedValue(LI->getType(), gvn); // Otherwise, we have to construct SSA form. SmallVector NewPHIs; @@ -1040,14 +1174,16 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, if (SSAUpdate.HasValueForBlock(BB)) continue; - SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, TD)); + SSAUpdate.AddAvailableValue(BB, AV.MaterializeAdjustedValue(LoadTy, gvn)); } // Perform PHI construction. Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent()); // If new PHI nodes were created, notify alias analysis. - if (V->getType()->isPointerTy()) + if (V->getType()->isPointerTy()) { + AliasAnalysis *AA = gvn.getAliasAnalysis(); + for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) AA->copyValue(LI, NewPHIs[i]); @@ -1059,6 +1195,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI, for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) AA->addEscapingUse(P->getOperandUse(2*ii)); } + } return V; } @@ -1071,8 +1208,7 @@ static bool isLifetimeStart(const Instruction *Inst) { /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are /// non-local by performing PHI construction. -bool GVN::processNonLocalLoad(LoadInst *LI, - SmallVectorImpl &toErase) { +bool GVN::processNonLocalLoad(LoadInst *LI) { // Find the non-local dependencies of the load. SmallVector Deps; AliasAnalysis::Location Loc = VN.getAliasAnalysis()->getLocation(LI); @@ -1088,7 +1224,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI, // If we had a phi translation failure, we'll have a single entry which is a // clobber in the current block. Reject this early. 
- if (Deps.size() == 1 && Deps[0].getResult().isClobber()) { + if (Deps.size() == 1 && Deps[0].getResult().isClobber() && + Deps[0].getResult().getInst()->getParent() == LI->getParent()) { DEBUG( dbgs() << "GVN: non-local load "; WriteAsOperand(dbgs(), LI); @@ -1129,6 +1266,26 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } } } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast(DepInfo.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI != LI && Address && TD) { + int Offset = AnalyzeLoadFromClobberingLoad(LI->getType(), + LI->getPointerOperand(), + DepLI, *TD); + + if (Offset != -1) { + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB,DepLI, + Offset)); + continue; + } + } + } // If the clobbering value is a memset/memcpy/memmove, see if we can // forward a value on from it. @@ -1187,7 +1344,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI, continue; } } - ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB, LD)); + ValuesPerBlock.push_back(AvailableValueInBlock::getLoad(DepBB, LD)); continue; } @@ -1206,16 +1363,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI, DEBUG(dbgs() << "GVN REMOVING NONLOCAL LOAD: " << *LI << '\n'); // Perform PHI construction. - Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumGVNLoad; return true; } @@ -1429,22 +1584,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI, } // Perform PHI construction. 
- Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, TD, *DT, - VN.getAliasAnalysis()); + Value *V = ConstructSSAForLoadSet(LI, ValuesPerBlock, *this); LI->replaceAllUsesWith(V); if (isa(V)) V->takeName(LI); if (V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(LI); - toErase.push_back(LI); + markInstructionForDeletion(LI); ++NumPRELoad; return true; } /// processLoad - Attempt to eliminate a load, first by eliminating it /// locally, and then attempting non-local elimination if that fails. -bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { +bool GVN::processLoad(LoadInst *L) { if (!MD) return false; @@ -1454,8 +1607,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // ... to a pointer that has been loaded from before... MemDepResult Dep = MD->getDependency(L); - // If the value isn't available, don't do anything! - if (Dep.isClobber()) { + // If we have a clobber and target data is around, see if this is a clobber + // that we can fix up through code synthesis. + if (Dep.isClobber() && TD) { // Check to see if we have something like this: // store i32 123, i32* %P // %A = bitcast i32* %P to i8* @@ -1467,26 +1621,40 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // completely covers this load. This sort of thing can happen in bitfield // access code. 
Value *AvailVal = 0; - if (StoreInst *DepSI = dyn_cast(Dep.getInst())) - if (TD) { - int Offset = AnalyzeLoadFromClobberingStore(L->getType(), - L->getPointerOperand(), - DepSI, *TD); - if (Offset != -1) - AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, - L->getType(), L, *TD); - } + if (StoreInst *DepSI = dyn_cast(Dep.getInst())) { + int Offset = AnalyzeLoadFromClobberingStore(L->getType(), + L->getPointerOperand(), + DepSI, *TD); + if (Offset != -1) + AvailVal = GetStoreValueForLoad(DepSI->getValueOperand(), Offset, + L->getType(), L, *TD); + } + + // Check to see if we have something like this: + // load i32* P + // load i8* (P+1) + // if we have this, replace the later with an extraction from the former. + if (LoadInst *DepLI = dyn_cast(Dep.getInst())) { + // If this is a clobber and L is the first instruction in its block, then + // we have the first instruction in the entry block. + if (DepLI == L) + return false; + + int Offset = AnalyzeLoadFromClobberingLoad(L->getType(), + L->getPointerOperand(), + DepLI, *TD); + if (Offset != -1) + AvailVal = GetLoadValueForLoad(DepLI, Offset, L->getType(), L, *this); + } // If the clobbering value is a memset/memcpy/memmove, see if we can forward // a value on from it. 
if (MemIntrinsic *DepMI = dyn_cast(Dep.getInst())) { - if (TD) { - int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), - L->getPointerOperand(), - DepMI, *TD); - if (Offset != -1) - AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD); - } + int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(), + L->getPointerOperand(), + DepMI, *TD); + if (Offset != -1) + AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L, *TD); } if (AvailVal) { @@ -1497,14 +1665,16 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { L->replaceAllUsesWith(AvailVal); if (AvailVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(AvailVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } - + } + + // If the value isn't available, don't do anything! + if (Dep.isClobber()) { DEBUG( - // fast print dep, using operator<< on instruction would be too slow + // fast print dep, using operator<< on instruction is too slow. dbgs() << "GVN: load "; WriteAsOperand(dbgs(), L); Instruction *I = Dep.getInst(); @@ -1515,7 +1685,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // If it is defined in another block, try harder. if (Dep.isNonLocal()) - return processNonLocalLoad(L, toErase); + return processNonLocalLoad(L); Instruction *DepInst = Dep.getInst(); if (StoreInst *DepSI = dyn_cast(DepInst)) { @@ -1542,8 +1712,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { L->replaceAllUsesWith(StoredVal); if (StoredVal->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(StoredVal); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1556,7 +1725,8 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // (depending on its type). 
if (DepLI->getType() != L->getType()) { if (TD) { - AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), L,*TD); + AvailableVal = CoerceAvailableValueToLoadType(DepLI, L->getType(), + L, *TD); if (AvailableVal == 0) return false; @@ -1571,8 +1741,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { L->replaceAllUsesWith(AvailableVal); if (DepLI->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(DepLI); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1582,19 +1751,17 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl &toErase) { // intervening stores, for example. if (isa(DepInst) || isMalloc(DepInst)) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } // If this load occurs either right after a lifetime begin, // then the loaded value is undefined. - if (IntrinsicInst* II = dyn_cast(DepInst)) { + if (IntrinsicInst *II = dyn_cast(DepInst)) { if (II->getIntrinsicID() == Intrinsic::lifetime_start) { L->replaceAllUsesWith(UndefValue::get(L->getType())); - VN.erase(L); - toErase.push_back(L); + markInstructionForDeletion(L); ++NumGVNLoad; return true; } @@ -1634,8 +1801,7 @@ Value *GVN::findLeader(BasicBlock *BB, uint32_t num) { /// processInstruction - When calculating availability, handle an instruction /// by inserting it into the appropriate sets -bool GVN::processInstruction(Instruction *I, - SmallVectorImpl &toErase) { +bool GVN::processInstruction(Instruction *I) { // Ignore dbg info intrinsics. 
if (isa(I)) return false; @@ -1648,20 +1814,17 @@ bool GVN::processInstruction(Instruction *I, I->replaceAllUsesWith(V); if (MD && V->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(V); - VN.erase(I); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } if (LoadInst *LI = dyn_cast(I)) { - bool Changed = processLoad(LI, toErase); + if (processLoad(LI)) + return true; - if (!Changed) { - unsigned Num = VN.lookup_or_add(LI); - addToLeaderTable(Num, LI, LI->getParent()); - } - - return Changed; + unsigned Num = VN.lookup_or_add(LI); + addToLeaderTable(Num, LI, LI->getParent()); + return false; } // For conditions branches, we can perform simple conditional propagation on @@ -1720,11 +1883,10 @@ bool GVN::processInstruction(Instruction *I, } // Remove it! - VN.erase(I); I->replaceAllUsesWith(repl); if (MD && repl->getType()->isPointerTy()) MD->invalidateCachedPointerInfo(repl); - toErase.push_back(I); + markInstructionForDeletion(I); return true; } @@ -1781,35 +1943,36 @@ bool GVN::runOnFunction(Function& F) { bool GVN::processBlock(BasicBlock *BB) { - // FIXME: Kill off toErase by doing erasing eagerly in a helper function (and - // incrementing BI before processing an instruction). - SmallVector toErase; + // FIXME: Kill off InstrsToErase by doing erasing eagerly in a helper function + // (and incrementing BI before processing an instruction). + assert(InstrsToErase.empty() && + "We expect InstrsToErase to be empty across iterations"); bool ChangedFunction = false; for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { - ChangedFunction |= processInstruction(BI, toErase); - if (toErase.empty()) { + ChangedFunction |= processInstruction(BI); + if (InstrsToErase.empty()) { ++BI; continue; } // If we need some instructions deleted, do it now. - NumGVNInstr += toErase.size(); + NumGVNInstr += InstrsToErase.size(); // Avoid iterator invalidation. 
bool AtStart = BI == BB->begin(); if (!AtStart) --BI; - for (SmallVector::iterator I = toErase.begin(), - E = toErase.end(); I != E; ++I) { + for (SmallVector::iterator I = InstrsToErase.begin(), + E = InstrsToErase.end(); I != E; ++I) { DEBUG(dbgs() << "GVN removed: " << **I << '\n'); if (MD) MD->removeInstruction(*I); (*I)->eraseFromParent(); DEBUG(verifyRemoved(*I)); } - toErase.clear(); + InstrsToErase.clear(); if (AtStart) BI = BB->begin(); @@ -1944,11 +2107,11 @@ bool GVN::performPRE(Function &F) { addToLeaderTable(ValNo, PREInstr, PREPred); // Create a PHI to make the value available in this block. - PHINode* Phi = PHINode::Create(CurInst->getType(), + pred_iterator PB = pred_begin(CurrentBlock), PE = pred_end(CurrentBlock); + PHINode* Phi = PHINode::Create(CurInst->getType(), std::distance(PB, PE), CurInst->getName() + ".pre-phi", CurrentBlock->begin()); - for (pred_iterator PI = pred_begin(CurrentBlock), - PE = pred_end(CurrentBlock); PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; Phi->addIncoming(predMap[P], P); } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 0fb67982a3db..09d569a097dd 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -73,6 +73,7 @@ namespace { LoopInfo *LI; ScalarEvolution *SE; DominatorTree *DT; + SmallVector DeadInsts; bool Changed; public: @@ -98,6 +99,7 @@ namespace { } private: + bool isValidRewrite(Value *FromVal, Value *ToVal); void EliminateIVComparisons(); void EliminateIVRemainders(); @@ -134,6 +136,53 @@ Pass *llvm::createIndVarSimplifyPass() { return new IndVarSimplify(); } +/// isValidRewrite - Return true if the SCEV expansion generated by the +/// rewriter can replace the original value. SCEV guarantees that it +/// produces the same value, but the way it is produced may be illegal IR. +/// Ideally, this function will only be called for verification. 
+bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { + // If an SCEV expression subsumed multiple pointers, its expansion could + // reassociate the GEP changing the base pointer. This is illegal because the + // final address produced by a GEP chain must be inbounds relative to its + // underlying object. Otherwise basic alias analysis, among other things, + // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid + // producing an expression involving multiple pointers. Until then, we must + // bail out here. + // + // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject + // because it understands lcssa phis while SCEV does not. + Value *FromPtr = FromVal; + Value *ToPtr = ToVal; + if (GEPOperator *GEP = dyn_cast(FromVal)) { + FromPtr = GEP->getPointerOperand(); + } + if (GEPOperator *GEP = dyn_cast(ToVal)) { + ToPtr = GEP->getPointerOperand(); + } + if (FromPtr != FromVal || ToPtr != ToVal) { + // Quickly check the common case + if (FromPtr == ToPtr) + return true; + + // SCEV may have rewritten an expression that produces the GEP's pointer + // operand. That's ok as long as the pointer operand has the same base + // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the + // base of a recurrence. This handles the case in which SCEV expansion + // converts a pointer type recurrence into a nonrecurrent pointer base + // indexed by an integer recurrence. + const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr)); + const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr)); + if (FromBase == ToBase) + return true; + + DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " + << *FromBase << " != " << *ToBase << "\n"); + + return false; + } + return true; +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. 
This pass is able to rewrite the exit tests of any loop where the @@ -226,7 +275,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // update the branch to use the new comparison; in the common case this // will make old comparison dead. BI->setCondition(Cond); - RecursivelyDeleteTriviallyDeadInstructions(OrigCond); + DeadInsts.push_back(OrigCond); ++NumLFTR; Changed = true; @@ -304,14 +353,18 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { if (!SE->isLoopInvariant(ExitValue, L)) continue; - Changed = true; - ++NumReplaced; - Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst); DEBUG(dbgs() << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal << '\n' << " LoopVal = " << *Inst << "\n"); + if (!isValidRewrite(Inst, ExitVal)) { + DeadInsts.push_back(ExitVal); + continue; + } + Changed = true; + ++NumReplaced; + PN->setIncomingValue(i, ExitVal); // If this instruction is dead now, delete it. @@ -366,8 +419,6 @@ void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { } void IndVarSimplify::EliminateIVComparisons() { - SmallVector DeadInsts; - // Look for ICmp users. for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -399,18 +450,9 @@ void IndVarSimplify::EliminateIVComparisons() { DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); DeadInsts.push_back(ICmp); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } void IndVarSimplify::EliminateIVRemainders() { - SmallVector DeadInsts; - // Look for SRem and URem users. 
for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E; ++I) { IVStrideUse &UI = *I; @@ -466,13 +508,6 @@ void IndVarSimplify::EliminateIVRemainders() { DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); DeadInsts.push_back(Rem); } - - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. - while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { @@ -491,6 +526,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { LI = &getAnalysis(); SE = &getAnalysis(); DT = &getAnalysis(); + DeadInsts.clear(); Changed = false; // If there are any floating-point recurrences, attempt to @@ -589,9 +625,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { ExitingBlock, BI, Rewriter); } - // Rewrite IV-derived expressions. Clears the rewriter cache. + // Rewrite IV-derived expressions. RewriteIVExpressions(L, Rewriter); + // Clear the rewriter cache, because values that are in the rewriter's cache + // can be deleted in the loop below, causing the AssertingVH in the cache to + // trigger. + Rewriter.clear(); + + // Now that we're done iterating through lists, clean up any instructions + // which are now dead. + while (!DeadInsts.empty()) + if (Instruction *Inst = + dyn_cast_or_null(&*DeadInsts.pop_back_val())) + RecursivelyDeleteTriviallyDeadInstructions(Inst); + // The Rewriter may not be used from this point on. // Loop-invariant instructions in the preheader that aren't used in the @@ -632,7 +680,7 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { if (!isSafe(*I, L, SE)) return false; return true; } - + // A cast is safe if its operand is. 
if (const SCEVCastExpr *C = dyn_cast(S)) return isSafe(C->getOperand(), L, SE); @@ -651,8 +699,6 @@ static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) { } void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { - SmallVector DeadInsts; - // Rewrite all induction variable expressions in terms of the canonical // induction variable. // @@ -705,6 +751,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { // Now expand it into actual Instructions and patch it into place. Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt); + DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' + << " into = " << *NewVal << "\n"); + + if (!isValidRewrite(Op, NewVal)) { + DeadInsts.push_back(NewVal); + continue; + } // Inform ScalarEvolution that this value is changing. The change doesn't // affect its value, but it does potentially affect which use lists the // value will be on after the replacement, which affects ScalarEvolution's @@ -717,25 +770,13 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) { NewVal->takeName(Op); User->replaceUsesOfWith(Op, NewVal); UI->setOperandValToReplace(NewVal); - DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n' - << " into = " << *NewVal << "\n"); + ++NumRemoved; Changed = true; // The old value may be dead now. DeadInsts.push_back(Op); } - - // Clear the rewriter cache, because values that are in the rewriter's cache - // can be deleted in the loop below, causing the AssertingVH in the cache to - // trigger. - Rewriter.clear(); - // Now that we're done iterating through lists, clean up any instructions - // which are now dead. 
- while (!DeadInsts.empty()) - if (Instruction *Inst = - dyn_cast_or_null(&*DeadInsts.pop_back_val())) - RecursivelyDeleteTriviallyDeadInstructions(Inst); } /// If there's a single exit block, sink any loop-invariant values that @@ -859,7 +900,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { BinaryOperator *Incr = dyn_cast(PN->getIncomingValue(BackEdge)); if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return; - + // If this is not an add of the PHI with a constantfp, or if the constant fp // is not an integer, bail out. ConstantFP *IncValueVal = dyn_cast(Incr->getOperand(1)); @@ -884,7 +925,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (Compare == 0 || !Compare->hasOneUse() || !isa(Compare->use_back())) return; - + BranchInst *TheBr = cast(Compare->use_back()); // We need to verify that the branch actually controls the iteration count @@ -896,8 +937,8 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { (L->contains(TheBr->getSuccessor(0)) && L->contains(TheBr->getSuccessor(1)))) return; - - + + // If it isn't a comparison with an integer-as-fp (the exit value), we can't // transform it. ConstantFP *ExitValueVal = dyn_cast(Compare->getOperand(1)); @@ -905,7 +946,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (ExitValueVal == 0 || !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue)) return; - + // Find new predicate for integer comparison. CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE; switch (Compare->getPredicate()) { @@ -923,13 +964,13 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { case CmpInst::FCMP_OLE: case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break; } - + // We convert the floating point induction variable to a signed i32 value if // we can. This is only safe if the comparison will not overflow in a way // that won't be trapped by the integer equivalent operations. Check for this // now. 
// TODO: We could use i64 if it is native and the range requires it. - + // The start/stride/exit values must all fit in signed i32. if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue)) return; @@ -945,59 +986,59 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) { if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE) return; - + uint32_t Range = uint32_t(ExitValue-InitValue); if (NewPred == CmpInst::ICMP_SLE) { // Normalize SLE -> SLT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue) return; - + } else { // If we have a negative stride, we require the init to be greater than the // exit value and an equality or greater than comparison. if (InitValue >= ExitValue || NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE) return; - + uint32_t Range = uint32_t(InitValue-ExitValue); if (NewPred == CmpInst::ICMP_SGE) { // Normalize SGE -> SGT, check for infinite loop. if (++Range == 0) return; // Range overflows. } - + unsigned Leftover = Range % uint32_t(-IncValue); - + // If this is an equality comparison, we require that the strided value // exactly land on the exit value, otherwise the IV condition will wrap // around and do things the fp IV wouldn't. if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) && Leftover != 0) return; - + // If the stride would wrap around the i32 before exiting, we can't // transform the IV. 
if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue) return; } - + const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext()); // Insert new integer induction variable. - PHINode *NewPHI = PHINode::Create(Int32Ty, PN->getName()+".int", PN); + PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN); NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue), PN->getIncomingBlock(IncomingEdge)); diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 90094a8da257..7168177a76b4 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -16,6 +16,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/LLVMContext.h" #include "llvm/Pass.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/Loads.h" @@ -170,9 +171,9 @@ bool JumpThreading::runOnFunction(Function &F) { Changed = true; continue; } - + BranchInst *BI = dyn_cast(BB->getTerminator()); - + // Can't thread an unconditional jump, but if the block is "almost // empty", we can replace uses of it with uses of the successor and make // this dead. @@ -608,7 +609,7 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) { static bool hasAddressTakenAndUsed(BasicBlock *BB) { if (!BB->hasAddressTaken()) return false; - + // If the block has its address taken, it may be a tree of dead constants // hanging off of it. These shouldn't keep the block alive. BlockAddress *BA = BlockAddress::get(BB); @@ -668,6 +669,17 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) { return false; // Must be an invoke. } + // Run constant folding to see if we can reduce the condition to a simple + // constant. 
+ if (Instruction *I = dyn_cast(Condition)) { + Value *SimpleVal = ConstantFoldInstruction(I, TD); + if (SimpleVal) { + I->replaceAllUsesWith(SimpleVal); + I->eraseFromParent(); + Condition = SimpleVal; + } + } + // If the terminator is branching on an undef, we can pick any of the // successors to branch to. Let GetBestDestForJumpOnUndef decide. if (isa(Condition)) { @@ -928,13 +940,14 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) { array_pod_sort(AvailablePreds.begin(), AvailablePreds.end()); // Create a PHI node at the start of the block for the PRE'd load value. - PHINode *PN = PHINode::Create(LI->getType(), "", LoadBB->begin()); + pred_iterator PB = pred_begin(LoadBB), PE = pred_end(LoadBB); + PHINode *PN = PHINode::Create(LI->getType(), std::distance(PB, PE), "", + LoadBB->begin()); PN->takeName(LI); // Insert new entries into the PHI for each predecessor. A single block may // have multiple entries here. - for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB); PI != E; - ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; AvailablePredsTy::iterator I = std::lower_bound(AvailablePreds.begin(), AvailablePreds.end(), diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 07867933d08c..93de9cf002eb 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -445,7 +445,8 @@ void LICM::sink(Instruction &I) { // enough that we handle it as a special (more efficient) case. It is more // efficient to handle because there are no PHI nodes that need to be placed. if (ExitBlocks.size() == 1) { - if (!DT->dominates(I.getParent(), ExitBlocks[0])) { + if (!isa(I) && + !DT->dominates(I.getParent(), ExitBlocks[0])) { // Instruction is not used, just delete it. CurAST->deleteValue(&I); // If I has users in unreachable blocks, eliminate. 
@@ -742,30 +743,13 @@ void LICM::PromoteAliasSet(AliasSet &AS) { Preheader->getTerminator()); SSA.AddAvailableValue(Preheader, PreheaderLoad); - // Copy any value stored to or loaded from a must-alias of the pointer. - if (PreheaderLoad->getType()->isPointerTy()) { - Value *SomeValue; - if (LoadInst *LI = dyn_cast(LoopUses[0])) - SomeValue = LI; - else - SomeValue = cast(LoopUses[0])->getValueOperand(); - - CurAST->copyValue(SomeValue, PreheaderLoad); - } - // Rewrite all the loads in the loop and remember all the definitions from // stores in the loop. Promoter.run(LoopUses); - - // If the preheader load is itself a pointer, we need to tell alias analysis - // about the new pointer we created in the preheader block and about any PHI - // nodes that just got inserted. - if (PreheaderLoad->getType()->isPointerTy()) { - for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) - CurAST->copyValue(PreheaderLoad, NewPHIs[i]); - } - - // fwew, we're done! + + // If the SSAUpdater didn't use the load in the preheader, just zap it now. + if (PreheaderLoad->use_empty()) + PreheaderLoad->eraseFromParent(); } diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index f8ce214750ac..1366231e9a1a 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -81,7 +81,7 @@ namespace { bool processLoopStore(StoreInst *SI, const SCEV *BECount); bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount); - + bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *SplatValue, Instruction *TheStore, @@ -91,7 +91,7 @@ namespace { const SCEVAddRecExpr *StoreEv, const SCEVAddRecExpr *LoadEv, const SCEV *BECount); - + /// This transformation requires natural loop information & requires that /// loop preheaders be inserted into the CFG. 
/// @@ -134,50 +134,50 @@ Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); } /// static void DeleteDeadInstruction(Instruction *I, ScalarEvolution &SE) { SmallVector NowDeadInsts; - + NowDeadInsts.push_back(I); - + // Before we touch this instruction, remove it from SE! do { Instruction *DeadInst = NowDeadInsts.pop_back_val(); - + // This instruction is dead, zap it, in stages. Start by removing it from // SCEV. SE.forgetValue(DeadInst); - + for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) { Value *Op = DeadInst->getOperand(op); DeadInst->setOperand(op, 0); - + // If this operand just became dead, add it to the NowDeadInsts list. if (!Op->use_empty()) continue; - + if (Instruction *OpI = dyn_cast(Op)) if (isInstructionTriviallyDead(OpI)) NowDeadInsts.push_back(OpI); } - + DeadInst->eraseFromParent(); - + } while (!NowDeadInsts.empty()); } bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { CurLoop = L; - + // The trip count of the loop must be analyzable. SE = &getAnalysis(); if (!SE->hasLoopInvariantBackedgeTakenCount(L)) return false; const SCEV *BECount = SE->getBackedgeTakenCount(L); if (isa(BECount)) return false; - + // If this loop executes exactly one time, then it should be peeled, not // optimized by this pass. if (const SCEVConstant *BECst = dyn_cast(BECount)) if (BECst->getValue()->getValue() == 0) return false; - + // We require target data for now. TD = getAnalysisIfAvailable(); if (TD == 0) return false; @@ -185,14 +185,14 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { DT = &getAnalysis(); LoopInfo &LI = getAnalysis(); TLI = &getAnalysis(); - + SmallVector ExitBlocks; CurLoop->getUniqueExitBlocks(ExitBlocks); DEBUG(dbgs() << "loop-idiom Scanning: F[" << L->getHeader()->getParent()->getName() << "] Loop %" << L->getHeader()->getName() << "\n"); - + bool MadeChange = false; // Scan all the blocks in the loop that are not in subloops. 
for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E; @@ -200,7 +200,7 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) { // Ignore blocks in subloops. if (LI.getLoopFor(*BI) != CurLoop) continue; - + MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks); } return MadeChange; @@ -217,7 +217,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) if (!DT->dominates(BB, ExitBlocks[i])) return false; - + bool MadeChange = false; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { Instruction *Inst = I++; @@ -226,20 +226,20 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, WeakVH InstPtr(I); if (!processLoopStore(SI, BECount)) continue; MadeChange = true; - + // If processing the store invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) I = BB->begin(); continue; } - + // Look for memset instructions, which may be optimized to a larger memset. if (MemSetInst *MSI = dyn_cast(Inst)) { WeakVH InstPtr(I); if (!processLoopMemSet(MSI, BECount)) continue; MadeChange = true; - + // If processing the memset invalidated our iterator, start over from the // top of the block. if (InstPtr == 0) @@ -247,7 +247,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount, continue; } } - + return MadeChange; } @@ -258,12 +258,12 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { Value *StoredVal = SI->getValueOperand(); Value *StorePtr = SI->getPointerOperand(); - + // Reject stores that are so large that they overflow an unsigned. uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType()); if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) return false; - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. 
If we have something else, it's a // random store we can't handle. @@ -274,9 +274,9 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { // Check to see if the stride matches the size of the store. If so, then we // know that every byte is touched in the loop. - unsigned StoreSize = (unsigned)SizeInBits >> 3; + unsigned StoreSize = (unsigned)SizeInBits >> 3; const SCEVConstant *Stride = dyn_cast(StoreEv->getOperand(1)); - + if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) { // TODO: Could also handle negative stride here someday, that will require // the validity check in mayLoopAccessLocation to be updated though. @@ -285,7 +285,7 @@ bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) { dbgs() << "NEGATIVE STRIDE: " << *SI << "\n"; dbgs() << "BB: " << *SI->getParent(); } - + return false; } @@ -319,9 +319,9 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { // If we're not allowed to hack on memset, we fail. if (!TLI->has(LibFunc::memset)) return false; - + Value *Pointer = MSI->getDest(); - + // See if the pointer expression is an AddRec like {base,+,1} on the current // loop, which indicates a strided store. If we have something else, it's a // random store we can't handle. @@ -333,16 +333,16 @@ processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) { uint64_t SizeInBytes = cast(MSI->getLength())->getZExtValue(); if ((SizeInBytes >> 32) != 0) return false; - + // Check to see if the stride matches the size of the memset. If so, then we // know that every byte is touched in the loop. const SCEVConstant *Stride = dyn_cast(Ev->getOperand(1)); - + // TODO: Could also handle negative stride here someday, that will require the // validity check in mayLoopAccessLocation to be updated though. 
if (Stride == 0 || MSI->getLength() != Stride->getValue()) return false; - + return processLoopStridedStore(Pointer, (unsigned)SizeInBytes, MSI->getAlignment(), MSI->getValue(), MSI, Ev, BECount); @@ -365,7 +365,7 @@ static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access, // to be exactly the size of the memset, which is (BECount+1)*StoreSize if (const SCEVConstant *BECst = dyn_cast(BECount)) AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize; - + // TODO: For this to be really effective, we have to dive into the pointer // operand in the store. Store to &A[i] of 100 will always return may alias // with store of &A[100], we need to StoreLoc to be "A" with size of 100, @@ -394,12 +394,12 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // that doesn't seem worthwhile. Constant *C = dyn_cast(V); if (C == 0) return 0; - + // Only handle simple values that are a power of two bytes in size. uint64_t Size = TD.getTypeSizeInBits(V->getType()); if (Size == 0 || (Size & 7) || (Size & (Size-1))) return 0; - + // Don't care enough about darwin/ppc to implement this. if (TD.isBigEndian()) return 0; @@ -410,7 +410,7 @@ static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) { // TODO: If CI is larger than 16-bytes, we can try slicing it in half to see // if the top and bottom are the same (e.g. for vectors and large integers). if (Size > 16) return 0; - + // If the constant is exactly 16 bytes, just use it. if (Size == 16) return C; @@ -428,14 +428,14 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment, Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev, const SCEV *BECount) { - + // If the stored value is a byte-wise value (like i32 -1), then it may be // turned into a memset of i8 -1, assuming that all the consecutive bytes // are stored. 
A store of i32 0x01020304 can never be turned into a memset, // but it can be turned into memset_pattern if the target supports it. Value *SplatValue = isBytewiseValue(StoredVal); Constant *PatternValue = 0; - + // If we're allowed to form a memset, and the stored value would be acceptable // for memset, use it. if (SplatValue && TLI->has(LibFunc::memset) && @@ -453,8 +453,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, // do anything with a 3-byte store, for example. return false; } - - + + // Okay, we have a strided store "p[i]" of a splattable value. We can turn // this into a memset in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -463,47 +463,47 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis(), TheStore)) return false; - + // Okay, everything looks good, insert the memset. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - + unsigned AddrSpace = cast(DestPtr->getType())->getAddressSpace(); - Value *BasePtr = + Value *BasePtr = Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
const Type *IntPtr = TD->getIntPtrType(DestPtr->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - - Value *NumBytes = + SCEV::FlagNUW); + + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - - Value *NewCall; + + CallInst *NewCall; if (SplatValue) NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment); else { Module *M = TheStore->getParent()->getParent()->getParent(); Value *MSP = M->getOrInsertFunction("memset_pattern16", Builder.getVoidTy(), - Builder.getInt8PtrTy(), + Builder.getInt8PtrTy(), Builder.getInt8PtrTy(), IntPtr, (void*)0); - + // Otherwise we should form a memset_pattern16. PatternValue is known to be // an constant array of 16-bytes. Plop the value into a mergable global. GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true, @@ -514,11 +514,11 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize, Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy()); NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes); } - + DEBUG(dbgs() << " Formed memset: " << *NewCall << "\n" << " from store to: " << *Ev << " at: " << *TheStore << "\n"); - (void)NewCall; - + NewCall->setDebugLoc(TheStore->getDebugLoc()); + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(TheStore, *SE); @@ -536,9 +536,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, // If we're not allowed to form memcpy, we fail. if (!TLI->has(LibFunc::memcpy)) return false; - + LoadInst *LI = cast(SI->getValueOperand()); - + // Okay, we have a strided store "p[i]" of a loaded value. 
We can turn // this into a memcpy in the loop preheader now if we want. However, this // would be unsafe to do if there is anything else in the loop that may read @@ -555,49 +555,49 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize, CurLoop, BECount, StoreSize, getAnalysis(), SI)) return false; - + // Okay, everything looks good, insert the memcpy. BasicBlock *Preheader = CurLoop->getLoopPreheader(); - + IRBuilder<> Builder(Preheader->getTerminator()); - + // The trip count of the loop and the base pointer of the addrec SCEV is // guaranteed to be loop invariant, which means that it should dominate the // header. Just insert code for it in the preheader. SCEVExpander Expander(*SE); - Value *LoadBasePtr = + Value *LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), Builder.getInt8PtrTy(LI->getPointerAddressSpace()), Preheader->getTerminator()); - Value *StoreBasePtr = + Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), Builder.getInt8PtrTy(SI->getPointerAddressSpace()), Preheader->getTerminator()); - + // The # stored bytes is (BECount+1)*Size. Expand the trip count out to // pointer size if it isn't already. 
const Type *IntPtr = TD->getIntPtrType(SI->getContext()); BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr); - + const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1), - true /*no unsigned overflow*/); + SCEV::FlagNUW); if (StoreSize != 1) NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize), - true /*no unsigned overflow*/); - + SCEV::FlagNUW); + Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator()); - + Value *NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes, std::min(SI->getAlignment(), LI->getAlignment())); - + DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"); (void)NewCall; - + // Okay, the memset has been formed. Zap the original store and anything that // feeds into it. DeleteDeadInstruction(SI, *SE); diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 95e15784df2c..47dced37c3a4 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -184,7 +184,11 @@ bool LoopRotate::rotateLoop(Loop *L) { // Now, this loop is suitable for rotation. BasicBlock *OrigPreheader = L->getLoopPreheader(); BasicBlock *OrigLatch = L->getLoopLatch(); - assert(OrigPreheader && OrigLatch && "Loop not in canonical form?"); + + // If the loop could not be converted to canonical form, it must have an + // indirectbr in it, just give up. + if (OrigPreheader == 0 || OrigLatch == 0) + return false; // Anything ScalarEvolution may know about this loop or the PHI nodes // in its header will soon be invalidated. @@ -322,7 +326,8 @@ bool LoopRotate::rotateLoop(Loop *L) { // We can fold the conditional branch in the preheader, this makes things // simpler. The first step is to remove the extra edge to the Exit block. 
Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); - BranchInst::Create(NewHeader, PHBI); + BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); + NewBI->setDebugLoc(PHBI->getDebugLoc()); PHBI->eraseFromParent(); // With our CFG finalized, update DomTree if it is available. diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index ac4aea2e404e..5abc79042390 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -253,7 +253,8 @@ static void DoInitialMatch(const SCEV *S, Loop *L, DoInitialMatch(AR->getStart(), L, Good, Bad, SE); DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + // FIXME: AR->getNoWrapFlags() + AR->getLoop(), SCEV::FlagAnyWrap), L, Good, Bad, SE); return; } @@ -455,7 +456,10 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS, const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE, IgnoreSignificantBits); if (!Start) return 0; - return SE.getAddRecExpr(Start, Step, AR->getLoop()); + // FlagNW is independent of the start value, step direction, and is + // preserved with smaller magnitude steps. 
+ // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap); } return 0; } @@ -520,7 +524,9 @@ static int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { SmallVector NewOps(AR->op_begin(), AR->op_end()); int64_t Result = ExtractImmediate(NewOps.front(), SE); if (Result != 0) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -545,7 +551,9 @@ static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { SmallVector NewOps(AR->op_begin(), AR->op_end()); GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); if (Result) - S = SE.getAddRecExpr(NewOps, AR->getLoop()); + S = SE.getAddRecExpr(NewOps, AR->getLoop(), + // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap); return Result; } return 0; @@ -564,9 +572,6 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) { switch (II->getIntrinsicID()) { default: break; case Intrinsic::prefetch: - case Intrinsic::x86_sse2_loadu_dq: - case Intrinsic::x86_sse2_loadu_pd: - case Intrinsic::x86_sse_loadu_ps: case Intrinsic::x86_sse_storeu_ps: case Intrinsic::x86_sse2_storeu_pd: case Intrinsic::x86_sse2_storeu_dq: @@ -781,7 +786,7 @@ void Cost::RateFormula(const Formula &F, } } -/// Loose - Set this cost to a loosing value. +/// Loose - Set this cost to a losing value. void Cost::Loose() { NumRegs = ~0u; AddRecCost = ~0u; @@ -1483,7 +1488,7 @@ void LSRInstance::OptimizeShadowIV() { if (!C->getValue().isStrictlyPositive()) continue; /* Add new PHINode. */ - PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH); + PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); /* create new increment. '++d' in above example. 
*/ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); @@ -1819,7 +1824,7 @@ LSRInstance::OptimizeLoopTermCond() { } } -/// reconcileNewOffset - Determine if the given use can accomodate a fixup +/// reconcileNewOffset - Determine if the given use can accommodate a fixup /// at the given offset and other details. If so, update the use and /// return true. bool @@ -2236,7 +2241,9 @@ static void CollectSubexprs(const SCEV *S, const SCEVConstant *C, if (!AR->getStart()->isZero()) { CollectSubexprs(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0), AR->getStepRecurrence(SE), - AR->getLoop()), + AR->getLoop(), + //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) + SCEV::FlagAnyWrap), C, Ops, L, SE); CollectSubexprs(AR->getStart(), C, Ops, L, SE); return; @@ -3047,7 +3054,7 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { } } -/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call +/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call /// FilterOutUndesirableDedicatedRegisters again, if necessary, now that /// we've done more filtering, as it may be able to find more formulae to /// eliminate. diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 80b263a30cb8..fef6bc31c7b6 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -43,7 +43,13 @@ namespace { class LoopUnroll : public LoopPass { public: static char ID; // Pass ID, replacement for typeid - LoopUnroll() : LoopPass(ID) { + LoopUnroll(int T = -1, int C = -1, int P = -1) : LoopPass(ID) { + CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T); + CurrentCount = (C == -1) ? UnrollCount : unsigned(C); + CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P; + + UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0); + initializeLoopUnrollPass(*PassRegistry::getPassRegistry()); } @@ -56,7 +62,10 @@ namespace { // explicit -unroll-threshold). 
static const unsigned OptSizeUnrollThreshold = 50; + unsigned CurrentCount; unsigned CurrentThreshold; + bool CurrentAllowPartial; + bool UserThreshold; // CurrentThreshold is user-specified. bool runOnLoop(Loop *L, LPPassManager &LPM); @@ -87,7 +96,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) -Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); } +Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { + return new LoopUnroll(Threshold, Count, AllowPartial); +} /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls) { @@ -119,14 +130,14 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // from UnrollThreshold, it is overridden to a smaller value if the current // function is marked as optimize-for-size, and the unroll threshold was // not user specified. - CurrentThreshold = UnrollThreshold; - if (Header->getParent()->hasFnAttr(Attribute::OptimizeForSize) && - UnrollThreshold.getNumOccurrences() == 0) - CurrentThreshold = OptSizeUnrollThreshold; + unsigned Threshold = CurrentThreshold; + if (!UserThreshold && + Header->getParent()->hasFnAttr(Attribute::OptimizeForSize)) + Threshold = OptSizeUnrollThreshold; // Find trip count unsigned TripCount = L->getSmallConstantTripCount(); - unsigned Count = UnrollCount; + unsigned Count = CurrentCount; // Automatically select an unroll count. if (Count == 0) { @@ -140,7 +151,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { } // Enforce the threshold. 
- if (CurrentThreshold != NoThreshold) { + if (Threshold != NoThreshold) { unsigned NumInlineCandidates; unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); @@ -149,16 +160,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { return false; } uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && Size > CurrentThreshold) { + if (TripCount != 1 && Size > Threshold) { DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << CurrentThreshold << "\n"); - if (!UnrollAllowPartial) { + << " because size: " << Size << ">" << Threshold << "\n"); + if (!CurrentAllowPartial) { DEBUG(dbgs() << " will not try to unroll partially because " << "-unroll-allow-partial not given\n"); return false; } // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = CurrentThreshold / LoopSize; + Count = Threshold / LoopSize; while (Count != 0 && TripCount%Count != 0) { Count--; } diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index bde0e5316c3a..a3035cbfb0ee 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetLibraryInfo.h" #include using namespace llvm; @@ -299,12 +300,15 @@ void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, namespace { class MemCpyOpt : public FunctionPass { MemoryDependenceAnalysis *MD; + TargetLibraryInfo *TLI; const TargetData *TD; public: static char ID; // Pass identification, replacement for typeid MemCpyOpt() : FunctionPass(ID) { initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); MD = 0; + TLI = 0; + TD = 0; } bool runOnFunction(Function &F); @@ -316,6 +320,7 @@ namespace { AU.addRequired(); AU.addRequired(); AU.addRequired(); 
+ AU.addRequired(); AU.addPreserved(); AU.addPreserved(); } @@ -346,6 +351,7 @@ INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", false, false) @@ -688,7 +694,7 @@ bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, if (M->getSource() == MDep->getSource()) return false; - // Second, the length of the memcpy's must be the same, or the preceeding one + // Second, the length of the memcpy's must be the same, or the preceding one // must be larger than the following one. ConstantInt *MDepLen = dyn_cast(MDep->getLength()); ConstantInt *MLen = dyn_cast(M->getLength()); @@ -804,6 +810,9 @@ bool MemCpyOpt::processMemCpy(MemCpyInst *M) { bool MemCpyOpt::processMemMove(MemMoveInst *M) { AliasAnalysis &AA = getAnalysis(); + if (!TLI->has(LibFunc::memmove)) + return false; + // See if the pointers alias. if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) return false; @@ -935,6 +944,14 @@ bool MemCpyOpt::runOnFunction(Function &F) { bool MadeChange = false; MD = &getAnalysis(); TD = getAnalysisIfAvailable(); + TLI = &getAnalysis(); + + // If we don't have at least memset and memcpy, there is little point of doing + // anything here. These are required by a freestanding implementation, so if + // even they are disabled, there is no point in trying hard. 
+ if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) + return false; + while (1) { if (!iterateOnFunction(F)) break; diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index e093b52571af..c1dfe154ae3f 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -22,6 +22,7 @@ #define DEBUG_TYPE "reassociate" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" @@ -74,6 +75,8 @@ namespace { class Reassociate : public FunctionPass { DenseMap RankMap; DenseMap, unsigned> ValueRankMap; + SmallVector RedoInsts; + SmallVector DeadInsts; bool MadeChange; public: static char ID; // Pass identification, replacement for typeid @@ -98,7 +101,7 @@ namespace { void LinearizeExprTree(BinaryOperator *I, SmallVectorImpl &Ops); void LinearizeExpr(BinaryOperator *I); Value *RemoveFactorFromExpression(Value *V, Value *Factor); - void ReassociateBB(BasicBlock *BB); + void ReassociateInst(BasicBlock::iterator &BBI); void RemoveDeadBinaryOp(Value *V); }; @@ -113,13 +116,13 @@ FunctionPass *llvm::createReassociatePass() { return new Reassociate(); } void Reassociate::RemoveDeadBinaryOp(Value *V) { Instruction *Op = dyn_cast(V); - if (!Op || !isa(Op) || !Op->use_empty()) + if (!Op || !isa(Op)) return; Value *LHS = Op->getOperand(0), *RHS = Op->getOperand(1); ValueRankMap.erase(Op); - Op->eraseFromParent(); + DeadInsts.push_back(Op); RemoveDeadBinaryOp(LHS); RemoveDeadBinaryOp(RHS); } @@ -214,6 +217,7 @@ static Instruction *LowerNegateToMultiply(Instruction *Neg, ValueRankMap.erase(Neg); Res->takeName(Neg); Neg->replaceAllUsesWith(Res); + Res->setDebugLoc(Neg->getDebugLoc()); Neg->eraseFromParent(); return Res; } @@ -503,6 +507,7 @@ static Instruction *BreakUpSubtract(Instruction *Sub, // Everyone now refers to the add instruction. 
ValueRankMap.erase(Sub); Sub->replaceAllUsesWith(New); + New->setDebugLoc(Sub->getDebugLoc()); Sub->eraseFromParent(); DEBUG(dbgs() << "Negated: " << *New << '\n'); @@ -528,6 +533,7 @@ static Instruction *ConvertShiftToMul(Instruction *Shl, ValueRankMap.erase(Shl); Mul->takeName(Shl); Shl->replaceAllUsesWith(Mul); + Mul->setDebugLoc(Shl->getDebugLoc()); Shl->eraseFromParent(); return Mul; } @@ -603,7 +609,7 @@ Value *Reassociate::RemoveFactorFromExpression(Value *V, Value *Factor) { // remaining operand. if (Factors.size() == 1) { ValueRankMap.erase(BO); - BO->eraseFromParent(); + DeadInsts.push_back(BO); V = Factors[0].Op; } else { RewriteExprTree(BO, Factors); @@ -732,7 +738,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I, // Now that we have inserted a multiply, optimize it. This allows us to // handle cases that require multiple factoring steps, such as this: // (X*2) + (X*2) + (X*2) -> (X*2)*3 -> X*6 - Mul = ReassociateExpression(cast(Mul)); + RedoInsts.push_back(Mul); // If every add operand was a duplicate, return the multiply. if (Ops.empty()) @@ -960,71 +966,69 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I, } -/// ReassociateBB - Inspect all of the instructions in this basic block, -/// reassociating them as we go. -void Reassociate::ReassociateBB(BasicBlock *BB) { - for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) { - Instruction *BI = BBI++; - if (BI->getOpcode() == Instruction::Shl && - isa(BI->getOperand(1))) - if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { - MadeChange = true; - BI = NI; - } - - // Reject cases where it is pointless to do this. - if (!isa(BI) || BI->getType()->isFloatingPointTy() || - BI->getType()->isVectorTy()) - continue; // Floating point ops are not associative. - - // Do not reassociate boolean (i1) expressions. We want to preserve the - // original order of evaluation for short-circuited comparisons that - // SimplifyCFG has folded to AND/OR expressions. 
If the expression - // is not further optimized, it is likely to be transformed back to a - // short-circuited form for code gen, and the source order may have been - // optimized for the most likely conditions. - if (BI->getType()->isIntegerTy(1)) - continue; - - // If this is a subtract instruction which is not already in negate form, - // see if we can convert it to X+-Y. - if (BI->getOpcode() == Instruction::Sub) { - if (ShouldBreakUpSubtract(BI)) { - BI = BreakUpSubtract(BI, ValueRankMap); - // Reset the BBI iterator in case BreakUpSubtract changed the - // instruction it points to. - BBI = BI; - ++BBI; - MadeChange = true; - } else if (BinaryOperator::isNeg(BI)) { - // Otherwise, this is a negation. See if the operand is a multiply tree - // and if this is not an inner node of a multiply tree. - if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && - (!BI->hasOneUse() || - !isReassociableOp(BI->use_back(), Instruction::Mul))) { - BI = LowerNegateToMultiply(BI, ValueRankMap); - MadeChange = true; - } - } +/// ReassociateInst - Inspect and reassociate the instruction at the +/// given position, post-incrementing the position. +void Reassociate::ReassociateInst(BasicBlock::iterator &BBI) { + Instruction *BI = BBI++; + if (BI->getOpcode() == Instruction::Shl && + isa(BI->getOperand(1))) + if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) { + MadeChange = true; + BI = NI; } - // If this instruction is a commutative binary operator, process it. - if (!BI->isAssociative()) continue; - BinaryOperator *I = cast(BI); + // Reject cases where it is pointless to do this. + if (!isa(BI) || BI->getType()->isFloatingPointTy() || + BI->getType()->isVectorTy()) + return; // Floating point ops are not associative. - // If this is an interior node of a reassociable tree, ignore it until we - // get to the root of the tree, to avoid N^2 analysis. 
- if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) - continue; + // Do not reassociate boolean (i1) expressions. We want to preserve the + // original order of evaluation for short-circuited comparisons that + // SimplifyCFG has folded to AND/OR expressions. If the expression + // is not further optimized, it is likely to be transformed back to a + // short-circuited form for code gen, and the source order may have been + // optimized for the most likely conditions. + if (BI->getType()->isIntegerTy(1)) + return; - // If this is an add tree that is used by a sub instruction, ignore it - // until we process the subtract. - if (I->hasOneUse() && I->getOpcode() == Instruction::Add && - cast(I->use_back())->getOpcode() == Instruction::Sub) - continue; - - ReassociateExpression(I); + // If this is a subtract instruction which is not already in negate form, + // see if we can convert it to X+-Y. + if (BI->getOpcode() == Instruction::Sub) { + if (ShouldBreakUpSubtract(BI)) { + BI = BreakUpSubtract(BI, ValueRankMap); + // Reset the BBI iterator in case BreakUpSubtract changed the + // instruction it points to. + BBI = BI; + ++BBI; + MadeChange = true; + } else if (BinaryOperator::isNeg(BI)) { + // Otherwise, this is a negation. See if the operand is a multiply tree + // and if this is not an inner node of a multiply tree. + if (isReassociableOp(BI->getOperand(1), Instruction::Mul) && + (!BI->hasOneUse() || + !isReassociableOp(BI->use_back(), Instruction::Mul))) { + BI = LowerNegateToMultiply(BI, ValueRankMap); + MadeChange = true; + } + } } + + // If this instruction is a commutative binary operator, process it. + if (!BI->isAssociative()) return; + BinaryOperator *I = cast(BI); + + // If this is an interior node of a reassociable tree, ignore it until we + // get to the root of the tree, to avoid N^2 analysis. 
+ if (I->hasOneUse() && isReassociableOp(I->use_back(), I->getOpcode())) + return; + + // If this is an add tree that is used by a sub instruction, ignore it + // until we process the subtract. + if (I->hasOneUse() && I->getOpcode() == Instruction::Add && + cast(I->use_back())->getOpcode() == Instruction::Sub) + return; + + ReassociateExpression(I); } Value *Reassociate::ReassociateExpression(BinaryOperator *I) { @@ -1051,6 +1055,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // eliminate it. DEBUG(dbgs() << "Reassoc to scalar: " << *V << '\n'); I->replaceAllUsesWith(V); + if (Instruction *VI = dyn_cast(V)) + VI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); ++NumAnnihil; return V; @@ -1074,6 +1080,8 @@ Value *Reassociate::ReassociateExpression(BinaryOperator *I) { // This expression tree simplified to something that isn't a tree, // eliminate it. I->replaceAllUsesWith(Ops[0].Op); + if (Instruction *OI = dyn_cast(Ops[0].Op)) + OI->setDebugLoc(I->getDebugLoc()); RemoveDeadBinaryOp(I); return Ops[0].Op; } @@ -1091,7 +1099,21 @@ bool Reassociate::runOnFunction(Function &F) { MadeChange = false; for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) - ReassociateBB(FI); + for (BasicBlock::iterator BBI = FI->begin(); BBI != FI->end(); ) + ReassociateInst(BBI); + + // Now that we're done, revisit any instructions which are likely to + // have secondary reassociation opportunities. + while (!RedoInsts.empty()) + if (Value *V = RedoInsts.pop_back_val()) { + BasicBlock::iterator BBI = cast(V); + ReassociateInst(BBI); + } + + // Now that we're done, delete any instructions which are no longer used. + while (!DeadInsts.empty()) + if (Value *V = DeadInsts.pop_back_val()) + RecursivelyDeleteTriviallyDeadInstructions(V); // We are done with the rank map. 
RankMap.clear(); diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp index 459bb0621f88..47afc770bb0c 100644 --- a/lib/Transforms/Scalar/Reg2Mem.cpp +++ b/lib/Transforms/Scalar/Reg2Mem.cpp @@ -9,7 +9,7 @@ // // This file demotes all registers to memory references. It is intented to be // the inverse of PromoteMemoryToRegister. By converting to loads, the only -// values live accross basic blocks are allocas and loads before phi nodes. +// values live across basic blocks are allocas and loads before phi nodes. // It is intended that this should make CFG hacking much easier. // To make later hacking easier, the entry block is split into two, such that // all introduced allocas and nothing else are in the entry block. diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index c82e929b364e..db8eb850448f 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -1989,7 +1989,7 @@ bool IPSCCP::runOnModule(Module &M) { ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType())); } - // If we infered constant or undef values for globals variables, we can delete + // If we inferred constant or undef values for globals variables, we can delete // the global and any stores that remain to it. 
const DenseMap &TG = Solver.getTrackedGlobals(); for (DenseMap::const_iterator I = TG.begin(), diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index bf9ca6d803b6..32a050617432 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -17,6 +17,7 @@ #include "llvm-c/Initialization.h" #include "llvm/InitializePasses.h" #include "llvm/PassManager.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" #include "llvm/Target/TargetData.h" #include "llvm/Transforms/Scalar.h" @@ -34,7 +35,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeDCEPass(Registry); initializeDeadInstEliminationPass(Registry); initializeDSEPass(Registry); - initializeGEPSplitterPass(Registry); initializeGVNPass(Registry); initializeEarlyCSEPass(Registry); initializeIndVarSimplifyPass(Registry); @@ -56,7 +56,6 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSROA_DTPass(Registry); initializeSROA_SSAUpPass(Registry); initializeCFGSimplifyPassPass(Registry); - initializeSimplifyHalfPowrLibCallsPass(Registry); initializeSimplifyLibCallsPass(Registry); initializeSinkingPass(Registry); initializeTailDupPass(Registry); @@ -103,6 +102,10 @@ void LLVMAddLoopDeletionPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopDeletionPass()); } +void LLVMAddLoopIdiomPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createLoopIdiomPass()); +} + void LLVMAddLoopRotatePass(LLVMPassManagerRef PM) { unwrap(PM)->add(createLoopRotatePass()); } @@ -135,6 +138,10 @@ void LLVMAddScalarReplAggregatesPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createScalarReplAggregatesPass()); } +void LLVMAddScalarReplAggregatesPassSSA(LLVMPassManagerRef PM) { + unwrap(PM)->add(createScalarReplAggregatesPass(-1, false)); +} + void LLVMAddScalarReplAggregatesPassWithThreshold(LLVMPassManagerRef PM, int Threshold) { unwrap(PM)->add(createScalarReplAggregatesPass(Threshold)); @@ -159,3 +166,19 @@ void 
LLVMAddDemoteMemoryToRegisterPass(LLVMPassManagerRef PM) { void LLVMAddVerifierPass(LLVMPassManagerRef PM) { unwrap(PM)->add(createVerifierPass()); } + +void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createCorrelatedValuePropagationPass()); +} + +void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createEarlyCSEPass()); +} + +void LLVMAddTypeBasedAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createTypeBasedAliasAnalysisPass()); +} + +void LLVMAddBasicAliasAnalysisPass(LLVMPassManagerRef PM) { + unwrap(PM)->add(createBasicAliasAnalysisPass()); +} diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c3ca85280ee7..8178c2707599 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -219,7 +219,7 @@ namespace { /// optimization, which scans the uses of an alloca and determines if it can /// rewrite it in terms of a single new alloca that can be mem2reg'd. class ConvertToScalarInfo { - /// AllocaSize - The size of the alloca being considered. + /// AllocaSize - The size of the alloca being considered in bytes. unsigned AllocaSize; const TargetData &TD; @@ -238,19 +238,22 @@ class ConvertToScalarInfo { /// also declared as a vector, we do want to promote to a vector. bool HadAVector; + /// HadNonMemTransferAccess - True if there is at least one access to the + /// alloca that is not a MemTransferInst. We don't want to turn structs into + /// large integers unless there is some potential for optimization. 
+ bool HadNonMemTransferAccess; + public: explicit ConvertToScalarInfo(unsigned Size, const TargetData &td) - : AllocaSize(Size), TD(td) { - IsNotTrivial = false; - VectorTy = 0; - HadAVector = false; - } + : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0), + HadAVector(false), HadNonMemTransferAccess(false) { } AllocaInst *TryConvert(AllocaInst *AI); private: bool CanConvertToScalar(Value *V, uint64_t Offset); - void MergeInType(const Type *In, uint64_t Offset); + void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore); + bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset); void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset); Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType, @@ -282,9 +285,14 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { << *VectorTy << '\n'); NewTy = VectorTy; // Use the vector type. } else { + unsigned BitWidth = AllocaSize * 8; + if (!HadAVector && !HadNonMemTransferAccess && + !TD.fitsInLegalInteger(BitWidth)) + return 0; + DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n"); // Create and insert the integer alloca. - NewTy = IntegerType::get(AI->getContext(), AllocaSize*8); + NewTy = IntegerType::get(AI->getContext(), BitWidth); } AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin()); ConvertUsesToScalar(AI, NewAI, 0); @@ -294,16 +302,21 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) { /// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy) /// so far at the offset specified by Offset (which is specified in bytes). /// -/// There are two cases we handle here: +/// There are three cases we handle here: /// 1) A union of vector types of the same size and potentially its elements. /// Here we turn element accesses into insert/extract element operations. /// This promotes a <4 x float> with a store of float to the third element /// into a <4 x float> that uses insert element. 
-/// 2) A fully general blob of memory, which we turn into some (potentially +/// 2) A union of vector types with power-of-2 size differences, e.g. a float, +/// <2 x float> and <4 x float>. Here we turn element accesses into insert +/// and extract element operations, and <2 x float> accesses into a cast to +/// <2 x double>, an extract, and a cast back to <2 x float>. +/// 3) A fully general blob of memory, which we turn into some (potentially /// large) integer type with extract and insert operations where the loads /// and stores would mutate the memory. We mark this by setting VectorTy /// to VoidTy. -void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { +void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset, + bool IsLoadOrStore) { // If we already decided to turn this into a blob of integer memory, there is // nothing to be done. if (VectorTy && VectorTy->isVoidTy()) @@ -314,33 +327,33 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { // If the In type is a vector that is the same size as the alloca, see if it // matches the existing VecTy. if (const VectorType *VInTy = dyn_cast(In)) { - // Remember if we saw a vector type. - HadAVector = true; - - if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { - // If we're storing/loading a vector of the right size, allow it as a - // vector. If this the first vector we see, remember the type so that - // we know the element size. If this is a subsequent access, ignore it - // even if it is a differing type but the same size. Worst case we can - // bitcast the resultant vectors. - if (VectorTy == 0) - VectorTy = VInTy; + if (MergeInVectorType(VInTy, Offset)) return; - } } else if (In->isFloatTy() || In->isDoubleTy() || (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 && isPowerOf2_32(In->getPrimitiveSizeInBits()))) { + // Full width accesses can be ignored, because they can always be turned + // into bitcasts. 
+ unsigned EltSize = In->getPrimitiveSizeInBits()/8; + if (IsLoadOrStore && EltSize == AllocaSize) + return; + // If we're accessing something that could be an element of a vector, see // if the implied vector agrees with what we already have and if Offset is // compatible with it. - unsigned EltSize = In->getPrimitiveSizeInBits()/8; - if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 && - (VectorTy == 0 || - cast(VectorTy)->getElementType() - ->getPrimitiveSizeInBits()/8 == EltSize)) { - if (VectorTy == 0) + if (Offset % EltSize == 0 && AllocaSize % EltSize == 0) { + if (!VectorTy) { VectorTy = VectorType::get(In, AllocaSize/EltSize); - return; + return; + } + + unsigned CurrentEltSize = cast(VectorTy)->getElementType() + ->getPrimitiveSizeInBits()/8; + if (EltSize == CurrentEltSize) + return; + + if (In->isIntegerTy() && isPowerOf2_32(AllocaSize / EltSize)) + return; } } @@ -349,6 +362,77 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset) { VectorTy = Type::getVoidTy(In->getContext()); } +/// MergeInVectorType - Handles the vector case of MergeInType, returning true +/// if the type was successfully merged and false otherwise. +bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy, + uint64_t Offset) { + // Remember if we saw a vector type. + HadAVector = true; + + // TODO: Support nonzero offsets? + if (Offset != 0) + return false; + + // Only allow vectors that are a power-of-2 away from the size of the alloca. + if (!isPowerOf2_64(AllocaSize / (VInTy->getBitWidth() / 8))) + return false; + + // If this the first vector we see, remember the type so that we know the + // element size. + if (!VectorTy) { + VectorTy = VInTy; + return true; + } + + unsigned BitWidth = cast(VectorTy)->getBitWidth(); + unsigned InBitWidth = VInTy->getBitWidth(); + + // Vectors of the same size can be converted using a simple bitcast. 
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) + return true; + + const Type *ElementTy = cast(VectorTy)->getElementType(); + const Type *InElementTy = cast(VInTy)->getElementType(); + + // Do not allow mixed integer and floating-point accesses from vectors of + // different sizes. + if (ElementTy->isFloatingPointTy() != InElementTy->isFloatingPointTy()) + return false; + + if (ElementTy->isFloatingPointTy()) { + // Only allow floating-point vectors of different sizes if they have the + // same element type. + // TODO: This could be loosened a bit, but would anything benefit? + if (ElementTy != InElementTy) + return false; + + // There are no arbitrary-precision floating-point types, which limits the + // number of legal vector types with larger element types that we can form + // to bitcast and extract a subvector. + // TODO: We could support some more cases with mixed fp128 and double here. + if (!(BitWidth == 64 || BitWidth == 128) || + !(InBitWidth == 64 || InBitWidth == 128)) + return false; + } else { + assert(ElementTy->isIntegerTy() && "Vector elements must be either integer " + "or floating-point."); + unsigned BitWidth = ElementTy->getPrimitiveSizeInBits(); + unsigned InBitWidth = InElementTy->getPrimitiveSizeInBits(); + + // Do not allow integer types smaller than a byte or types whose widths are + // not a multiple of a byte. + if (BitWidth < 8 || InBitWidth < 8 || + BitWidth % 8 != 0 || InBitWidth % 8 != 0) + return false; + } + + // Pick the largest of the two vector types. + if (InBitWidth > BitWidth) + VectorTy = VInTy; + + return true; +} + /// CanConvertToScalar - V is a pointer. If we can convert the pointee and all /// its accesses to a single vector type, return true and set VecTy to /// the new type. If we could convert the alloca into a single promotable @@ -369,7 +453,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. 
if (LI->getType()->isX86_MMXTy()) return false; - MergeInType(LI->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(LI->getType(), Offset, true); continue; } @@ -379,7 +464,8 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { // Don't touch MMX operations. if (SI->getOperand(0)->getType()->isX86_MMXTy()) return false; - MergeInType(SI->getOperand(0)->getType(), Offset); + HadNonMemTransferAccess = true; + MergeInType(SI->getOperand(0)->getType(), Offset, true); continue; } @@ -403,6 +489,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { if (!CanConvertToScalar(GEP, Offset+GEPOffset)) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -414,6 +501,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) { !isa(MSI->getLength())) return false; IsNotTrivial = true; // Can't be mem2reg'd. + HadNonMemTransferAccess = true; continue; } @@ -575,6 +663,63 @@ void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, } } +/// getScaledElementType - Gets a scaled element type for a partial vector +/// access of an alloca. The input types must be integer or floating-point +/// scalar or vector types, and the resulting type is an integer, float or +/// double. +static const Type *getScaledElementType(const Type *Ty1, const Type *Ty2, + unsigned NewBitWidth) { + bool IsFP1 = Ty1->isFloatingPointTy() || + (Ty1->isVectorTy() && + cast(Ty1)->getElementType()->isFloatingPointTy()); + bool IsFP2 = Ty2->isFloatingPointTy() || + (Ty2->isVectorTy() && + cast(Ty2)->getElementType()->isFloatingPointTy()); + + LLVMContext &Context = Ty1->getContext(); + + // Prefer floating-point types over integer types, as integer types may have + // been created by earlier scalar replacement. 
+ if (IsFP1 || IsFP2) { + if (NewBitWidth == 32) + return Type::getFloatTy(Context); + if (NewBitWidth == 64) + return Type::getDoubleTy(Context); + } + + return Type::getIntNTy(Context, NewBitWidth); +} + +/// CreateShuffleVectorCast - Creates a shuffle vector to convert one vector +/// to another vector of the same element type which has the same allocation +/// size but different primitive sizes (e.g. <3 x i32> and <4 x i32>). +static Value *CreateShuffleVectorCast(Value *FromVal, const Type *ToType, + IRBuilder<> &Builder) { + const Type *FromType = FromVal->getType(); + const VectorType *FromVTy = cast(FromType); + const VectorType *ToVTy = cast(ToType); + assert((ToVTy->getElementType() == FromVTy->getElementType()) && + "Vectors must have the same element type"); + Value *UnV = UndefValue::get(FromType); + unsigned numEltsFrom = FromVTy->getNumElements(); + unsigned numEltsTo = ToVTy->getNumElements(); + + SmallVector Args; + const Type* Int32Ty = Builder.getInt32Ty(); + unsigned minNumElts = std::min(numEltsFrom, numEltsTo); + unsigned i; + for (i=0; i != minNumElts; ++i) + Args.push_back(ConstantInt::get(Int32Ty, i)); + + if (i < numEltsTo) { + Constant* UnC = UndefValue::get(Int32Ty); + for (; i != numEltsTo; ++i) + Args.push_back(UnC); + } + Constant *Mask = ConstantVector::get(Args); + return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV"); +} + /// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer /// or vector value FromVal, extracting the bits from the offset specified by /// Offset. This returns the value, which is of type ToType. @@ -589,14 +734,46 @@ Value *ConvertToScalarInfo:: ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType, uint64_t Offset, IRBuilder<> &Builder) { // If the load is of the whole new alloca, no conversion is needed. 
- if (FromVal->getType() == ToType && Offset == 0) + const Type *FromType = FromVal->getType(); + if (FromType == ToType && Offset == 0) return FromVal; // If the result alloca is a vector type, this is either an element // access or a bitcast to another vector type of the same size. - if (const VectorType *VTy = dyn_cast(FromVal->getType())) { - if (ToType->isVectorTy()) - return Builder.CreateBitCast(FromVal, ToType, "tmp"); + if (const VectorType *VTy = dyn_cast(FromType)) { + unsigned ToTypeSize = TD.getTypeAllocSize(ToType); + if (ToTypeSize == AllocaSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (FromType->getPrimitiveSizeInBits() == + ToType->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(FromVal, ToType, "tmp"); + else + return CreateShuffleVectorCast(FromVal, ToType, Builder); + } + + if (isPowerOf2_64(AllocaSize / ToTypeSize)) { + assert(!(ToType->isVectorTy() && Offset != 0) && "Can't extract a value " + "of a smaller vector type at a nonzero offset."); + + const Type *CastElementTy = getScaledElementType(FromType, ToType, + ToTypeSize * 8); + unsigned NumCastVectorElements = AllocaSize / ToTypeSize; + + LLVMContext &Context = FromVal->getContext(); + const Type *CastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *Cast = Builder.CreateBitCast(FromVal, CastTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Extract = Builder.CreateExtractElement(Cast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Extract, ToType, "tmp"); + } // Otherwise it must be an element access. 
unsigned Elt = 0; @@ -714,21 +891,49 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, // Changing the whole vector with memset or with an access of a different // vector type? - if (ValSize == VecSize) - return Builder.CreateBitCast(SV, AllocaType, "tmp"); + if (ValSize == VecSize) { + // If the two types have the same primitive size, use a bit cast. + // Otherwise, it is two vectors with the same element type that has + // the same allocation size but different number of elements so use + // a shuffle vector. + if (VTy->getPrimitiveSizeInBits() == + SV->getType()->getPrimitiveSizeInBits()) + return Builder.CreateBitCast(SV, AllocaType, "tmp"); + else + return CreateShuffleVectorCast(SV, VTy, Builder); + } - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + if (isPowerOf2_64(VecSize / ValSize)) { + assert(!(SV->getType()->isVectorTy() && Offset != 0) && "Can't insert a " + "value of a smaller vector type at a nonzero offset."); + + const Type *CastElementTy = getScaledElementType(VTy, SV->getType(), + ValSize); + unsigned NumCastVectorElements = VecSize / ValSize; + + LLVMContext &Context = SV->getContext(); + const Type *OldCastTy = VectorType::get(CastElementTy, + NumCastVectorElements); + Value *OldCast = Builder.CreateBitCast(Old, OldCastTy, "tmp"); + + Value *SVCast = Builder.CreateBitCast(SV, CastElementTy, "tmp"); + + unsigned EltSize = TD.getTypeAllocSizeInBits(CastElementTy); + unsigned Elt = Offset/EltSize; + assert(EltSize*Elt == Offset && "Invalid modulus in validity checking"); + Value *Insert = + Builder.CreateInsertElement(OldCast, SVCast, ConstantInt::get( + Type::getInt32Ty(Context), Elt), "tmp"); + return Builder.CreateBitCast(Insert, AllocaType, "tmp"); + } // Must be an element insertion. 
+ assert(SV->getType() == VTy->getElementType()); + uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); unsigned Elt = Offset/EltSize; - - if (SV->getType() != VTy->getElementType()) - SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp"); - - SV = Builder.CreateInsertElement(Old, SV, + return Builder.CreateInsertElement(Old, SV, ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt), "tmp"); - return SV; } // If SV is a first-class aggregate value, insert each value recursively. @@ -1083,7 +1288,8 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { } const Type *LoadTy = cast(PN->getType())->getElementType(); - PHINode *NewPN = PHINode::Create(LoadTy, PN->getName()+".ld", PN); + PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), + PN->getName()+".ld", PN); // Get the TBAA tag and alignment to use from one of the loads. It doesn't // matter which one we get and if any differ, it doesn't matter. diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index ce5dd73ace32..1137c2b23f96 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -73,7 +73,8 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) { if (UseLLVMTrap) { Function *TrapFn = Intrinsic::getDeclaration(BB->getParent()->getParent(), Intrinsic::trap); - CallInst::Create(TrapFn, "", I); + CallInst *CallTrap = CallInst::Create(TrapFn, "", I); + CallTrap->setDebugLoc(I->getDebugLoc()); } new UnreachableInst(I->getContext(), I); @@ -259,11 +260,12 @@ static bool MergeEmptyReturnBlocks(Function &F) { PHINode *RetBlockPHI = dyn_cast(RetBlock->begin()); if (RetBlockPHI == 0) { Value *InVal = cast(RetBlock->getTerminator())->getOperand(0); - RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), "merge", + pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); + RetBlockPHI = 
PHINode::Create(Ret->getOperand(0)->getType(), + std::distance(PB, PE), "merge", &RetBlock->front()); - for (pred_iterator PI = pred_begin(RetBlock), E = pred_end(RetBlock); - PI != E; ++PI) + for (pred_iterator PI = PB; PI != PE; ++PI) RetBlockPHI->addIncoming(InVal, *PI); RetBlock->getTerminator()->setOperand(0, RetBlockPHI); } diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp deleted file mode 100644 index 70ff32e02310..000000000000 --- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp +++ /dev/null @@ -1,160 +0,0 @@ -//===- SimplifyHalfPowrLibCalls.cpp - Optimize specific half_powr calls ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a simple pass that applies an experimental -// transformation on calls to specific functions. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "simplify-libcalls-halfpowr" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Target/TargetData.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -using namespace llvm; - -namespace { - /// This pass optimizes well half_powr function calls. 
- /// - class SimplifyHalfPowrLibCalls : public FunctionPass { - const TargetData *TD; - public: - static char ID; // Pass identification - SimplifyHalfPowrLibCalls() : FunctionPass(ID) { - initializeSimplifyHalfPowrLibCallsPass(*PassRegistry::getPassRegistry()); - } - - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - } - - Instruction * - InlineHalfPowrs(const std::vector &HalfPowrs, - Instruction *InsertPt); - }; - char SimplifyHalfPowrLibCalls::ID = 0; -} // end anonymous namespace. - -INITIALIZE_PASS(SimplifyHalfPowrLibCalls, "simplify-libcalls-halfpowr", - "Simplify half_powr library calls", false, false) - -// Public interface to the Simplify HalfPowr LibCalls pass. -FunctionPass *llvm::createSimplifyHalfPowrLibCallsPass() { - return new SimplifyHalfPowrLibCalls(); -} - -/// InlineHalfPowrs - Inline a sequence of adjacent half_powr calls, rearranging -/// their control flow to better facilitate subsequent optimization. -Instruction * -SimplifyHalfPowrLibCalls:: -InlineHalfPowrs(const std::vector &HalfPowrs, - Instruction *InsertPt) { - std::vector Bodies; - BasicBlock *NewBlock = 0; - - for (unsigned i = 0, e = HalfPowrs.size(); i != e; ++i) { - CallInst *Call = cast(HalfPowrs[i]); - Function *Callee = Call->getCalledFunction(); - - // Minimally sanity-check the CFG of half_powr to ensure that it contains - // the kind of code we expect. If we're running this pass, we have - // reason to believe it will be what we expect. 
- Function::iterator I = Callee->begin(); - BasicBlock *Prologue = I++; - if (I == Callee->end()) break; - BasicBlock *SubnormalHandling = I++; - if (I == Callee->end()) break; - BasicBlock *Body = I++; - if (I != Callee->end()) break; - if (SubnormalHandling->getSinglePredecessor() != Prologue) - break; - BranchInst *PBI = dyn_cast(Prologue->getTerminator()); - if (!PBI || !PBI->isConditional()) - break; - BranchInst *SNBI = dyn_cast(SubnormalHandling->getTerminator()); - if (!SNBI || SNBI->isConditional()) - break; - if (!isa(Body->getTerminator())) - break; - - Instruction *NextInst = llvm::next(BasicBlock::iterator(Call)); - - // Inline the call, taking care of what code ends up where. - NewBlock = SplitBlock(NextInst->getParent(), NextInst, this); - - InlineFunctionInfo IFI(0, TD); - bool B = InlineFunction(Call, IFI); - assert(B && "half_powr didn't inline?"); - (void)B; - - BasicBlock *NewBody = NewBlock->getSinglePredecessor(); - assert(NewBody); - Bodies.push_back(NewBody); - } - - if (!NewBlock) - return InsertPt; - - // Put the code for all the bodies into one block, to facilitate - // subsequent optimization. - (void)SplitEdge(NewBlock->getSinglePredecessor(), NewBlock, this); - for (unsigned i = 0, e = Bodies.size(); i != e; ++i) { - BasicBlock *Body = Bodies[i]; - Instruction *FNP = Body->getFirstNonPHI(); - // Splice the insts from body into NewBlock. - NewBlock->getInstList().splice(NewBlock->begin(), Body->getInstList(), - FNP, Body->getTerminator()); - } - - return NewBlock->begin(); -} - -/// runOnFunction - Top level algorithm. -/// -bool SimplifyHalfPowrLibCalls::runOnFunction(Function &F) { - TD = getAnalysisIfAvailable(); - - bool Changed = false; - std::vector HalfPowrs; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - // Look for calls. 
- bool IsHalfPowr = false; - if (CallInst *CI = dyn_cast(I)) { - // Look for direct calls and calls to non-external functions. - Function *Callee = CI->getCalledFunction(); - if (Callee && Callee->hasExternalLinkage()) { - // Look for calls with well-known names. - if (Callee->getName() == "__half_powrf4") - IsHalfPowr = true; - } - } - if (IsHalfPowr) - HalfPowrs.push_back(I); - // We're looking for sequences of up to three such calls, which we'll - // simplify as a group. - if ((!IsHalfPowr && !HalfPowrs.empty()) || HalfPowrs.size() == 3) { - I = InlineHalfPowrs(HalfPowrs, I); - E = I->getParent()->end(); - HalfPowrs.clear(); - Changed = true; - } - } - assert(HalfPowrs.empty() && "Block had no terminator!"); - } - - return Changed; -} diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 9f136d4e3077..6247b0348f14 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -49,6 +49,7 @@ class LibCallOptimization { protected: Function *Caller; const TargetData *TD; + const TargetLibraryInfo *TLI; LLVMContext* Context; public: LibCallOptimization() { } @@ -62,9 +63,11 @@ class LibCallOptimization { virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) =0; - Value *OptimizeCall(CallInst *CI, const TargetData *TD, IRBuilder<> &B) { + Value *OptimizeCall(CallInst *CI, const TargetData *TD, + const TargetLibraryInfo *TLI, IRBuilder<> &B) { Caller = CI->getParent()->getParent(); this->TD = TD; + this->TLI = TLI; if (CI->getCalledFunction()) Context = &CI->getCalledFunction()->getContext(); @@ -97,6 +100,15 @@ static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { } return true; } + +static bool CallHasFloatingPointArgument(const CallInst *CI) { + for (CallInst::const_op_iterator it = CI->op_begin(), e = CI->op_end(); + it != e; ++it) { + if ((*it)->getType()->isFloatingPointTy()) + return true; + } + return false; +} /// 
IsOnlyUsedInEqualityComparison - Return true if it is only used in equality /// comparisons with With. @@ -1075,14 +1087,8 @@ struct ToAsciiOpt : public LibCallOptimization { // 'printf' Optimizations struct PrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require one fixed pointer argument and an integer/void result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || - !(FT->getReturnType()->isIntegerTy() || - FT->getReturnType()->isVoidTy())) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(0), FormatStr)) @@ -1138,20 +1144,40 @@ struct PrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require one fixed pointer argument and an integer/void result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() || + !(FT->getReturnType()->isIntegerTy() || + FT->getReturnType()->isVoidTy())) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // printf(format, ...) -> iprintf(format, ...) if no floating point + // arguments. 
+ if (TLI->has(LibFunc::iprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *IPrintFFn = + M->getOrInsertFunction("iprintf", FT, Callee->getAttributes()); + CallInst *New = cast(CI->clone()); + New->setCalledFunction(IPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// // 'sprintf' Optimizations struct SPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed pointer arguments and an integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // Check for a fixed format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1212,6 +1238,32 @@ struct SPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed pointer arguments and an integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // sprintf(str, format, ...) -> siprintf(str, format, ...) if no floating + // point arguments. 
+ if (TLI->has(LibFunc::siprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *SIPrintFFn = + M->getOrInsertFunction("siprintf", FT, Callee->getAttributes()); + CallInst *New = cast(CI->clone()); + New->setCalledFunction(SIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1278,14 +1330,8 @@ struct FPutsOpt : public LibCallOptimization { // 'fprintf' Optimizations struct FPrintFOpt : public LibCallOptimization { - virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { - // Require two fixed paramters as pointers and integer result. - const FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || - !FT->getParamType(1)->isPointerTy() || - !FT->getReturnType()->isIntegerTy()) - return 0; - + Value *OptimizeFixedFormatString(Function *Callee, CallInst *CI, + IRBuilder<> &B) { // All the optimizations depend on the format string. std::string FormatStr; if (!GetConstantStringInfo(CI->getArgOperand(1), FormatStr)) @@ -1330,6 +1376,32 @@ struct FPrintFOpt : public LibCallOptimization { } return 0; } + + virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) { + // Require two fixed paramters as pointers and integer result. + const FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() || + !FT->getParamType(1)->isPointerTy() || + !FT->getReturnType()->isIntegerTy()) + return 0; + + if (Value *V = OptimizeFixedFormatString(Callee, CI, B)) { + return V; + } + + // fprintf(stream, format, ...) -> fiprintf(stream, format, ...) if no + // floating point arguments. 
+ if (TLI->has(LibFunc::fiprintf) && !CallHasFloatingPointArgument(CI)) { + Module *M = B.GetInsertBlock()->getParent()->getParent(); + Constant *FIPrintFFn = + M->getOrInsertFunction("fiprintf", FT, Callee->getAttributes()); + CallInst *New = cast(CI->clone()); + New->setCalledFunction(FIPrintFFn); + B.Insert(New); + return New; + } + return 0; + } }; //===---------------------------------------===// @@ -1544,8 +1616,11 @@ bool SimplifyLibCalls::runOnFunction(Function &F) { // Set the builder to the instruction after the call. Builder.SetInsertPoint(BB, I); + // Use debug location of CI for all new instructions. + Builder.SetCurrentDebugLocation(CI->getDebugLoc()); + // Try to optimize this call. - Value *Result = LCO->OptimizeCall(CI, TD, Builder); + Value *Result = LCO->OptimizeCall(CI, TD, TLI, Builder); if (Result == 0) continue; DEBUG(dbgs() << "SimplifyLibCalls simplified: " << *CI; diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5b6bc04cc1c2..539cc6f0baf5 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -36,7 +36,7 @@ // evaluated each time through the tail recursion. Safely keeping allocas // in the entry block requires analysis to proves that the tail-called // function does not read or write the stack object. -// 2. Tail recursion is only performed if the call immediately preceeds the +// 2. Tail recursion is only performed if the call immediately precedes the // return instruction. It's possible that there could be a jump between // the call and the return. // 3. 
There can be intervening operations between the call and the return that @@ -433,7 +433,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (CanMoveAboveCall(BBI, CI)) continue; // If we can't move the instruction above the call, it might be because it - // is an associative and commutative operation that could be tranformed + // is an associative and commutative operation that could be transformed // using accumulator recursion elimination. Check to see if this is the // case, and if so, remember the initial accumulator value for later. if ((AccumulatorRecursionEliminationInitVal = @@ -496,7 +496,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, Instruction *InsertPos = OldEntry->begin(); for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { - PHINode *PN = PHINode::Create(I->getType(), + PHINode *PN = PHINode::Create(I->getType(), 2, I->getName() + ".tr", InsertPos); I->replaceAllUsesWith(PN); // Everyone use the PHI node now! PN->addIncoming(I, NewEntry); @@ -527,8 +527,10 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, if (AccumulatorRecursionEliminationInitVal) { Instruction *AccRecInstr = AccumulatorRecursionInstr; // Start by inserting a new PHI node for the accumulator. + pred_iterator PB = pred_begin(OldEntry), PE = pred_end(OldEntry); PHINode *AccPN = PHINode::Create(AccumulatorRecursionEliminationInitVal->getType(), + std::distance(PB, PE) + 1, "accumulator.tr", OldEntry->begin()); // Loop over all of the predecessors of the tail recursion block. For the @@ -537,8 +539,7 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // other tail recursions eliminated) the accumulator is not modified. // Because we haven't added the branch in the current block to OldEntry yet, // it will not show up as a predecessor. 
- for (pred_iterator PI = pred_begin(OldEntry), PE = pred_end(OldEntry); - PI != PE; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if (P == &F->getEntryBlock()) AccPN->addIncoming(AccumulatorRecursionEliminationInitVal, P); @@ -572,7 +573,9 @@ bool TailCallElim::EliminateRecursiveTailCall(CallInst *CI, ReturnInst *Ret, // Now that all of the PHI nodes are in place, remove the call and // ret instructions, replacing them with an unconditional branch. - BranchInst::Create(OldEntry, Ret); + BranchInst *NewBI = BranchInst::Create(OldEntry, Ret); + NewBI->setDebugLoc(CI->getDebugLoc()); + BB->getInstList().erase(Ret); // Remove return. BB->getInstList().erase(CI); // Remove call. ++NumEliminated; diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index acaea195e710..c705cc51094a 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -447,7 +447,7 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB, // If the values coming into the block are not the same, we need a PHI. // Create the new PHI node, insert it into NewBB at the end of the block PHINode *NewPHI = - PHINode::Create(PN->getType(), PN->getName()+".ph", BI); + PHINode::Create(PN->getType(), NumPreds, PN->getName()+".ph", BI); if (AA) AA->copyValue(PN, NewPHI); // Move all of the PHI values for 'Preds' to the new PHI. @@ -538,3 +538,15 @@ ReturnInst *llvm::FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, UncondBranch->eraseFromParent(); return cast(NewRet); } + +/// GetFirstDebugLocInBasicBlock - Return first valid DebugLoc entry in a +/// given basic block. 
+DebugLoc llvm::GetFirstDebugLocInBasicBlock(const BasicBlock *BB) { + for (BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + BI != BE; ++BI) { + DebugLoc DL = BI->getDebugLoc(); + if (!DL.isUnknown()) + return DL; + } + return DebugLoc(); +} diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 616b066b5ab1..caf2aeb4d30a 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -56,7 +56,7 @@ char BreakCriticalEdges::ID = 0; INITIALIZE_PASS(BreakCriticalEdges, "break-crit-edges", "Break critical edges in CFG", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::BreakCriticalEdgesID = BreakCriticalEdges::ID; FunctionPass *llvm::createBreakCriticalEdgesPass() { return new BreakCriticalEdges(); @@ -140,7 +140,7 @@ static void CreatePHIsForSplitLoopExit(SmallVectorImpl &Preds, if (VP->getParent() == SplitBB) continue; // Otherwise a new PHI is needed. Create one and populate it. - PHINode *NewPN = PHINode::Create(PN->getType(), "split", + PHINode *NewPN = PHINode::Create(PN->getType(), Preds.size(), "split", SplitBB->getTerminator()); for (unsigned i = 0, e = Preds.size(); i != e; ++i) NewPN->addIncoming(V, Preds[i]); diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index e6337722c8bd..8c133ea7f560 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -104,7 +104,7 @@ namespace { /// region, we need to split the entry block of the region so that the PHI node /// is easier to deal with. 
void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { - bool HasPredsFromRegion = false; + unsigned NumPredsFromRegion = 0; unsigned NumPredsOutsideRegion = 0; if (Header != &Header->getParent()->getEntryBlock()) { @@ -116,7 +116,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // header block into two. for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) if (BlocksToExtract.count(PN->getIncomingBlock(i))) - HasPredsFromRegion = true; + ++NumPredsFromRegion; else ++NumPredsOutsideRegion; @@ -147,7 +147,7 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { // Okay, now we need to adjust the PHI nodes and any branches from within the // region to go to the new header block instead of the old header block. - if (HasPredsFromRegion) { + if (NumPredsFromRegion) { PHINode *PN = cast(OldPred->begin()); // Loop over all of the predecessors of OldPred that are in the region, // changing them to branch to NewBB instead. @@ -157,14 +157,14 @@ void CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { TI->replaceUsesOfWith(OldPred, NewBB); } - // Okay, everthing within the region is now branching to the right block, we + // Okay, everything within the region is now branching to the right block, we // just have to update the PHI nodes now, inserting PHI nodes into NewBB. for (AfterPHIs = OldPred->begin(); isa(AfterPHIs); ++AfterPHIs) { PHINode *PN = cast(AfterPHIs); // Create a new PHI node in the new region, which has an incoming value // from OldPred of PN. 
- PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".ce", - NewBB->begin()); + PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, + PN->getName()+".ce", NewBB->begin()); NewPN->addIncoming(PN, OldPred); // Loop over all of the incoming value in PN, moving them to NewPN if they diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index c1faf2411331..7d179092c063 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -320,7 +320,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall, // // Note that this only does one level of inlining. For example, if the // instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -// exists in the instruction stream. Similiarly this will inline a recursive +// exists in the instruction stream. Similarly this will inline a recursive // function by one level. // bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { @@ -624,7 +624,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) { // The PHI node should go at the front of the new basic block to merge all // possible incoming values. if (!TheCall->use_empty()) { - PHI = PHINode::Create(RTy, TheCall->getName(), + PHI = PHINode::Create(RTy, Returns.size(), TheCall->getName(), AfterCallBB->begin()); // Anything that used the result of the function call should now use the // PHI node as their operand. 
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index b2e5fa6d7e3a..b654111eba74 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -207,6 +207,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, DomTreeNode *DomNode = DT->getNode(DomBB); + SmallVector AddedPHIs; + SSAUpdater SSAUpdate; SSAUpdate.Initialize(Inst->getType(), Inst->getName()); @@ -220,9 +222,10 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // If we already inserted something for this BB, don't reprocess it. if (SSAUpdate.HasValueForBlock(ExitBB)) continue; - PHINode *PN = PHINode::Create(Inst->getType(), Inst->getName()+".lcssa", + PHINode *PN = PHINode::Create(Inst->getType(), + PredCache.GetNumPreds(ExitBB), + Inst->getName()+".lcssa", ExitBB->begin()); - PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB)); // Add inputs from inside the loop for this PHI. for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) { @@ -236,6 +239,8 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, &PN->getOperandUse( PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1))); } + + AddedPHIs.push_back(PN); // Remember that this phi makes the value alive in this block. SSAUpdate.AddAvailableValue(ExitBB, PN); @@ -262,6 +267,12 @@ bool LCSSA::ProcessInstruction(Instruction *Inst, // Otherwise, do full PHI insertion. SSAUpdate.RewriteUse(*UsesToRewrite[i]); } + + // Remove PHI nodes that did not have any uses rewritten. 
+ for (unsigned i = 0, e = AddedPHIs.size(); i != e; ++i) { + if (AddedPHIs[i]->use_empty()) + AddedPHIs[i]->eraseFromParent(); + } return true; } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 3f789fa86589..4bca2fc1fb9d 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -20,8 +20,11 @@ #include "llvm/Instructions.h" #include "llvm/Intrinsics.h" #include "llvm/IntrinsicInst.h" +#include "llvm/Operator.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -65,8 +68,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { // Let the basic block know that we are letting go of it. Based on this, // it will adjust it's PHI nodes. - assert(BI->getParent() && "Terminator not inserted in block!"); - OldDest->removePredecessor(BI->getParent()); + OldDest->removePredecessor(BB); // Replace the conditional branch with an unconditional one. BranchInst::Create(Destination, BI); @@ -209,8 +211,18 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) { bool llvm::isInstructionTriviallyDead(Instruction *I) { if (!I->use_empty() || isa(I)) return false; - // We don't want debug info removed by anything this general. - if (isa(I)) return false; + // We don't want debug info removed by anything this general, unless + // debug info is empty. 
+ if (DbgDeclareInst *DDI = dyn_cast(I)) { + if (DDI->getAddress()) + return false; + return true; + } + if (DbgValueInst *DVI = dyn_cast(I)) { + if (DVI->getValue()) + return false; + return true; + } if (!I->mayHaveSideEffects()) return true; @@ -320,8 +332,14 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) { BI = BB->begin(); continue; } - + + if (Inst->isTerminator()) + break; + + WeakVH BIHandle(BI); MadeChange |= RecursivelyDeleteTriviallyDeadInstructions(Inst); + if (BIHandle != BI) + BI = BB->begin(); } return MadeChange; } @@ -632,6 +650,8 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { Hash ^= reinterpret_cast(static_cast(*I)); Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7)); } + // Avoid colliding with the DenseMap sentinels ~0 and ~0-1. + Hash >>= 1; // If we've never seen this hash value before, it's a unique PHI. std::pair::iterator, bool> Pair = HashMap.insert(std::make_pair(Hash, PN)); @@ -753,3 +773,83 @@ unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign, return Align; } +///===---------------------------------------------------------------------===// +/// Dbg Intrinsic utilities +/// + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + StoreInst *SI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + Instruction *DbgVal = + Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, + DIVar, SI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc SIDL = SI->getDebugLoc(); + if (!SIDL.isUnknown()) + DbgVal->setDebugLoc(SIDL); + // Otherwise propagate debug metadata from dbg.declare. 
+ else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value +/// that has an associated llvm.dbg.decl intrinsic. +bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, + LoadInst *LI, DIBuilder &Builder) { + DIVariable DIVar(DDI->getVariable()); + if (!DIVar.Verify()) + return false; + + Instruction *DbgVal = + Builder.insertDbgValueIntrinsic(LI->getOperand(0), 0, + DIVar, LI); + + // Propagate any debug metadata from the store onto the dbg.value. + DebugLoc LIDL = LI->getDebugLoc(); + if (!LIDL.isUnknown()) + DbgVal->setDebugLoc(LIDL); + // Otherwise propagate debug metadata from dbg.declare. + else + DbgVal->setDebugLoc(DDI->getDebugLoc()); + return true; +} + +/// LowerDbgDeclare - Lowers llvm.dbg.declare intrinsics into appropriate set +/// of llvm.dbg.value intrinsics. +bool llvm::LowerDbgDeclare(Function &F) { + DIBuilder DIB(*F.getParent()); + SmallVector Dbgs; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + if (DbgDeclareInst *DDI = dyn_cast(BI)) + Dbgs.push_back(DDI); + } + if (Dbgs.empty()) + return false; + + for (SmallVector::iterator I = Dbgs.begin(), + E = Dbgs.end(); I != E; ++I) { + DbgDeclareInst *DDI = *I; + if (AllocaInst *AI = dyn_cast_or_null(DDI->getAddress())) { + bool RemoveDDI = true; + for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); + UI != E; ++UI) + if (StoreInst *SI = dyn_cast(*UI)) + ConvertDebugDeclareToDebugValue(DDI, SI, DIB); + else if (LoadInst *LI = dyn_cast(*UI)) + ConvertDebugDeclareToDebugValue(DDI, LI, DIB); + else + RemoveDDI = false; + if (RemoveDDI) + DDI->eraseFromParent(); + } + } + return true; +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 246263026bb4..f02ffd20bca9 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ 
b/lib/Transforms/Utils/LoopSimplify.cpp @@ -115,7 +115,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_END(LoopSimplify, "loop-simplify", "Canonicalize natural loops", true, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::LoopSimplifyID = LoopSimplify::ID; Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); } @@ -648,9 +648,8 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) { // the backedge block which correspond to any PHI nodes in the header block. for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { PHINode *PN = cast(I); - PHINode *NewPN = PHINode::Create(PN->getType(), PN->getName()+".be", - BETerminator); - NewPN->reserveOperandSpace(BackedgeBlocks.size()); + PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(), + PN->getName()+".be", BETerminator); if (AA) AA->copyValue(PN, NewPN); // Loop over the PHI node, moving all entries except the one for the diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 914a439718d4..ed733d393a11 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -84,7 +84,7 @@ char LowerSwitch::ID = 0; INITIALIZE_PASS(LowerSwitch, "lowerswitch", "Lower SwitchInst's to branches", false, false) -// Publically exposed interface to pass... +// Publicly exposed interface to pass... char &llvm::LowerSwitchID = LowerSwitch::ID; // createLowerSwitchPass - Interface to this file... 
FunctionPass *llvm::createLowerSwitchPass() { diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 778885723e66..50c9ae204a4c 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -38,6 +38,7 @@ #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -45,7 +46,6 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" #include -#include #include using namespace llvm; @@ -103,7 +103,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) { /// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the /// alloca 'V', if any. static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { - if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) + if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), V)) for (Value::use_iterator UI = DebugNode->use_begin(), E = DebugNode->use_end(); UI != E; ++UI) if (DbgDeclareInst *DDI = dyn_cast(*UI)) @@ -273,8 +273,6 @@ namespace { LargeBlockInfo &LBI); void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, LargeBlockInfo &LBI); - void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); - void RenamePass(BasicBlock *BB, BasicBlock *Pred, RenamePassData::ValVector &IncVals, @@ -391,7 +389,9 @@ void PromoteMem2Reg::run() { if (Info.UsingBlocks.empty()) { // Record debuginfo for the store and remove the declaration's debuginfo. if (DbgDeclareInst *DDI = Info.DbgDeclare) { - ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); + if (!DIB) + DIB = new DIBuilder(*DDI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore, *DIB); DDI->eraseFromParent(); } // Remove the (now dead) store and alloca. 
@@ -423,8 +423,11 @@ void PromoteMem2Reg::run() { while (!AI->use_empty()) { StoreInst *SI = cast(AI->use_back()); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = Info.DbgDeclare) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = Info.DbgDeclare) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } SI->eraseFromParent(); LBI.deleteValue(SI); } @@ -944,28 +947,6 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, } } -// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value -// that has an associated llvm.dbg.decl intrinsic. -void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, - StoreInst *SI) { - DIVariable DIVar(DDI->getVariable()); - if (!DIVar.Verify()) - return; - - if (!DIB) - DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); - Instruction *DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0, - DIVar, SI); - - // Propagate any debug metadata from the store onto the dbg.value. - DebugLoc SIDL = SI->getDebugLoc(); - if (!SIDL.isUnknown()) - DbgVal->setDebugLoc(SIDL); - // Otherwise propagate debug metadata from dbg.declare. - else - DbgVal->setDebugLoc(DDI->getDebugLoc()); -} - // QueuePhiNode - queues a phi-node to be added to a basic-block for a specific // Alloca returns true if there wasn't already a phi-node for that variable // @@ -979,12 +960,11 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, // Create a PhiNode using the dereferenced type... and add the phi-node to the // BasicBlock. - PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), + PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), getNumPreds(BB), Allocas[AllocaNo]->getName() + "." 
+ Twine(Version++), BB->begin()); ++NumPHIInsert; PhiToAllocaMap[PN] = AllocaNo; - PN->reserveOperandSpace(getNumPreds(BB)); if (AST && PN->getType()->isPointerTy()) AST->copyValue(PointerAllocaValues[AllocaNo], PN); @@ -1076,8 +1056,11 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, // what value were we writing? IncomingVals[ai->second] = SI->getOperand(0); // Record debuginfo for the store before removing it. - if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) - ConvertDebugDeclareToDebugValue(DDI, SI); + if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) { + if (!DIB) + DIB = new DIBuilder(*SI->getParent()->getParent()->getParent()); + ConvertDebugDeclareToDebugValue(DDI, SI, *DIB); + } BB->getInstList().erase(SI); } } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index 3896d9851b26..2860c3e511a6 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ssaupdater" +#include "llvm/Constants.h" #include "llvm/Instructions.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -20,8 +21,10 @@ #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" + using namespace llvm; typedef DenseMap AvailableValsTy; @@ -170,8 +173,8 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { } // Ok, we have no way out, insert a new one now. - PHINode *InsertedPHI = PHINode::Create(ProtoType, ProtoName, &BB->front()); - InsertedPHI->reserveOperandSpace(PredValues.size()); + PHINode *InsertedPHI = PHINode::Create(ProtoType, PredValues.size(), + ProtoName, &BB->front()); // Fill in all the predecessors of the PHI. 
for (unsigned i = 0, e = PredValues.size(); i != e; ++i) @@ -184,6 +187,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) { return V; } + // Set DebugLoc. + InsertedPHI->setDebugLoc(GetFirstDebugLocInBasicBlock(BB)); + // If the client wants to know about all new instructions, tell it. if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); @@ -289,9 +295,8 @@ class SSAUpdaterTraits { /// Reserve space for the operands but do not fill them in yet. static Value *CreateEmptyPHI(BasicBlock *BB, unsigned NumPreds, SSAUpdater *Updater) { - PHINode *PHI = PHINode::Create(Updater->ProtoType, Updater->ProtoName, - &BB->front()); - PHI->reserveOperandSpace(NumPreds); + PHINode *PHI = PHINode::Create(Updater->ProtoType, NumPreds, + Updater->ProtoName, &BB->front()); return PHI; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index c6708857cb56..18b857308e34 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -37,6 +37,10 @@ #include using namespace llvm; +static cl::opt +PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(1), + cl::desc("Control the amount of phi node folding to perform (default = 1)")); + static cl::opt DupRet("simplifycfg-dup-ret", cl::Hidden, cl::init(false), cl::desc("Duplicate return instructions into unconditional branches")); @@ -201,11 +205,20 @@ static Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, /// which works well enough for us. /// /// If AggressiveInsts is non-null, and if V does not dominate BB, we check to -/// see if V (which must be an instruction) is cheap to compute and is -/// non-trapping. If both are true, the instruction is inserted into the set -/// and true is returned. +/// see if V (which must be an instruction) and its recursive operands +/// that do not dominate BB have a combined cost lower than CostRemaining and +/// are non-trapping. 
If both are true, the instruction is inserted into the +/// set and true is returned. +/// +/// The cost for most non-trapping instructions is defined as 1 except for +/// Select whose cost is 2. +/// +/// After this function returns, CostRemaining is decreased by the cost of +/// V plus its non-dominating operands. If that cost is greater than +/// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, - SmallPtrSet *AggressiveInsts) { + SmallPtrSet *AggressiveInsts, + unsigned &CostRemaining) { Instruction *I = dyn_cast(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -232,12 +245,17 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // instructions in the 'if region'. if (AggressiveInsts == 0) return false; + // If we have seen this instruction before, don't count it again. + if (AggressiveInsts->count(I)) return true; + // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. if (!I->isSafeToSpeculativelyExecute()) return false; + unsigned Cost = 0; + switch (I->getOpcode()) { default: return false; // Cannot hoist this out safely. case Instruction::Load: @@ -246,11 +264,13 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // predecessor. if (PBB->getFirstNonPHIOrDbg() != I) return false; + Cost = 1; break; case Instruction::GetElementPtr: // GEPs are cheap if all indices are constant. 
if (!cast(I)->hasAllConstantIndices()) return false; + Cost = 1; break; case Instruction::Add: case Instruction::Sub: @@ -261,13 +281,26 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, case Instruction::LShr: case Instruction::AShr: case Instruction::ICmp: + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + Cost = 1; break; // These are all cheap and non-trapping instructions. + + case Instruction::Select: + Cost = 2; + break; } - // Okay, we can only really hoist these out if their operands are not - // defined in the conditional region. + if (Cost > CostRemaining) + return false; + + CostRemaining -= Cost; + + // Okay, we can only really hoist these out if their operands do + // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, 0)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -807,12 +840,16 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { BasicBlock::iterator BB2_Itr = BB2->begin(); Instruction *I1 = BB1_Itr++, *I2 = BB2_Itr++; - while (isa(I1)) - I1 = BB1_Itr++; - while (isa(I2)) - I2 = BB2_Itr++; - if (I1->getOpcode() != I2->getOpcode() || isa(I1) || - !I1->isIdenticalToWhenDefined(I2) || + // Skip debug info if it is not identical. 
+ DbgInfoIntrinsic *DBI1 = dyn_cast(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa(I1)) + I1 = BB1_Itr++; + while (isa(I2)) + I2 = BB2_Itr++; + } + if (isa(I1) || !I1->isIdenticalToWhenDefined(I2) || (isa(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))) return false; @@ -835,13 +872,17 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) { I2->eraseFromParent(); I1 = BB1_Itr++; - while (isa(I1)) - I1 = BB1_Itr++; I2 = BB2_Itr++; - while (isa(I2)) - I2 = BB2_Itr++; - } while (I1->getOpcode() == I2->getOpcode() && - I1->isIdenticalToWhenDefined(I2)); + // Skip debug info if it is not identical. + DbgInfoIntrinsic *DBI1 = dyn_cast(I1); + DbgInfoIntrinsic *DBI2 = dyn_cast(I2); + if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) { + while (isa(I1)) + I1 = BB1_Itr++; + while (isa(I2)) + I2 = BB2_Itr++; + } + } while (I1->isIdenticalToWhenDefined(I2)); return true; @@ -1209,6 +1250,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { // instructions. While we are at it, keep track of the instructions // that need to be moved to the dominating block. 
SmallPtrSet AggressiveInsts; + unsigned MaxCostVal0 = PHINodeFoldingThreshold, + MaxCostVal1 = PHINodeFoldingThreshold; for (BasicBlock::iterator II = BB->begin(); isa(II);) { PHINode *PN = cast(II++); @@ -1218,8 +1261,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetData *TD) { continue; } - if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts) || - !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts)) + if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, + MaxCostVal0) || + !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, + MaxCostVal1)) return false; } @@ -1393,24 +1438,23 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) { return true; } -/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch, -/// and if a predecessor branches to us and one of our successors, fold the -/// setcc into the predecessor and use logical operations to pick the right -/// destination. +/// FoldBranchToCommonDest - If this basic block is simple enough, and if a +/// predecessor branches to us and one of our successors, fold the block into +/// the predecessor and use logical operations to pick the right destination. bool llvm::FoldBranchToCommonDest(BranchInst *BI) { BasicBlock *BB = BI->getParent(); Instruction *Cond = dyn_cast(BI->getCondition()); if (Cond == 0 || (!isa(Cond) && !isa(Cond)) || Cond->getParent() != BB || !Cond->hasOneUse()) return false; - + // Only allow this if the condition is a simple instruction that can be // executed unconditionally. It must be in the same block as the branch, and // must be at the front of the block. BasicBlock::iterator FrontIt = BB->front(); + // Ignore dbg intrinsics. - while (isa(FrontIt)) - ++FrontIt; + while (isa(FrontIt)) ++FrontIt; // Allow a single instruction to be hoisted in addition to the compare // that feeds the branch. 
We later ensure that any values that _it_ uses @@ -1422,21 +1466,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { FrontIt->isSafeToSpeculativelyExecute()) { BonusInst = &*FrontIt; ++FrontIt; + + // Ignore dbg intrinsics. + while (isa(FrontIt)) ++FrontIt; } - + // Only a single bonus inst is allowed. if (&*FrontIt != Cond) return false; // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; + // Ingore dbg intrinsics. - while(isa(CondIt)) - ++CondIt; - if (&*CondIt != BI) { - assert (!isa(CondIt) && "Hey do not forget debug info!"); + while (isa(CondIt)) ++CondIt; + + if (&*CondIt != BI) return false; - } // Cond is known to be a compare or binary operator. Check to make sure that // neither operand is a potentially-trapping constant expression. @@ -1447,13 +1493,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (CE->canTrap()) return false; - // Finally, don't infinitely unroll conditional loops. BasicBlock *TrueDest = BI->getSuccessor(0); BasicBlock *FalseDest = BI->getSuccessor(1); if (TrueDest == BB || FalseDest == BB) return false; - + for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { BasicBlock *PredBlock = *PI; BranchInst *PBI = dyn_cast(PredBlock->getTerminator()); @@ -1461,10 +1506,24 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Check that we have two conditional branches. If there is a PHI node in // the common successor, verify that the same value flows in from both // blocks. - if (PBI == 0 || PBI->isUnconditional() || - !SafeToMergeTerminators(BI, PBI)) + if (PBI == 0 || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI)) continue; + // Determine if the two branches share a common destination. 
+ Instruction::BinaryOps Opc; + bool InvertPredCond = false; + + if (PBI->getSuccessor(0) == TrueDest) + Opc = Instruction::Or; + else if (PBI->getSuccessor(1) == FalseDest) + Opc = Instruction::And; + else if (PBI->getSuccessor(0) == FalseDest) + Opc = Instruction::And, InvertPredCond = true; + else if (PBI->getSuccessor(1) == TrueDest) + Opc = Instruction::Or, InvertPredCond = true; + else + continue; + // Ensure that any values used in the bonus instruction are also used // by the terminator of the predecessor. This means that those values // must already have been resolved, so we won't be inhibiting the @@ -1502,20 +1561,6 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { if (!UsedValues.empty()) return false; } - - Instruction::BinaryOps Opc; - bool InvertPredCond = false; - - if (PBI->getSuccessor(0) == TrueDest) - Opc = Instruction::Or; - else if (PBI->getSuccessor(1) == FalseDest) - Opc = Instruction::And; - else if (PBI->getSuccessor(0) == FalseDest) - Opc = Instruction::And, InvertPredCond = true; - else if (PBI->getSuccessor(1) == TrueDest) - Opc = Instruction::Or, InvertPredCond = true; - else - continue; DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); @@ -1566,6 +1611,12 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { AddPredecessorToBlock(FalseDest, PredBlock, BB); PBI->setSuccessor(1, FalseDest); } + + // Copy any debug value intrinsics into the end of PredBlock. + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) + if (isa(*I)) + I->clone()->insertBefore(PBI); + return true; } return false; @@ -1598,13 +1649,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // in the constant and simplify the block result. Subsequent passes of // simplifycfg will thread the block. 
if (BlockIsSimpleEnoughToThreadThrough(BB)) { + pred_iterator PB = pred_begin(BB), PE = pred_end(BB); PHINode *NewPN = PHINode::Create(Type::getInt1Ty(BB->getContext()), + std::distance(PB, PE), BI->getCondition()->getName() + ".pr", BB->begin()); // Okay, we're going to insert the PHI node. Since PBI is not the only // predecessor, compute the PHI'd conditional value for all of the preds. // Any predecessor where the condition is not computable we keep symbolic. - for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { + for (pred_iterator PI = PB; PI != PE; ++PI) { BasicBlock *P = *PI; if ((PBI = dyn_cast(P->getTerminator())) && PBI != BI && PBI->isConditional() && @@ -1800,6 +1853,26 @@ static bool SimplifyTerminatorOnSelect(TerminatorInst *OldTerm, Value *Cond, return true; } +// SimplifySwitchOnSelect - Replaces +// (switch (select cond, X, Y)) on constant X, Y +// with a branch - conditional if X and Y lead to distinct BBs, +// unconditional otherwise. +static bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select) { + // Check for constant integer values in the select. + ConstantInt *TrueVal = dyn_cast(Select->getTrueValue()); + ConstantInt *FalseVal = dyn_cast(Select->getFalseValue()); + if (!TrueVal || !FalseVal) + return false; + + // Find the relevant condition and destinations. + Value *Condition = Select->getCondition(); + BasicBlock *TrueBB = SI->getSuccessor(SI->findCaseValue(TrueVal)); + BasicBlock *FalseBB = SI->getSuccessor(SI->findCaseValue(FalseVal)); + + // Perform the actual simplification. 
+ return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB); +} + // SimplifyIndirectBrOnSelect - Replaces // (indirectbr (select cond, blockaddress(@fn, BlockA), // blockaddress(@fn, BlockB))) @@ -2148,7 +2221,9 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { if (LI->isVolatile()) break; - // Delete this instruction + // Delete this instruction (any uses are guaranteed to be dead) + if (!BBI->use_empty()) + BBI->replaceAllUsesWith(UndefValue::get(BBI->getType())); BBI->eraseFromParent(); Changed = true; } @@ -2189,17 +2264,28 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) { // If the default value is unreachable, figure out the most popular // destination and make it the default. if (SI->getSuccessor(0) == BB) { - std::map Popularity; - for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) - Popularity[SI->getSuccessor(i)]++; - + std::map > Popularity; + for (unsigned i = 1, e = SI->getNumCases(); i != e; ++i) { + std::pair& entry = + Popularity[SI->getSuccessor(i)]; + if (entry.first == 0) { + entry.first = 1; + entry.second = i; + } else { + entry.first++; + } + } + // Find the most popular block. 
unsigned MaxPop = 0; + unsigned MaxIndex = 0; BasicBlock *MaxBlock = 0; - for (std::map::iterator + for (std::map >::iterator I = Popularity.begin(), E = Popularity.end(); I != E; ++I) { - if (I->second > MaxPop) { - MaxPop = I->second; + if (I->second.first > MaxPop || + (I->second.first == MaxPop && MaxIndex > I->second.second)) { + MaxPop = I->second.first; + MaxIndex = I->second.second; MaxBlock = I->first; } } @@ -2309,7 +2395,12 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI) { if (BasicBlock *OnlyPred = BB->getSinglePredecessor()) if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred)) return SimplifyCFG(BB) | true; - + + Value *Cond = SI->getCondition(); + if (SelectInst *Select = dyn_cast(Cond)) + if (SimplifySwitchOnSelect(SI, Select)) + return SimplifyCFG(BB) | true; + // If the block only contains the switch, see if we can fold the block // away into any preds. BasicBlock::iterator BBI = BB->begin(); diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index ccb8287d7969..46d4adaaa154 100644 --- a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -116,7 +116,8 @@ bool UnifyFunctionExitNodes::runOnFunction(Function &F) { ReturnInst::Create(F.getContext(), NULL, NewRetBlock); } else { // If the function doesn't return void... add a PHI node to the block... 
- PN = PHINode::Create(F.getReturnType(), "UnifiedRetVal"); + PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), + "UnifiedRetVal"); NewRetBlock->getInstList().push_back(PN); ReturnInst::Create(F.getContext(), PN, NewRetBlock); } diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index f5481d31eb8a..a73bf0449813 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -39,7 +39,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, return VM[V] = const_cast(V); // Create a dummy node in case we have a metadata cycle. - MDNode *Dummy = MDNode::getTemporary(V->getContext(), 0, 0); + MDNode *Dummy = MDNode::getTemporary(V->getContext(), ArrayRef()); VM[V] = Dummy; // Check all operands to see if any need to be remapped. @@ -54,7 +54,7 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, Value *Op = MD->getOperand(i); Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0); } - MDNode *NewMD = MDNode::get(V->getContext(), Elts.data(), Elts.size()); + MDNode *NewMD = MDNode::get(V->getContext(), Elts); Dummy->replaceAllUsesWith(NewMD); VM[V] = NewMD; MDNode::deleteTemporary(Dummy); diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index cbc874a53f63..844284d09c72 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -32,6 +32,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" @@ -39,9 +40,13 @@ #include "llvm/Support/FormattedStream.h" #include #include -#include using namespace llvm; +static cl::opt +EnableDebugInfoComment("enable-debug-info-comment", cl::Hidden, + cl::desc("Enable debug info comments")); + + // Make virtual table appear in this compilation unit. 
AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {} @@ -89,7 +94,7 @@ enum PrefixType { /// prefixed with % (if the string only contains simple characters) or is /// surrounded with ""'s (if it has special chars in it). Print it out. static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { - assert(Name.data() && "Cannot get empty name!"); + assert(!Name.empty() && "Cannot get empty name!"); switch (Prefix) { default: llvm_unreachable("Bad prefix!"); case NoPrefix: break; @@ -1075,7 +1080,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, } if (CE->hasIndices()) { - const SmallVector &Indices = CE->getIndices(); + ArrayRef Indices = CE->getIndices(); for (unsigned i = 0, e = Indices.size(); i != e; ++i) Out << ", " << Indices[i]; } @@ -1338,9 +1343,12 @@ void AssemblyWriter::printModule(const Module *M) { CurPos = NewLine+1; NewLine = Asm.find_first_of('\n', CurPos); } - Out << "module asm \""; - PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.end()), Out); - Out << "\"\n"; + std::string rest(Asm.begin()+CurPos, Asm.end()); + if (!rest.empty()) { + Out << "module asm \""; + PrintEscapedString(rest, Out); + Out << "\"\n"; + } } // Loop over the dependent libraries and emit them. 
@@ -1581,8 +1589,8 @@ void AssemblyWriter::printFunction(const Function *F) { case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break; case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break; case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break; - case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break; - case CallingConv::PTX_Device: Out << "ptx_device"; break; + case CallingConv::PTX_Kernel: Out << "ptx_kernel "; break; + case CallingConv::PTX_Device: Out << "ptx_device "; break; default: Out << "cc" << F->getCallingConv() << " "; break; } @@ -1727,6 +1735,18 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } +/// printDebugLoc - Print DebugLoc. +static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) { + OS << DL.getLine() << ":" << DL.getCol(); + if (MDNode *N = DL.getInlinedAt(getGlobalContext())) { + DebugLoc IDL = DebugLoc::getFromDILocation(N); + if (!IDL.isUnknown()) { + OS << "@"; + printDebugLoc(IDL,OS); + } + } +} + /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. 
/// @@ -1734,6 +1754,43 @@ void AssemblyWriter::printInfoComment(const Value &V) { if (AnnotationWriter) { AnnotationWriter->printInfoComment(V, Out); return; + } else if (EnableDebugInfoComment) { + bool Padded = false; + if (const Instruction *I = dyn_cast(&V)) { + const DebugLoc &DL = I->getDebugLoc(); + if (!DL.isUnknown()) { + if (!Padded) { + Out.PadToColumn(50); + Padded = true; + Out << ";"; + } + Out << " [debug line = "; + printDebugLoc(DL,Out); + Out << "]"; + } + if (const DbgDeclareInst *DDI = dyn_cast(I)) { + const MDNode *Var = DDI->getVariable(); + if (!Padded) { + Out.PadToColumn(50); + Padded = true; + Out << ";"; + } + if (Var && Var->getNumOperands() >= 2) + if (MDString *MDS = dyn_cast_or_null(Var->getOperand(2))) + Out << " [debug variable = " << MDS->getString() << "]"; + } + else if (const DbgValueInst *DVI = dyn_cast(I)) { + const MDNode *Var = DVI->getVariable(); + if (!Padded) { + Out.PadToColumn(50); + Padded = true; + Out << ";"; + } + if (Var && Var->getNumOperands() >= 2) + if (MDString *MDS = dyn_cast_or_null(Var->getOperand(2))) + Out << " [debug variable = " << MDS->getString() << "]"; + } + } } } diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp index b32354035644..4541f381ed4a 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -84,7 +84,6 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name.compare(14, 5, "vsubl", 5) == 0 || Name.compare(14, 5, "vaddw", 5) == 0 || Name.compare(14, 5, "vsubw", 5) == 0 || - Name.compare(14, 5, "vmull", 5) == 0 || Name.compare(14, 5, "vmlal", 5) == 0 || Name.compare(14, 5, "vmlsl", 5) == 0 || Name.compare(14, 5, "vabdl", 5) == 0 || @@ -528,6 +527,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { // or 0. 
NewFn = 0; return true; + } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 || + Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 || + Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) { + // Calls to these instructions are transformed into unaligned loads. + NewFn = 0; + return true; } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) { // This is an SSE/MMX instruction. const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext()); @@ -947,7 +952,29 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { // Remove upgraded instruction. CI->eraseFromParent(); - + + } else if (F->getName() == "llvm.x86.sse.loadu.ps" || + F->getName() == "llvm.x86.sse2.loadu.dq" || + F->getName() == "llvm.x86.sse2.loadu.pd") { + // Convert to a native, unaligned load. + const Type *VecTy = CI->getType(); + const Type *IntTy = IntegerType::get(C, 128); + IRBuilder<> Builder(C); + Builder.SetInsertPoint(CI->getParent(), CI); + + Value *BC = Builder.CreateBitCast(CI->getArgOperand(0), + PointerType::getUnqual(IntTy), + "cast"); + LoadInst *LI = Builder.CreateLoad(BC, CI->getName()); + LI->setAlignment(1); // Unaligned load. + BC = Builder.CreateBitCast(LI, VecTy, "new.cast"); + + // Fix up all the uses with our new load. + if (!CI->use_empty()) + CI->replaceAllUsesWith(BC); + + // Remove intrinsic. + CI->eraseFromParent(); } else { llvm_unreachable("Unknown function for CallInst upgrade."); } @@ -1180,74 +1207,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { break; } -#if 0 - case Intrinsic::x86_mmx_cvtsi32_si64: { - // The return type needs to be changed. - Value *Operands[1]; - Operands[0] = CI->getArgOperand(0); - ConstructNewCallInst(NewFn, CI, Operands, 1); - break; - } - case Intrinsic::x86_mmx_cvtsi64_si32: { - Value *Operands[1]; - - // Cast the operand to the X86 MMX type. 
- Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - - ConstructNewCallInst(NewFn, CI, Operands, 1); - break; - } - case Intrinsic::x86_mmx_vec_init_b: - case Intrinsic::x86_mmx_vec_init_w: - case Intrinsic::x86_mmx_vec_init_d: { - // The return type needs to be changed. - Value *Operands[8]; - unsigned NumOps = 0; - - switch (NewFn->getIntrinsicID()) { - default: break; - case Intrinsic::x86_mmx_vec_init_b: NumOps = 8; break; - case Intrinsic::x86_mmx_vec_init_w: NumOps = 4; break; - case Intrinsic::x86_mmx_vec_init_d: NumOps = 2; break; - } - - switch (NewFn->getIntrinsicID()) { - default: break; - case Intrinsic::x86_mmx_vec_init_b: - Operands[7] = CI->getArgOperand(7); - Operands[6] = CI->getArgOperand(6); - Operands[5] = CI->getArgOperand(5); - Operands[4] = CI->getArgOperand(4); - // FALLTHRU - case Intrinsic::x86_mmx_vec_init_w: - Operands[3] = CI->getArgOperand(3); - Operands[2] = CI->getArgOperand(2); - // FALLTHRU - case Intrinsic::x86_mmx_vec_init_d: - Operands[1] = CI->getArgOperand(1); - Operands[0] = CI->getArgOperand(0); - break; - } - - ConstructNewCallInst(NewFn, CI, Operands, NumOps); - break; - } - case Intrinsic::x86_mmx_vec_ext_d: { - Value *Operands[2]; - - // Cast the operand to the X86 MMX type. 
- Operands[0] = new BitCastInst(CI->getArgOperand(0), - NewFn->getFunctionType()->getParamType(0), - "upgraded.", CI); - Operands[1] = CI->getArgOperand(1); - - ConstructNewCallInst(NewFn, CI, Operands, 2); - break; - } -#endif - case Intrinsic::ctlz: case Intrinsic::ctpop: case Intrinsic::cttz: { diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt index 1abd031dae4e..6bde263ce625 100644 --- a/lib/VMCore/CMakeLists.txt +++ b/lib/VMCore/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMCore Constants.cpp Core.cpp DebugLoc.cpp + DebugInfoProbe.cpp Dominators.cpp Function.cpp GVMaterializer.cpp diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 573efb7e5731..9985adaf576e 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -24,6 +24,7 @@ #include "llvm/Function.h" #include "llvm/GlobalAlias.h" #include "llvm/GlobalVariable.h" +#include "llvm/Operator.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" @@ -1735,7 +1736,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, // with a single zero index, it must be nonzero. assert(CE1->getNumOperands() == 2 && !CE1->getOperand(1)->isNullValue() && - "Suprising getelementptr!"); + "Surprising getelementptr!"); return isSigned ? 
ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; } else { // If they are different globals, we don't know what the value is, diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 246fde1569ae..15d7793d5893 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -32,7 +32,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include -#include +#include using namespace llvm; //===----------------------------------------------------------------------===// @@ -325,27 +325,53 @@ ConstantInt::ConstantInt(const IntegerType *Ty, const APInt& V) assert(V.getBitWidth() == Ty->getBitWidth() && "Invalid constant for type"); } -ConstantInt* ConstantInt::getTrue(LLVMContext &Context) { +ConstantInt *ConstantInt::getTrue(LLVMContext &Context) { LLVMContextImpl *pImpl = Context.pImpl; if (!pImpl->TheTrueVal) pImpl->TheTrueVal = ConstantInt::get(Type::getInt1Ty(Context), 1); return pImpl->TheTrueVal; } -ConstantInt* ConstantInt::getFalse(LLVMContext &Context) { +ConstantInt *ConstantInt::getFalse(LLVMContext &Context) { LLVMContextImpl *pImpl = Context.pImpl; if (!pImpl->TheFalseVal) pImpl->TheFalseVal = ConstantInt::get(Type::getInt1Ty(Context), 0); return pImpl->TheFalseVal; } +Constant *ConstantInt::getTrue(const Type *Ty) { + const VectorType *VTy = dyn_cast(Ty); + if (!VTy) { + assert(Ty->isIntegerTy(1) && "True must be i1 or vector of i1."); + return ConstantInt::getTrue(Ty->getContext()); + } + assert(VTy->getElementType()->isIntegerTy(1) && + "True must be vector of i1 or i1."); + SmallVector Splat(VTy->getNumElements(), + ConstantInt::getTrue(Ty->getContext())); + return ConstantVector::get(Splat); +} + +Constant *ConstantInt::getFalse(const Type *Ty) { + const VectorType *VTy = dyn_cast(Ty); + if (!VTy) { + assert(Ty->isIntegerTy(1) && "False must be i1 or vector of i1."); + return ConstantInt::getFalse(Ty->getContext()); + } + assert(VTy->getElementType()->isIntegerTy(1) && + "False must be vector of i1 or i1."); + SmallVector 
Splat(VTy->getNumElements(), + ConstantInt::getFalse(Ty->getContext())); + return ConstantVector::get(Splat); +} + // Get a ConstantInt from an APInt. Note that the value stored in the DenseMap // as the key, is a DenseMapAPIntKeyInfo::KeyTy which has provided the // operator== and operator!= to ensure that the DenseMap doesn't attempt to // compare APInt's of different widths, which would violate an APInt class // invariant which generates an assertion. -ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) { +ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) { // Get the corresponding integer type for the bit width of the value. const IntegerType *ITy = IntegerType::get(Context, V.getBitWidth()); // get an existing value or the insertion position @@ -355,9 +381,8 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt& V) { return Slot; } -Constant *ConstantInt::get(const Type* Ty, uint64_t V, bool isSigned) { - Constant *C = get(cast(Ty->getScalarType()), - V, isSigned); +Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) { + Constant *C = get(cast(Ty->getScalarType()), V, isSigned); // For vectors, broadcast the value. if (const VectorType *VTy = dyn_cast(Ty)) @@ -596,8 +621,6 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str, return get(ATy, ElementVals); } - - ConstantStruct::ConstantStruct(const StructType *T, const std::vector &V) : Constant(T, ConstantStructVal, @@ -644,6 +667,19 @@ Constant *ConstantStruct::get(LLVMContext &Context, return get(Context, std::vector(Vals, Vals+NumVals), Packed); } +Constant* ConstantStruct::get(LLVMContext &Context, bool Packed, + Constant * Val, ...) 
{ + va_list ap; + std::vector Values; + va_start(ap, Val); + while (Val) { + Values.push_back(Val); + Val = va_arg(ap, llvm::Constant*); + } + va_end(ap); + return get(Context, Values, Packed); +} + ConstantVector::ConstantVector(const VectorType *T, const std::vector &V) : Constant(T, ConstantVectorVal, @@ -734,7 +770,7 @@ bool ConstantExpr::hasIndices() const { getOpcode() == Instruction::InsertValue; } -const SmallVector &ConstantExpr::getIndices() const { +ArrayRef ConstantExpr::getIndices() const { if (const ExtractValueConstantExpr *EVCE = dyn_cast(this)) return EVCE->Indices; @@ -818,10 +854,10 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const { /// operands replaced with the specified values. The specified operands must /// match count and type with the existing ones. Constant *ConstantExpr:: -getWithOperands(Constant *const *Ops, unsigned NumOps) const { - assert(NumOps == getNumOperands() && "Operand count mismatch!"); +getWithOperands(ArrayRef Ops) const { + assert(Ops.size() == getNumOperands() && "Operand count mismatch!"); bool AnyChange = false; - for (unsigned i = 0; i != NumOps; ++i) { + for (unsigned i = 0; i != Ops.size(); ++i) { assert(Ops[i]->getType() == getOperand(i)->getType() && "Operand type mismatch!"); AnyChange |= Ops[i] != getOperand(i); @@ -853,8 +889,8 @@ getWithOperands(Constant *const *Ops, unsigned NumOps) const { return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]); case Instruction::GetElementPtr: return cast(this)->isInBounds() ? 
- ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], NumOps-1) : - ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], NumOps-1); + ConstantExpr::getInBoundsGetElementPtr(Ops[0], &Ops[1], Ops.size()-1) : + ConstantExpr::getGetElementPtr(Ops[0], &Ops[1], Ops.size()-1); case Instruction::ICmp: case Instruction::FCmp: return ConstantExpr::getCompare(getPredicate(), Ops[0], Ops[1]); @@ -2114,7 +2150,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, Constant *Agg = getOperand(0); if (Agg == From) Agg = To; - const SmallVector &Indices = getIndices(); + ArrayRef Indices = getIndices(); Replacement = ConstantExpr::getExtractValue(Agg, &Indices[0], Indices.size()); } else if (getOpcode() == Instruction::InsertValue) { @@ -2123,7 +2159,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV, if (Agg == From) Agg = To; if (Val == From) Val = To; - const SmallVector &Indices = getIndices(); + ArrayRef Indices = getIndices(); Replacement = ConstantExpr::getInsertValue(Agg, Val, &Indices[0], Indices.size()); } else if (isCast()) { diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h index ffc673fac0da..13957545786d 100644 --- a/lib/VMCore/ConstantsContext.h +++ b/lib/VMCore/ConstantsContext.h @@ -301,20 +301,18 @@ struct OperandTraits : DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CompareConstantExpr, Value) struct ExprMapKeyType { - typedef SmallVector IndexList; - ExprMapKeyType(unsigned opc, - const std::vector &ops, + ArrayRef ops, unsigned short flags = 0, unsigned short optionalflags = 0, - const IndexList &inds = IndexList()) + ArrayRef inds = ArrayRef()) : opcode(opc), subclassoptionaldata(optionalflags), subclassdata(flags), - operands(ops), indices(inds) {} + operands(ops.begin(), ops.end()), indices(inds.begin(), inds.end()) {} uint8_t opcode; uint8_t subclassoptionaldata; uint16_t subclassdata; std::vector operands; - IndexList indices; + SmallVector indices; bool operator==(const ExprMapKeyType& that) 
const { return this->opcode == that.opcode && this->subclassdata == that.subclassdata && @@ -465,7 +463,7 @@ struct ConstantKeyData { CE->isCompare() ? CE->getPredicate() : 0, CE->getRawSubclassOptionalData(), CE->hasIndices() ? - CE->getIndices() : SmallVector()); + CE->getIndices() : ArrayRef()); } }; diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp index 35c3a2e92587..92f944027a7c 100644 --- a/lib/VMCore/Core.cpp +++ b/lib/VMCore/Core.cpp @@ -335,7 +335,7 @@ unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) { void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest) { StructType *Ty = unwrap(StructTy); - for (FunctionType::param_iterator I = Ty->element_begin(), + for (StructType::element_iterator I = Ty->element_begin(), E = Ty->element_end(); I != E; ++I) *Dest++ = wrap(*I); } @@ -543,7 +543,8 @@ LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) { LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals, unsigned Count) { - return wrap(MDNode::get(*unwrap(C), unwrap(Vals, Count), Count)); + return wrap(MDNode::get(*unwrap(C), + ArrayRef(unwrap(Vals, Count), Count))); } LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) { @@ -2082,7 +2083,7 @@ LLVMValueRef LLVMBuildFCmp(LLVMBuilderRef B, LLVMRealPredicate Op, /*--.. 
Miscellaneous instructions ..........................................--*/ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) { - return wrap(unwrap(B)->CreatePHI(unwrap(Ty), Name)); + return wrap(unwrap(B)->CreatePHI(unwrap(Ty), 0, Name)); } LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn, diff --git a/lib/VMCore/DebugInfoProbe.cpp b/lib/VMCore/DebugInfoProbe.cpp new file mode 100644 index 000000000000..334c3d815d67 --- /dev/null +++ b/lib/VMCore/DebugInfoProbe.cpp @@ -0,0 +1,258 @@ +//===-- DebugInfoProbe.cpp - DebugInfo Probe ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements DebugInfoProbe. This probe can be used by a pass +// manager to analyze how optimizer is treating debugging information. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "debuginfoprobe" +#include "llvm/DebugInfoProbe.h" +#include "llvm/Function.h" +#include "llvm/IntrinsicInst.h" +#include "llvm/Metadata.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugLoc.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/StringRef.h" +#include +#include + +using namespace llvm; + +static cl::opt +EnableDebugInfoProbe("enable-debug-info-probe", cl::Hidden, + cl::desc("Enable debug info probe")); + +// CreateInfoOutputFile - Return a file stream to print our output on. +namespace llvm { extern raw_ostream *CreateInfoOutputFile(); } + +//===----------------------------------------------------------------------===// +// DebugInfoProbeImpl - This class implements a interface to monitor +// how an optimization pass is preserving debugging information. 
+ +namespace llvm { + + class DebugInfoProbeImpl { + public: + DebugInfoProbeImpl() : NumDbgLineLost(0),NumDbgValueLost(0) {} + void initialize(StringRef PName, Function &F); + void finalize(Function &F); + void report(); + private: + unsigned NumDbgLineLost, NumDbgValueLost; + std::string PassName; + Function *TheFn; + std::set LineNos; + std::set DbgVariables; + std::set MissingDebugLoc; + }; +} + +//===----------------------------------------------------------------------===// +// DebugInfoProbeImpl + +static void collect(Function &F, std::set &Lines) { + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + const DebugLoc &DL = BI->getDebugLoc(); + unsigned LineNo = 0; + if (!DL.isUnknown()) { + if (MDNode *N = DL.getInlinedAt(F.getContext())) + LineNo = DebugLoc::getFromDILocation(N).getLine(); + else + LineNo = DL.getLine(); + + Lines.insert(LineNo); + } + } +} + +/// initialize - Collect information before running an optimization pass. +void DebugInfoProbeImpl::initialize(StringRef PName, Function &F) { + if (!EnableDebugInfoProbe) return; + PassName = PName; + + LineNos.clear(); + DbgVariables.clear(); + TheFn = &F; + collect(F, LineNos); + + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + if (BI->getDebugLoc().isUnknown()) + MissingDebugLoc.insert(BI); + if (!isa(BI)) continue; + Value *Addr = NULL; + MDNode *Node = NULL; + if (DbgDeclareInst *DDI = dyn_cast(BI)) { + Addr = DDI->getAddress(); + Node = DDI->getVariable(); + } else if (DbgValueInst *DVI = dyn_cast(BI)) { + Addr = DVI->getValue(); + Node = DVI->getVariable(); + } + if (Addr) + DbgVariables.insert(Node); + } +} + +/// report - Report findings. This should be invoked after finalize. 
+void DebugInfoProbeImpl::report() { + if (!EnableDebugInfoProbe) return; + if (NumDbgLineLost || NumDbgValueLost) { + raw_ostream *OutStream = CreateInfoOutputFile(); + if (NumDbgLineLost) + *OutStream << NumDbgLineLost + << "\t times line number info lost by " + << PassName << "\n"; + if (NumDbgValueLost) + *OutStream << NumDbgValueLost + << "\t times variable info lost by " + << PassName << "\n"; + delete OutStream; + } + NumDbgLineLost = 0; + NumDbgValueLost = 0; +} + +/// finalize - Collect information after running an optimization pass. This +/// must be used after initialization. +void DebugInfoProbeImpl::finalize(Function &F) { + if (!EnableDebugInfoProbe) return; + std::set LineNos2; + collect(F, LineNos2); + assert (TheFn == &F && "Invalid function to measure!"); + + for (std::set::iterator I = LineNos.begin(), + E = LineNos.end(); I != E; ++I) { + unsigned LineNo = *I; + if (LineNos2.count(LineNo) == 0) { + DEBUG(dbgs() + << "DebugInfoProbe(" + << PassName + << "): Losing dbg info for source line " + << LineNo << "\n"); + ++NumDbgLineLost; + } + } + + std::setDbgVariables2; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); + BI != BE; ++BI) { + if (BI->getDebugLoc().isUnknown() && + MissingDebugLoc.count(BI) == 0) { + DEBUG(dbgs() << "DebugInfoProbe(" << PassName << "): --- "); + DEBUG(BI->print(dbgs())); + DEBUG(dbgs() << "\n"); + } + if (!isa(BI)) continue; + Value *Addr = NULL; + MDNode *Node = NULL; + if (DbgDeclareInst *DDI = dyn_cast(BI)) { + Addr = DDI->getAddress(); + Node = DDI->getVariable(); + } else if (DbgValueInst *DVI = dyn_cast(BI)) { + Addr = DVI->getValue(); + Node = DVI->getVariable(); + } + if (Addr) + DbgVariables2.insert(Node); + } + + for (std::set::iterator I = DbgVariables.begin(), + E = DbgVariables.end(); I != E; ++I) { + if (DbgVariables2.count(*I) == 0 && (*I)->getNumOperands() >= 2) { + DEBUG(dbgs() + << "DebugInfoProbe(" + << PassName + << 
"): Losing dbg info for variable: "; + if (MDString *MDS = dyn_cast_or_null( + (*I)->getOperand(2))) + dbgs() << MDS->getString(); + else + dbgs() << "..."; + dbgs() << "\n"); + ++NumDbgValueLost; + } + } +} + +//===----------------------------------------------------------------------===// +// DebugInfoProbe + +DebugInfoProbe::DebugInfoProbe() { + pImpl = new DebugInfoProbeImpl(); +} + +DebugInfoProbe::~DebugInfoProbe() { + delete pImpl; +} + +/// initialize - Collect information before running an optimization pass. +void DebugInfoProbe::initialize(StringRef PName, Function &F) { + pImpl->initialize(PName, F); +} + +/// finalize - Collect information after running an optimization pass. This +/// must be used after initialization. +void DebugInfoProbe::finalize(Function &F) { + pImpl->finalize(F); +} + +/// report - Report findings. This should be invoked after finalize. +void DebugInfoProbe::report() { + pImpl->report(); +} + +//===----------------------------------------------------------------------===// +// DebugInfoProbeInfo + +/// ~DebugInfoProbeInfo - Report data collected by all probes before deleting +/// them. +DebugInfoProbeInfo::~DebugInfoProbeInfo() { + if (!EnableDebugInfoProbe) return; + for (StringMap::iterator I = Probes.begin(), + E = Probes.end(); I != E; ++I) { + I->second->report(); + delete I->second; + } + } + +/// initialize - Collect information before running an optimization pass. +void DebugInfoProbeInfo::initialize(Pass *P, Function &F) { + if (!EnableDebugInfoProbe) return; + if (P->getAsPMDataManager()) + return; + + StringMapEntry &Entry = + Probes.GetOrCreateValue(P->getPassName()); + DebugInfoProbe *&Probe = Entry.getValue(); + if (!Probe) + Probe = new DebugInfoProbe(); + Probe->initialize(P->getPassName(), F); +} + +/// finalize - Collect information after running an optimization pass. This +/// must be used after initialization. 
+void DebugInfoProbeInfo::finalize(Pass *P, Function &F) { + if (!EnableDebugInfoProbe) return; + if (P->getAsPMDataManager()) + return; + StringMapEntry &Entry = + Probes.GetOrCreateValue(P->getPassName()); + DebugInfoProbe *&Probe = Entry.getValue(); + assert (Probe && "DebugInfoProbe is not initialized!"); + Probe->finalize(F); +} diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp index f8b45eed0d5e..520333cbbcf2 100644 --- a/lib/VMCore/DebugLoc.cpp +++ b/lib/VMCore/DebugLoc.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DebugLoc.h" +#include "llvm/ADT/DenseMapInfo.h" #include "LLVMContextImpl.h" using namespace llvm; @@ -108,7 +109,7 @@ MDNode *DebugLoc::getAsMDNode(const LLVMContext &Ctx) const { ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()), Scope, IA }; - return MDNode::get(Ctx2, &Elts[0], 4); + return MDNode::get(Ctx2, Elts); } /// getFromDILocation - Translate the DILocation quad into a DebugLoc. 
@@ -127,6 +128,29 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) { return get(LineNo, ColNo, Scope, dyn_cast_or_null(N->getOperand(3))); } +//===----------------------------------------------------------------------===// +// DenseMap specialization +//===----------------------------------------------------------------------===// + +DebugLoc DenseMapInfo::getEmptyKey() { + return DebugLoc::getEmptyKey(); +} + +DebugLoc DenseMapInfo::getTombstoneKey() { + return DebugLoc::getTombstoneKey(); +} + +unsigned DenseMapInfo::getHashValue(const DebugLoc &Key) { + FoldingSetNodeID ID; + ID.AddInteger(Key.LineCol); + ID.AddInteger(Key.ScopeIdx); + return ID.ComputeHash(); +} + +bool DenseMapInfo::isEqual(const DebugLoc &LHS, const DebugLoc &RHS) { + return LHS == RHS; +} + //===----------------------------------------------------------------------===// // LLVMContextImpl Implementation //===----------------------------------------------------------------------===// diff --git a/lib/VMCore/Dominators.cpp b/lib/VMCore/Dominators.cpp index c374b067d72c..08b845ef9d6b 100644 --- a/lib/VMCore/Dominators.cpp +++ b/lib/VMCore/Dominators.cpp @@ -68,9 +68,8 @@ void DominatorTree::verifyAnalysis() const { DominatorTree OtherDT; OtherDT.getBase().recalculate(F); if (compare(OtherDT)) { - errs() << "DominatorTree is not up to date! 
Computed:\n"; + errs() << "DominatorTree is not up to date!\nComputed:\n"; print(errs()); - errs() << "\nActual:\n"; OtherDT.print(errs()); abort(); diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 00d1d7873247..013c4587c9fc 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -328,7 +328,7 @@ unsigned Function::getIntrinsicID() const { std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) { assert(id < num_intrinsics && "Invalid intrinsic ID!"); - const char * const Table[] = { + static const char * const Table[] = { "not_intrinsic", #define GET_INTRINSIC_NAME_TABLE #include "llvm/Intrinsics.gen" @@ -363,7 +363,7 @@ const FunctionType *Intrinsic::getType(LLVMContext &Context, } bool Intrinsic::isOverloaded(ID id) { - const bool OTable[] = { + static const bool OTable[] = { false, #define GET_INTRINSIC_OVERLOAD_TABLE #include "llvm/Intrinsics.gen" diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp index 595dea470bc3..21491557d4df 100644 --- a/lib/VMCore/IRBuilder.cpp +++ b/lib/VMCore/IRBuilder.cpp @@ -23,13 +23,14 @@ using namespace llvm; /// has array of i8 type filled in with the nul terminated string value /// specified. If Name is specified, it is the name of the global variable /// created. 
-Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) { +Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) { Constant *StrConstant = ConstantArray::get(Context, Str, true); Module &M = *BB->getParent()->getParent(); GlobalVariable *GV = new GlobalVariable(M, StrConstant->getType(), true, GlobalValue::InternalLinkage, StrConstant, "", 0, false); GV->setName(Name); + GV->setUnnamedAddr(true); return GV; } diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index d1290281cb1a..61da9b6b8e0c 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -131,26 +131,15 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) { return Removed; } -/// resizeOperands - resize operands - This adjusts the length of the operands -/// list according to the following behavior: -/// 1. If NumOps == 0, grow the operand list in response to a push_back style -/// of operation. This grows the number of ops by 1.5 times. -/// 2. If NumOps > NumOperands, reserve space for NumOps operands. -/// 3. If NumOps == NumOperands, trim the reserved space. +/// growOperands - grow operands - This grows the operand list in response +/// to a push_back style of operation. This grows the number of ops by 1.5 +/// times. /// -void PHINode::resizeOperands(unsigned NumOps) { +void PHINode::growOperands() { unsigned e = getNumOperands(); - if (NumOps == 0) { - NumOps = e*3/2; - if (NumOps < 4) NumOps = 4; // 4 op PHI nodes are VERY common. - } else if (NumOps*2 > NumOperands) { - // No resize needed. - if (ReservedSpace >= NumOps) return; - } else if (NumOps == NumOperands) { - if (ReservedSpace == NumOps) return; - } else { - return; - } + // Multiply by 1.5 and round down so the result is still even. + unsigned NumOps = e + e / 4 * 2; + if (NumOps < 4) NumOps = 4; // 4 op PHI nodes are VERY common. 
ReservedSpace = NumOps; Use *OldOps = OperandList; @@ -2297,8 +2286,12 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { if (const VectorType *SrcPTy = dyn_cast(SrcTy)) { // Casting from vector return DestPTy->getBitWidth() == SrcPTy->getBitWidth(); - } else { // Casting from something else - return DestPTy->getBitWidth() == SrcBits; + } else if (DestPTy->getBitWidth() == SrcBits) { + return true; // float/int -> vector + } else if (SrcTy->isX86_MMXTy()) { + return DestPTy->getBitWidth() == 64; // MMX to 64-bit vector + } else { + return false; } } else if (DestTy->isPointerTy()) { // Casting to pointer if (SrcTy->isPointerTy()) { // Casting from pointer @@ -2308,8 +2301,12 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) { } else { // Casting from something else return false; } - } else if (DestTy->isX86_MMXTy()) { - return SrcBits == 64; + } else if (DestTy->isX86_MMXTy()) { + if (const VectorType *SrcPTy = dyn_cast(SrcTy)) { + return SrcPTy->getBitWidth() == 64; // 64-bit vector to MMX + } else { + return false; + } } else { // Casting to something else return false; } @@ -2990,7 +2987,7 @@ SwitchInst::~SwitchInst() { void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) { unsigned OpNo = NumOperands; if (OpNo+2 > ReservedSpace) - resizeOperands(0); // Get more space! + growOperands(); // Get more space! // Initialize some new operands. assert(OpNo+1 < ReservedSpace && "Growing didn't work!"); NumOperands = OpNo+2; @@ -3021,25 +3018,12 @@ void SwitchInst::removeCase(unsigned idx) { NumOperands = NumOps-2; } -/// resizeOperands - resize operands - This adjusts the length of the operands -/// list according to the following behavior: -/// 1. If NumOps == 0, grow the operand list in response to a push_back style -/// of operation. This grows the number of ops by 3 times. -/// 2. If NumOps > NumOperands, reserve space for NumOps operands. -/// 3. If NumOps == NumOperands, trim the reserved space. 
+/// growOperands - grow operands - This grows the operand list in response +/// to a push_back style of operation. This grows the number of ops by 3 times. /// -void SwitchInst::resizeOperands(unsigned NumOps) { +void SwitchInst::growOperands() { unsigned e = getNumOperands(); - if (NumOps == 0) { - NumOps = e*3; - } else if (NumOps*2 > NumOperands) { - // No resize needed. - if (ReservedSpace >= NumOps) return; - } else if (NumOps == NumOperands) { - if (ReservedSpace == NumOps) return; - } else { - return; - } + unsigned NumOps = e*3; ReservedSpace = NumOps; Use *NewOps = allocHungoffUses(NumOps); @@ -3077,25 +3061,12 @@ void IndirectBrInst::init(Value *Address, unsigned NumDests) { } -/// resizeOperands - resize operands - This adjusts the length of the operands -/// list according to the following behavior: -/// 1. If NumOps == 0, grow the operand list in response to a push_back style -/// of operation. This grows the number of ops by 2 times. -/// 2. If NumOps > NumOperands, reserve space for NumOps operands. -/// 3. If NumOps == NumOperands, trim the reserved space. +/// growOperands - grow operands - This grows the operand list in response +/// to a push_back style of operation. This grows the number of ops by 2 times. /// -void IndirectBrInst::resizeOperands(unsigned NumOps) { +void IndirectBrInst::growOperands() { unsigned e = getNumOperands(); - if (NumOps == 0) { - NumOps = e*2; - } else if (NumOps*2 > NumOperands) { - // No resize needed. - if (ReservedSpace >= NumOps) return; - } else if (NumOps == NumOperands) { - if (ReservedSpace == NumOps) return; - } else { - return; - } + unsigned NumOps = e*2; ReservedSpace = NumOps; Use *NewOps = allocHungoffUses(NumOps); @@ -3139,7 +3110,7 @@ IndirectBrInst::~IndirectBrInst() { void IndirectBrInst::addDestination(BasicBlock *DestBB) { unsigned OpNo = NumOperands; if (OpNo+1 > ReservedSpace) - resizeOperands(0); // Get more space! + growOperands(); // Get more space! // Initialize some new operands. 
assert(OpNo < ReservedSpace && "Growing didn't work!"); NumOperands = OpNo+1; diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h index 23971aafa74d..6ea4b48e79b7 100644 --- a/lib/VMCore/LLVMContextImpl.h +++ b/lib/VMCore/LLVMContextImpl.h @@ -184,7 +184,7 @@ class LLVMContextImpl { // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions // for types as they are needed. Because resolution of types must invalidate - // all of the abstract type descriptions, we keep them in a seperate map to + // all of the abstract type descriptions, we keep them in a separate map to // make this easy. TypePrinting ConcreteTypeDescriptions; TypePrinting AbstractTypeDescriptions; diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp index 0b8e8dfa8b36..eb719e54b289 100644 --- a/lib/VMCore/Metadata.cpp +++ b/lib/VMCore/Metadata.cpp @@ -84,18 +84,18 @@ static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) { return reinterpret_cast(N+1)+Op; } -MDNode::MDNode(LLVMContext &C, Value *const *Vals, unsigned NumVals, - bool isFunctionLocal) +MDNode::MDNode(LLVMContext &C, ArrayRef Vals, bool isFunctionLocal) : Value(Type::getMetadataTy(C), Value::MDNodeVal) { - NumOperands = NumVals; + NumOperands = Vals.size(); if (isFunctionLocal) setValueSubclassData(getSubclassDataFromValue() | FunctionLocalBit); // Initialize the operand list, which is co-allocated on the end of the node. 
+ unsigned i = 0; for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands; - Op != E; ++Op, ++Vals) - new (Op) MDNodeOperand(*Vals, this); + Op != E; ++Op, ++i) + new (Op) MDNodeOperand(Vals[i], this); } @@ -183,9 +183,8 @@ static bool isFunctionLocalValue(Value *V) { (isa(V) && cast(V)->isFunctionLocal()); } -MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, - unsigned NumVals, FunctionLocalness FL, - bool Insert) { +MDNode *MDNode::getMDNode(LLVMContext &Context, ArrayRef Vals, + FunctionLocalness FL, bool Insert) { LLVMContextImpl *pImpl = Context.pImpl; // Add all the operand pointers. Note that we don't have to add the @@ -193,7 +192,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, // Note that if the operands are later nulled out, the node will be // removed from the uniquing map. FoldingSetNodeID ID; - for (unsigned i = 0; i != NumVals; ++i) + for (unsigned i = 0; i != Vals.size(); ++i) ID.AddPointer(Vals[i]); void *InsertPoint; @@ -205,7 +204,7 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, bool isFunctionLocal = false; switch (FL) { case FL_Unknown: - for (unsigned i = 0; i != NumVals; ++i) { + for (unsigned i = 0; i != Vals.size(); ++i) { Value *V = Vals[i]; if (!V) continue; if (isFunctionLocalValue(V)) { @@ -223,8 +222,8 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, } // Coallocate space for the node and Operands together, then placement new. - void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); - N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal); + void *Ptr = malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); + N = new (Ptr) MDNode(Context, Vals, isFunctionLocal); // InsertPoint will have been set by the FindNodeOrInsertPos call. 
pImpl->MDNodeSet.InsertNode(N, InsertPoint); @@ -232,24 +231,24 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals, return N; } -MDNode *MDNode::get(LLVMContext &Context, Value*const* Vals, unsigned NumVals) { - return getMDNode(Context, Vals, NumVals, FL_Unknown); +MDNode *MDNode::get(LLVMContext &Context, ArrayRef Vals) { + return getMDNode(Context, Vals, FL_Unknown); } -MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, Value *const *Vals, - unsigned NumVals, bool isFunctionLocal) { - return getMDNode(Context, Vals, NumVals, isFunctionLocal ? FL_Yes : FL_No); +MDNode *MDNode::getWhenValsUnresolved(LLVMContext &Context, + ArrayRef Vals, + bool isFunctionLocal) { + return getMDNode(Context, Vals, isFunctionLocal ? FL_Yes : FL_No); } -MDNode *MDNode::getIfExists(LLVMContext &Context, Value *const *Vals, - unsigned NumVals) { - return getMDNode(Context, Vals, NumVals, FL_Unknown, false); +MDNode *MDNode::getIfExists(LLVMContext &Context, ArrayRef Vals) { + return getMDNode(Context, Vals, FL_Unknown, false); } -MDNode *MDNode::getTemporary(LLVMContext &Context, Value *const *Vals, - unsigned NumVals) { - MDNode *N = (MDNode *)malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand)); - N = new (N) MDNode(Context, Vals, NumVals, FL_No); +MDNode *MDNode::getTemporary(LLVMContext &Context, ArrayRef Vals) { + MDNode *N = + (MDNode *)malloc(sizeof(MDNode)+Vals.size()*sizeof(MDNodeOperand)); + N = new (N) MDNode(Context, Vals, FL_No); N->setValueSubclassData(N->getSubclassDataFromValue() | NotUniquedBit); LeakDetector::addGarbageObject(N); diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp index 8bfef9855ca2..637fa79195c9 100644 --- a/lib/VMCore/PassManager.cpp +++ b/lib/VMCore/PassManager.cpp @@ -14,6 +14,7 @@ #include "llvm/PassManagers.h" #include "llvm/PassManager.h" +#include "llvm/DebugInfoProbe.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" @@ -25,6 +26,7 
@@ #include "llvm/Support/PassNameParser.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" +#include "llvm/ADT/StringMap.h" #include #include #include @@ -63,11 +65,13 @@ PassOptionList; // Print IR out before/after specified passes. static PassOptionList PrintBefore("print-before", - llvm::cl::desc("Print IR before specified passes")); + llvm::cl::desc("Print IR before specified passes"), + cl::Hidden); static PassOptionList PrintAfter("print-after", - llvm::cl::desc("Print IR after specified passes")); + llvm::cl::desc("Print IR after specified passes"), + cl::Hidden); static cl::opt PrintBeforeAll("print-before-all", @@ -439,6 +443,20 @@ char PassManagerImpl::ID = 0; namespace { +//===----------------------------------------------------------------------===// +// DebugInfoProbe + +static DebugInfoProbeInfo *TheDebugProbe; +static void createDebugInfoProbe() { + if (TheDebugProbe) return; + + // Constructed the first time this is called. This guarantees that the + // object will be constructed, if -enable-debug-info-probe is set, + // before static globals, thus it will be destroyed before them. + static ManagedStatic DIP; + TheDebugProbe = &*DIP; +} + //===----------------------------------------------------------------------===// /// TimingInfo Class - This class is used to calculate information about the /// amount of time each pass takes to execute. This only happens when @@ -964,7 +982,7 @@ void PMDataManager::add(Pass *P, bool ProcessAnalysis) { // Keep track of higher level analysis used by this manager. HigherLevelAnalysis.push_back(PRequired); } else - llvm_unreachable("Unable to accomodate Required Pass"); + llvm_unreachable("Unable to accommodate Required Pass"); } // Set P as P's last user until someone starts using P. 
@@ -1428,6 +1446,7 @@ void FunctionPassManagerImpl::releaseMemoryOnTheFly() { bool FunctionPassManagerImpl::run(Function &F) { bool Changed = false; TimingInfo::createTheTimeInfo(); + createDebugInfoProbe(); initializeAllAnalysisInfo(); for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) @@ -1475,13 +1494,16 @@ bool FPPassManager::runOnFunction(Function &F) { dumpRequiredSet(FP); initializeAnalysisImpl(FP); - + if (TheDebugProbe) + TheDebugProbe->initialize(FP, F); { PassManagerPrettyStackEntry X(FP, F); TimeRegion PassTimer(getPassTimer(FP)); LocalChanged |= FP->runOnFunction(F); } + if (TheDebugProbe) + TheDebugProbe->finalize(FP, F); Changed |= LocalChanged; if (LocalChanged) @@ -1629,6 +1651,7 @@ Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){ bool PassManagerImpl::run(Module &M) { bool Changed = false; TimingInfo::createTheTimeInfo(); + createDebugInfoProbe(); dumpArguments(); dumpPasses(); diff --git a/lib/VMCore/PassRegistry.cpp b/lib/VMCore/PassRegistry.cpp index c97a170f501f..fa92620b288e 100644 --- a/lib/VMCore/PassRegistry.cpp +++ b/lib/VMCore/PassRegistry.cpp @@ -26,7 +26,7 @@ using namespace llvm; // FIXME: We use ManagedStatic to erase the pass registrar on shutdown. // Unfortunately, passes are registered with static ctors, and having -// llvm_shutdown clear this map prevents successful ressurection after +// llvm_shutdown clear this map prevents successful resurrection after // llvm_shutdown is run. Ideally we should find a solution so that we don't // leak the map, AND can still resurrect after shutdown. 
static ManagedStatic PassRegistryObj; diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp index be28ad1f7122..b15304cc9593 100644 --- a/lib/VMCore/Type.cpp +++ b/lib/VMCore/Type.cpp @@ -17,6 +17,7 @@ #include "llvm/Assembly/Writer.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SCCIterator.h" @@ -460,7 +461,7 @@ bool FunctionType::isValidArgumentType(const Type *ArgTy) { } FunctionType::FunctionType(const Type *Result, - const std::vector &Params, + ArrayRef Params, bool IsVarArgs) : DerivedType(Result->getContext(), FunctionTyID), isVarArgs(IsVarArgs) { ContainedTys = reinterpret_cast(this+1); @@ -483,7 +484,7 @@ FunctionType::FunctionType(const Type *Result, } StructType::StructType(LLVMContext &C, - const std::vector &Types, bool isPacked) + ArrayRef Types, bool isPacked) : CompositeType(C, StructTyID) { ContainedTys = reinterpret_cast(this + 1); NumContainedTys = Types.size(); @@ -838,7 +839,7 @@ FunctionValType FunctionValType::get(const FunctionType *FT) { // FunctionType::get - The factory function for the FunctionType class... 
FunctionType *FunctionType::get(const Type *ReturnType, - const std::vector &Params, + ArrayRef Params, bool isVarArg) { FunctionValType VT(ReturnType, Params, isVarArg); FunctionType *FT = 0; @@ -915,7 +916,7 @@ bool VectorType::isValidElementType(const Type *ElemTy) { // StructType *StructType::get(LLVMContext &Context, - const std::vector &ETypes, + ArrayRef ETypes, bool isPacked) { StructValType STV(ETypes, isPacked); StructType *ST = 0; diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h index 4694486c41b6..ad09478bbcfd 100644 --- a/lib/VMCore/TypesContext.h +++ b/lib/VMCore/TypesContext.h @@ -15,6 +15,7 @@ #ifndef LLVM_TYPESCONTEXT_H #define LLVM_TYPESCONTEXT_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include @@ -157,8 +158,8 @@ class StructValType { std::vector ElTypes; bool packed; public: - StructValType(const std::vector &args, bool isPacked) - : ElTypes(args), packed(isPacked) {} + StructValType(ArrayRef args, bool isPacked) + : ElTypes(args.vec()), packed(isPacked) {} static StructValType get(const StructType *ST) { std::vector ElTypes; @@ -187,8 +188,8 @@ class FunctionValType { std::vector ArgTypes; bool isVarArg; public: - FunctionValType(const Type *ret, const std::vector &args, - bool isVA) : RetTy(ret), ArgTypes(args), isVarArg(isVA) {} + FunctionValType(const Type *ret, ArrayRef args, bool isVA) + : RetTy(ret), ArgTypes(args.vec()), isVarArg(isVA) {} static FunctionValType get(const FunctionType *FT); @@ -369,7 +370,7 @@ class TypeMap : public TypeMapBase { // Remove the old entry form TypesByHash. If the hash values differ // now, remove it from the old place. Otherwise, continue scanning - // withing this hashcode to reduce work. + // within this hashcode to reduce work. 
if (NewTypeHash != OldTypeHash) { RemoveFromTypesByHash(OldTypeHash, Ty); } else { diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 58ec6fe88d35..8b891100839a 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -471,6 +471,23 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) { "invalid linkage type for global declaration", &GV); } + if (GV.hasName() && (GV.getName() == "llvm.global_ctors" || + GV.getName() == "llvm.global_dtors")) { + Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(), + "invalid linkage for intrinsic global variable", &GV); + // Don't worry about emitting an error for it not being an array, + // visitGlobalValue will complain on appending non-array. + if (const ArrayType *ATy = dyn_cast(GV.getType())) { + const StructType *STy = dyn_cast(ATy->getElementType()); + const PointerType *FuncPtrTy = + FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo(); + Assert1(STy && STy->getNumElements() == 2 && + STy->getTypeAtIndex(0u)->isIntegerTy(32) && + STy->getTypeAtIndex(1) == FuncPtrTy, + "wrong type for intrinsic global variable", &GV); + } + } + visitGlobalValue(GV); } @@ -826,30 +843,10 @@ void Verifier::visitReturnInst(ReturnInst &RI) { Assert2(N == 0, "Found return instr that returns non-void in Function of void " "return type!", &RI, F->getReturnType()); - else if (N == 1 && F->getReturnType() == RI.getOperand(0)->getType()) { - // Exactly one return value and it matches the return type. Good. - } else if (const StructType *STy = dyn_cast(F->getReturnType())) { - // The return type is a struct; check for multiple return values. 
- Assert2(STy->getNumElements() == N, - "Incorrect number of return values in ret instruction!", - &RI, F->getReturnType()); - for (unsigned i = 0; i != N; ++i) - Assert2(STy->getElementType(i) == RI.getOperand(i)->getType(), - "Function return type does not match operand " - "type of return inst!", &RI, F->getReturnType()); - } else if (const ArrayType *ATy = dyn_cast(F->getReturnType())) { - // The return type is an array; check for multiple return values. - Assert2(ATy->getNumElements() == N, - "Incorrect number of return values in ret instruction!", - &RI, F->getReturnType()); - for (unsigned i = 0; i != N; ++i) - Assert2(ATy->getElementType() == RI.getOperand(i)->getType(), - "Function return type does not match operand " - "type of return inst!", &RI, F->getReturnType()); - } else { - CheckFailed("Function return type does not match operand " - "type of return inst!", &RI, F->getReturnType()); - } + else + Assert2(N == 1 && F->getReturnType() == RI.getOperand(0)->getType(), + "Function return type does not match operand " + "type of return inst!", &RI, F->getReturnType()); // Check to make sure that the return value has necessary properties for // terminators... diff --git a/projects/sample/autoconf/configure.ac b/projects/sample/autoconf/configure.ac index 4e61bee5e790..bb75bbdeb594 100644 --- a/projects/sample/autoconf/configure.ac +++ b/projects/sample/autoconf/configure.ac @@ -15,7 +15,7 @@ dnl Tell autoconf that this is an LLVM project being configured dnl This provides the --with-llvmsrc and --with-llvmobj options LLVM_CONFIG_PROJECT($LLVM_ABS_SRC_ROOT,$LLVM_ABS_OBJ_ROOT) -dnl Tell autoconf that the auxilliary files are actually located in +dnl Tell autoconf that the auxiliary files are actually located in dnl the LLVM autoconf directory, not here. 
AC_CONFIG_AUX_DIR($LLVM_SRC/autoconf) diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt new file mode 100644 index 000000000000..502b91dc580d --- /dev/null +++ b/runtime/CMakeLists.txt @@ -0,0 +1,5 @@ +if( NOT LLVM_BUILD_RUNTIME ) + set(EXCLUDE_FROM_ALL ON) +endif() + +add_subdirectory(libprofile) diff --git a/runtime/libprofile/CMakeLists.txt b/runtime/libprofile/CMakeLists.txt new file mode 100644 index 000000000000..414ad00b4a80 --- /dev/null +++ b/runtime/libprofile/CMakeLists.txt @@ -0,0 +1,19 @@ +set(SOURCES + BasicBlockTracing.c + CommonProfiling.c + GCDAProfiling.c + PathProfiling.c + EdgeProfiling.c + OptimalEdgeProfiling.c + Profiling.h + ) + +add_llvm_library( profile_rt-static ${SOURCES} ) +set_target_properties( profile_rt-static + PROPERTIES + OUTPUT_NAME "profile_rt" ) + +add_llvm_loadable_module( profile_rt-shared ${SOURCES} ) +set_target_properties( profile_rt-shared + PROPERTIES + OUTPUT_NAME "profile_rt" ) diff --git a/runtime/libprofile/CommonProfiling.c b/runtime/libprofile/CommonProfiling.c index 1c1771c3063e..210a5e5ab78a 100644 --- a/runtime/libprofile/CommonProfiling.c +++ b/runtime/libprofile/CommonProfiling.c @@ -19,7 +19,11 @@ #include #include #include +#if !defined(_MSC_VER) && !defined(__MINGW32__) #include +#else +#include +#endif #include static char *SavedArgs = 0; diff --git a/runtime/libprofile/GCDAProfiling.c b/runtime/libprofile/GCDAProfiling.c new file mode 100644 index 000000000000..2dcf22d96472 --- /dev/null +++ b/runtime/libprofile/GCDAProfiling.c @@ -0,0 +1,152 @@ +/*===- GCDAProfiling.c - Support library for GCDA file emission -----------===*\ +|* +|* The LLVM Compiler Infrastructure +|* +|* This file is distributed under the University of Illinois Open Source +|* License. See LICENSE.TXT for details. +|* +|*===----------------------------------------------------------------------===*| +|* +|* This file implements the call back routines for the gcov profiling +|* instrumentation pass. 
Link against this library when running code through +|* the -insert-gcov-profiling LLVM pass. +|* +|* We emit files in a corrupt version of GCOV's "gcda" file format. These files +|* are only close enough that LCOV will happily parse them. Anything that lcov +|* ignores is missing. +|* +|* TODO: gcov is multi-process safe by having each exit open the existing file +|* and append to it. We'd like to achieve that and be thread-safe too. +|* +\*===----------------------------------------------------------------------===*/ + +#include "llvm/Support/DataTypes.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +/* #define DEBUG_GCDAPROFILING */ + +/* + * --- GCOV file format I/O primitives --- + */ + +static FILE *output_file = NULL; + +static void write_int32(uint32_t i) { + fwrite(&i, 4, 1, output_file); +} + +static void write_int64(uint64_t i) { + uint32_t lo, hi; + lo = i >> 0; + hi = i >> 32; + + write_int32(lo); + write_int32(hi); +} + +static char *mangle_filename(const char *orig_filename) { + /* TODO: handle GCOV_PREFIX_STRIP */ + const char *prefix; + char *filename = 0; + + prefix = getenv("GCOV_PREFIX"); + + if (!prefix) + return strdup(orig_filename); + + filename = malloc(strlen(prefix) + 1 + strlen(orig_filename) + 1); + strcpy(filename, prefix); + strcat(filename, "/"); + strcat(filename, orig_filename); + + return filename; +} + +/* + * --- LLVM line counter API --- + */ + +/* A file in this case is a translation unit. Each .o file built with line + * profiling enabled will emit to a different file. Only one file may be + * started at a time. + */ +void llvm_gcda_start_file(const char *orig_filename) { + char *filename; + filename = mangle_filename(orig_filename); + output_file = fopen(filename, "wb"); + + /* gcda file, version 404*, stamp LLVM.
*/ + fwrite("adcg*404MVLL", 12, 1, output_file); + +#ifdef DEBUG_GCDAPROFILING + printf("llvmgcda: [%s]\n", orig_filename); +#endif + + free(filename); +} + +/* Given an array of pointers to counters (counters), increment the n-th one, + * where we're also given a pointer to n (predecessor). + */ +void llvm_gcda_increment_indirect_counter(uint32_t *predecessor, + uint64_t **counters) { + uint64_t *counter; + uint32_t pred; + + pred = *predecessor; + if (pred == 0xffffffff) + return; + counter = counters[pred]; + + /* Don't crash if the pred# is out of sync. This can happen due to threads, + or because of a TODO in GCOVProfiling.cpp buildEdgeLookupTable(). */ + if (counter) + ++*counter; +#ifdef DEBUG_GCDAPROFILING + else + printf("llvmgcda: increment_indirect_counter counters=%p, pred=%u\n", + (void *)counters, *predecessor); +#endif +} + +void llvm_gcda_emit_function(uint32_t ident) { +#ifdef DEBUG_GCDAPROFILING + printf("llvmgcda: function id=%x\n", ident); +#endif + + /* function tag */ + fwrite("\0\0\0\1", 4, 1, output_file); + write_int32(2); + write_int32(ident); + write_int32(0); +} + +void llvm_gcda_emit_arcs(uint32_t num_counters, uint64_t *counters) { + uint32_t i; + /* counter #1 (arcs) tag */ + fwrite("\0\0\xa1\1", 4, 1, output_file); + write_int32(num_counters * 2); + for (i = 0; i < num_counters; ++i) { + write_int64(counters[i]); + } + +#ifdef DEBUG_GCDAPROFILING + printf("llvmgcda: %u arcs\n", num_counters); + for (i = 0; i < num_counters; ++i) { + printf("llvmgcda: %llu\n", (unsigned long long)counters[i]); + } +#endif +} + +void llvm_gcda_end_file(void) { + /* Write out EOF record.
*/ + fwrite("\0\0\0\0\0\0\0\0", 8, 1, output_file); + fclose(output_file); + output_file = NULL; + +#ifdef DEBUG_GCDAPROFILING + printf("llvmgcda: -----\n"); +#endif +} diff --git a/runtime/libprofile/Makefile b/runtime/libprofile/Makefile index 4125af60d21a..eced5e5f8ab1 100644 --- a/runtime/libprofile/Makefile +++ b/runtime/libprofile/Makefile @@ -13,9 +13,9 @@ include $(LEVEL)/Makefile.config ifneq ($(strip $(LLVMCC)),) BYTECODE_LIBRARY = 1 endif -SHARED_LIBRARY = 1 -LOADABLE_MODULE = 1 LIBRARYNAME = profile_rt +LINK_LIBS_IN_SHARED = 1 +SHARED_LIBRARY = 1 EXTRA_DIST = libprofile.exports EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/libprofile.exports diff --git a/runtime/libprofile/OptimalEdgeProfiling.c b/runtime/libprofile/OptimalEdgeProfiling.c index eb7887b2aea9..947da532605d 100644 --- a/runtime/libprofile/OptimalEdgeProfiling.c +++ b/runtime/libprofile/OptimalEdgeProfiling.c @@ -26,7 +26,7 @@ static void OptEdgeProfAtExitHandler() { /* Note that, although the array has a counter for each edge, not all * counters are updated, the ones that are not used are initialised with -1. * When loading this information the counters with value -1 have to be - * recalculated, it is guranteed that this is possible. + * recalculated, it is guaranteed that this is possible. 
*/ write_profiling_data(OptEdgeInfo, ArrayStart, NumElements); } diff --git a/runtime/libprofile/PathProfiling.c b/runtime/libprofile/PathProfiling.c index 651e63cbdd21..283678521381 100644 --- a/runtime/libprofile/PathProfiling.c +++ b/runtime/libprofile/PathProfiling.c @@ -15,14 +15,22 @@ #include "Profiling.h" #include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/Support/DataTypes.h" #include +#if !defined(_MSC_VER) && !defined(__MINGW32__) #include +#else +#include +#endif #include #include -#include -#include #include +/* Must use __inline in Microsoft C */ +#if defined(_MSC_VER) +#define inline __inline +#endif + /* note that this is used for functions with large path counts, but it is unlikely those paths will ALL be executed */ #define ARBITRARY_HASH_BIN_COUNT 100 @@ -104,8 +112,8 @@ void writeArrayTable(uint32_t fNumber, ftEntry_t* ft, uint32_t* funcCount) { } } -inline uint32_t hash (uint32_t key) { - /* this may benifit from a proper hash function */ +static inline uint32_t hash (uint32_t key) { + /* this may benefit from a proper hash function */ return key%ARBITRARY_HASH_BIN_COUNT; } @@ -147,7 +155,8 @@ void writeHashTable(uint32_t functionNumber, pathHashTable_t* hashTable) { } /* Return a pointer to this path's specific path counter */ -inline uint32_t* getPathCounter(uint32_t functionNumber, uint32_t pathNumber) { +static inline uint32_t* getPathCounter(uint32_t functionNumber, + uint32_t pathNumber) { pathHashTable_t* hashTable; pathHashEntry_t* hashEntry; uint32_t index = hash(pathNumber); @@ -214,7 +223,7 @@ void llvm_decrement_path_count (uint32_t functionNumber, uint32_t pathNumber) { * +-----------------+-----------------+ * */ -static void pathProfAtExitHandler() { +static void pathProfAtExitHandler(void) { int outFile = getOutFile(); uint32_t i; uint32_t header[2] = { PathInfo, 0 }; diff --git a/runtime/libprofile/libprofile.exports b/runtime/libprofile/libprofile.exports index b8057c7aac96..2f25be692047 100644 --- 
a/runtime/libprofile/libprofile.exports +++ b/runtime/libprofile/libprofile.exports @@ -5,3 +5,8 @@ llvm_start_basic_block_tracing llvm_trace_basic_block llvm_increment_path_count llvm_decrement_path_count +llvm_gcda_start_file +llvm_gcda_increment_indirect_counter +llvm_gcda_emit_function +llvm_gcda_emit_arcs +llvm_gcda_end_file diff --git a/test/Analysis/BasicAA/intrinsics.ll b/test/Analysis/BasicAA/intrinsics.ll new file mode 100644 index 000000000000..59725cfded05 --- /dev/null +++ b/test/Analysis/BasicAA/intrinsics.ll @@ -0,0 +1,39 @@ +; RUN: opt -basicaa -gvn -S < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" + +; BasicAA should prove that these calls don't interfere, since they are +; IntrArgReadMem and have noalias pointers. + +; CHECK: define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind +; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %c = add <8 x i16> %a, %a +define <8 x i16> @test0(i8* noalias %p, i8* noalias %q, <8 x i16> %y) { +entry: + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %c = add <8 x i16> %a, %b + ret <8 x i16> %c +} + +; CHECK: define <8 x i16> @test1(i8* %p, <8 x i16> %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %q = getelementptr i8* %p, i64 16 +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind +; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %c = add <8 x i16> %a, %a +define <8 x i16> @test1(i8* %p, <8 x i16> %y) { +entry: + %q = getelementptr i8* %p, i64 16 + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, 
i32 16) nounwind + call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind + %c = add <8 x i16> %a, %b + ret <8 x i16> %c +} + +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind diff --git a/test/Analysis/BasicAA/store-promote.ll b/test/Analysis/BasicAA/store-promote.ll index 33d0f3a5449b..0db805c3e21e 100644 --- a/test/Analysis/BasicAA/store-promote.ll +++ b/test/Analysis/BasicAA/store-promote.ll @@ -24,7 +24,7 @@ Out: ; preds = %Loop ; The Loop block should be empty after the load/store are promoted. ; CHECK: @test1 -; CHECK: load i32* @B +; CHECK: load i32* @A ; CHECK: Loop: ; CHECK-NEXT: br i1 %c, label %Out, label %Loop ; CHECK: Out: diff --git a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll deleted file mode 100644 index b73b7f03f7e7..000000000000 --- a/test/Analysis/PostDominators/2006-09-26-PostDominanceFrontier.ll +++ /dev/null @@ -1,97 +0,0 @@ -; RUN: opt < %s -analyze -postdomfrontier \ -; RUN: -disable-verify -; ModuleID = '2006-09-26-PostDominanceFrontier.bc' -target datalayout = "e-p:64:64" -target triple = "alphaev67-unknown-linux-gnu" - %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [44 x i8] } - %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 } -@TOP = external global i64* ; [#uses=1] -@BOT = external global i64* ; [#uses=1] -@str = external global [2 x i8] ; <[2 x i8]*> [#uses=0] - -declare void @fopen() - -define void @main(i8** %argv) { -entry: - %netSelect.i507 = alloca i64, align 8 ; [#uses=0] - %topStart.i = alloca i64, align 8 ; [#uses=0] - %topEnd.i = alloca i64, align 8 ; [#uses=0] - %botStart.i = alloca i64, align 8 ; [#uses=0] - 
%botEnd.i = alloca i64, align 8 ; [#uses=0] - %c1.i154 = alloca i32, align 4 ; [#uses=0] - %b1.i155 = alloca i32, align 4 ; [#uses=0] - %t1.i156 = alloca i32, align 4 ; [#uses=0] - %c1.i = alloca i32, align 4 ; [#uses=0] - %b1.i = alloca i32, align 4 ; [#uses=0] - %t1.i = alloca i32, align 4 ; [#uses=0] - %netSelect.i5 = alloca i64, align 8 ; [#uses=0] - %netSelect.i = alloca i64, align 8 ; [#uses=0] - %tmp2.i = getelementptr i8** %argv, i32 1 ; [#uses=1] - %tmp3.i4 = load i8** %tmp2.i ; [#uses=0] - call void @fopen( ) - br i1 false, label %DimensionChannel.exit, label %bb.backedge.i - -bb.backedge.i: ; preds = %entry - ret void - -DimensionChannel.exit: ; preds = %entry - %tmp13.i137 = malloc i64, i32 0 ; [#uses=1] - %tmp610.i = malloc i64, i32 0 ; [#uses=1] - br label %cond_true.i143 - -cond_true.i143: ; preds = %cond_true.i143, %DimensionChannel.exit - %tmp9.i140 = getelementptr i64* %tmp13.i137, i64 0 ; [#uses=0] - %tmp12.i = getelementptr i64* %tmp610.i, i64 0 ; [#uses=0] - br i1 false, label %bb18.i144, label %cond_true.i143 - -bb18.i144: ; preds = %cond_true.i143 - call void @fopen( ) - %tmp76.i105 = malloc i64, i32 0 ; [#uses=3] - %tmp674.i = malloc i64, i32 0 ; [#uses=2] - %tmp1072.i = malloc i64, i32 0 ; [#uses=2] - %tmp1470.i = malloc i64, i32 0 ; [#uses=1] - br label %cond_true.i114 - -cond_true.i114: ; preds = %cond_true.i114, %bb18.i144 - %tmp17.i108 = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] - %tmp20.i = getelementptr i64* %tmp674.i, i64 0 ; [#uses=0] - %tmp23.i111 = getelementptr i64* %tmp1470.i, i64 0 ; [#uses=0] - br i1 false, label %cond_true40.i, label %cond_true.i114 - -cond_true40.i: ; preds = %cond_true40.i, %cond_true.i114 - %tmp33.i115 = getelementptr i64* %tmp1072.i, i64 0 ; [#uses=0] - br i1 false, label %bb142.i, label %cond_true40.i - -cond_next54.i: ; preds = %cond_true76.i - %tmp57.i = getelementptr i64* %tmp55.i, i64 0 ; [#uses=0] - br i1 false, label %bb64.i, label %bb69.i - -bb64.i: ; preds = %cond_true76.i, %cond_next54.i 
- %tmp67.i117 = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] - br i1 false, label %bb114.i, label %cond_true111.i - -bb69.i: ; preds = %cond_next54.i - br i1 false, label %bb79.i, label %cond_true76.i - -cond_true76.i: ; preds = %bb142.i, %bb69.i - %tmp48.i = getelementptr i64* %tmp46.i, i64 0 ; [#uses=0] - br i1 false, label %bb64.i, label %cond_next54.i - -bb79.i: ; preds = %bb69.i - br i1 false, label %bb114.i, label %cond_true111.i - -cond_true111.i: ; preds = %bb79.i, %bb64.i - %tmp84.i127 = getelementptr i64* %tmp46.i, i64 0 ; [#uses=0] - ret void - -bb114.i: ; preds = %bb142.i, %bb79.i, %bb64.i - %tmp117.i = getelementptr i64* %tmp76.i105, i64 0 ; [#uses=0] - %tmp132.i131 = getelementptr i64* %tmp674.i, i64 0 ; [#uses=0] - %tmp122.i = getelementptr i64* %tmp1072.i, i64 0 ; [#uses=0] - ret void - -bb142.i: ; preds = %cond_true40.i - %tmp46.i = load i64** @BOT ; [#uses=2] - %tmp55.i = load i64** @TOP ; [#uses=1] - br i1 false, label %bb114.i, label %cond_true76.i -} diff --git a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll b/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll deleted file mode 100644 index 1ec056bc34e0..000000000000 --- a/test/Analysis/PostDominators/2007-04-17-PostDominanceFrontier.ll +++ /dev/null @@ -1,692 +0,0 @@ -; RUN: opt < %s -postdomfrontier -disable-output - -define void @SManager() { -entry: - br label %bb.outer - -bb.outer: ; preds = %bb193, %entry - br label %bb.outer156 - -bb.loopexit: ; preds = %bb442 - br label %bb.outer156 - -bb.outer156: ; preds = %bb.loopexit, %bb.outer - br label %bb - -bb: ; preds = %bb.backedge, %bb.outer156 - br i1 false, label %cond_true, label %bb.cond_next_crit_edge - -bb.cond_next_crit_edge: ; preds = %bb - br label %cond_next - -cond_true: ; preds = %bb - br label %cond_next - -cond_next: ; preds = %cond_true, %bb.cond_next_crit_edge - br i1 false, label %cond_next.bb.backedge_crit_edge, label %cond_next107 - -cond_next.bb.backedge_crit_edge: ; preds = 
%cond_next - br label %bb.backedge - -bb.backedge: ; preds = %cond_true112.bb.backedge_crit_edge, %cond_next.bb.backedge_crit_edge - br label %bb - -cond_next107: ; preds = %cond_next - br i1 false, label %cond_true112, label %cond_next197 - -cond_true112: ; preds = %cond_next107 - br i1 false, label %cond_true118, label %cond_true112.bb.backedge_crit_edge - -cond_true112.bb.backedge_crit_edge: ; preds = %cond_true112 - br label %bb.backedge - -cond_true118: ; preds = %cond_true112 - br i1 false, label %bb123.preheader, label %cond_true118.bb148_crit_edge - -cond_true118.bb148_crit_edge: ; preds = %cond_true118 - br label %bb148 - -bb123.preheader: ; preds = %cond_true118 - br label %bb123 - -bb123: ; preds = %bb142.bb123_crit_edge, %bb123.preheader - br i1 false, label %bb123.bb142_crit_edge, label %cond_next.i57 - -bb123.bb142_crit_edge: ; preds = %bb123 - br label %bb142 - -cond_next.i57: ; preds = %bb123 - br i1 false, label %cond_true135, label %cond_next.i57.bb142_crit_edge - -cond_next.i57.bb142_crit_edge: ; preds = %cond_next.i57 - br label %bb142 - -cond_true135: ; preds = %cond_next.i57 - br label %bb142 - -bb142: ; preds = %cond_true135, %cond_next.i57.bb142_crit_edge, %bb123.bb142_crit_edge - br i1 false, label %bb148.loopexit, label %bb142.bb123_crit_edge - -bb142.bb123_crit_edge: ; preds = %bb142 - br label %bb123 - -bb148.loopexit: ; preds = %bb142 - br label %bb148 - -bb148: ; preds = %bb148.loopexit, %cond_true118.bb148_crit_edge - br i1 false, label %bb151.preheader, label %bb148.bb177_crit_edge - -bb148.bb177_crit_edge: ; preds = %bb148 - br label %bb177 - -bb151.preheader: ; preds = %bb148 - br label %bb151 - -bb151: ; preds = %bb171.bb151_crit_edge, %bb151.preheader - br i1 false, label %bb151.bb171_crit_edge, label %cond_next.i49 - -bb151.bb171_crit_edge: ; preds = %bb151 - br label %bb171 - -cond_next.i49: ; preds = %bb151 - br i1 false, label %cond_true164, label %cond_next.i49.bb171_crit_edge - -cond_next.i49.bb171_crit_edge: ; preds = 
%cond_next.i49 - br label %bb171 - -cond_true164: ; preds = %cond_next.i49 - br label %bb171 - -bb171: ; preds = %cond_true164, %cond_next.i49.bb171_crit_edge, %bb151.bb171_crit_edge - br i1 false, label %bb177.loopexit, label %bb171.bb151_crit_edge - -bb171.bb151_crit_edge: ; preds = %bb171 - br label %bb151 - -bb177.loopexit: ; preds = %bb171 - br label %bb177 - -bb177: ; preds = %bb177.loopexit, %bb148.bb177_crit_edge - br i1 false, label %bb180.preheader, label %bb177.bb193_crit_edge - -bb177.bb193_crit_edge: ; preds = %bb177 - br label %bb193 - -bb180.preheader: ; preds = %bb177 - br label %bb180 - -bb180: ; preds = %bb180.bb180_crit_edge, %bb180.preheader - br i1 false, label %bb193.loopexit, label %bb180.bb180_crit_edge - -bb180.bb180_crit_edge: ; preds = %bb180 - br label %bb180 - -bb193.loopexit: ; preds = %bb180 - br label %bb193 - -bb193: ; preds = %bb193.loopexit, %bb177.bb193_crit_edge - br label %bb.outer - -cond_next197: ; preds = %cond_next107 - br i1 false, label %cond_next210, label %cond_true205 - -cond_true205: ; preds = %cond_next197 - br i1 false, label %cond_true205.bb213_crit_edge, label %cond_true205.bb299_crit_edge - -cond_true205.bb299_crit_edge: ; preds = %cond_true205 - br label %bb299 - -cond_true205.bb213_crit_edge: ; preds = %cond_true205 - br label %bb213 - -cond_next210: ; preds = %cond_next197 - br label %bb293 - -bb213: ; preds = %bb293.bb213_crit_edge, %cond_true205.bb213_crit_edge - br i1 false, label %bb213.cond_next290_crit_edge, label %cond_true248 - -bb213.cond_next290_crit_edge: ; preds = %bb213 - br label %cond_next290 - -cond_true248: ; preds = %bb213 - br i1 false, label %cond_true248.cond_next290_crit_edge, label %cond_true255 - -cond_true248.cond_next290_crit_edge: ; preds = %cond_true248 - br label %cond_next290 - -cond_true255: ; preds = %cond_true248 - br i1 false, label %cond_true266, label %cond_true255.cond_next271_crit_edge - -cond_true255.cond_next271_crit_edge: ; preds = %cond_true255 - br label %cond_next271 
- -cond_true266: ; preds = %cond_true255 - br label %cond_next271 - -cond_next271: ; preds = %cond_true266, %cond_true255.cond_next271_crit_edge - br label %cond_next290 - -cond_next290: ; preds = %cond_next271, %cond_true248.cond_next290_crit_edge, %bb213.cond_next290_crit_edge - br label %bb293 - -bb293: ; preds = %cond_next290, %cond_next210 - br i1 false, label %bb293.bb213_crit_edge, label %bb293.bb299_crit_edge - -bb293.bb299_crit_edge: ; preds = %bb293 - br label %bb299 - -bb293.bb213_crit_edge: ; preds = %bb293 - br label %bb213 - -bb299: ; preds = %bb293.bb299_crit_edge, %cond_true205.bb299_crit_edge - br i1 false, label %bb302.preheader, label %bb299.bb390_crit_edge - -bb299.bb390_crit_edge: ; preds = %bb299 - br label %bb390 - -bb302.preheader: ; preds = %bb299 - br label %bb302 - -bb302: ; preds = %bb384.bb302_crit_edge, %bb302.preheader - br i1 false, label %bb302.bb384_crit_edge, label %cond_true339 - -bb302.bb384_crit_edge: ; preds = %bb302 - br label %bb384 - -cond_true339: ; preds = %bb302 - br i1 false, label %cond_true339.bb384_crit_edge, label %cond_true346 - -cond_true339.bb384_crit_edge: ; preds = %cond_true339 - br label %bb384 - -cond_true346: ; preds = %cond_true339 - br i1 false, label %cond_true357, label %cond_true346.cond_next361_crit_edge - -cond_true346.cond_next361_crit_edge: ; preds = %cond_true346 - br label %cond_next361 - -cond_true357: ; preds = %cond_true346 - br label %cond_next361 - -cond_next361: ; preds = %cond_true357, %cond_true346.cond_next361_crit_edge - br label %bb384 - -bb384: ; preds = %cond_next361, %cond_true339.bb384_crit_edge, %bb302.bb384_crit_edge - br i1 false, label %bb390.loopexit, label %bb384.bb302_crit_edge - -bb384.bb302_crit_edge: ; preds = %bb384 - br label %bb302 - -bb390.loopexit: ; preds = %bb384 - br label %bb390 - -bb390: ; preds = %bb390.loopexit, %bb299.bb390_crit_edge - br i1 false, label %bb391.preheader, label %bb390.bb442.preheader_crit_edge - -bb390.bb442.preheader_crit_edge: ; preds = 
%bb390 - br label %bb442.preheader - -bb391.preheader: ; preds = %bb390 - br label %bb391 - -bb391: ; preds = %bb413.bb391_crit_edge, %bb391.preheader - br i1 false, label %bb391.bb413_crit_edge, label %cond_next404 - -bb391.bb413_crit_edge: ; preds = %bb391 - br label %bb413 - -cond_next404: ; preds = %bb391 - br i1 false, label %cond_next404.HWrite.exit_crit_edge, label %cond_next.i13 - -cond_next404.HWrite.exit_crit_edge: ; preds = %cond_next404 - br label %HWrite.exit - -cond_next.i13: ; preds = %cond_next404 - br i1 false, label %cond_next.i13.cond_next13.i_crit_edge, label %cond_true12.i - -cond_next.i13.cond_next13.i_crit_edge: ; preds = %cond_next.i13 - br label %cond_next13.i - -cond_true12.i: ; preds = %cond_next.i13 - br label %cond_next13.i - -cond_next13.i: ; preds = %cond_true12.i, %cond_next.i13.cond_next13.i_crit_edge - br i1 false, label %cond_next13.i.bb.i22_crit_edge, label %cond_next43.i - -cond_next13.i.bb.i22_crit_edge: ; preds = %cond_next13.i - br label %bb.i22 - -cond_next43.i: ; preds = %cond_next13.i - br i1 false, label %cond_next43.i.bb.i22_crit_edge, label %bb60.i - -cond_next43.i.bb.i22_crit_edge: ; preds = %cond_next43.i - br label %bb.i22 - -bb.i22: ; preds = %cond_next43.i.bb.i22_crit_edge, %cond_next13.i.bb.i22_crit_edge - br label %bb413 - -bb60.i: ; preds = %cond_next43.i - br i1 false, label %bb60.i.HWrite.exit_crit_edge, label %cond_true81.i - -bb60.i.HWrite.exit_crit_edge: ; preds = %bb60.i - br label %HWrite.exit - -cond_true81.i: ; preds = %bb60.i - br label %bb413 - -HWrite.exit: ; preds = %bb60.i.HWrite.exit_crit_edge, %cond_next404.HWrite.exit_crit_edge - br label %bb413 - -bb413: ; preds = %HWrite.exit, %cond_true81.i, %bb.i22, %bb391.bb413_crit_edge - br i1 false, label %bb442.preheader.loopexit, label %bb413.bb391_crit_edge - -bb413.bb391_crit_edge: ; preds = %bb413 - br label %bb391 - -bb442.preheader.loopexit: ; preds = %bb413 - br label %bb442.preheader - -bb442.preheader: ; preds = %bb442.preheader.loopexit, 
%bb390.bb442.preheader_crit_edge - br label %bb442.outer - -bb420: ; preds = %bb442 - br i1 false, label %bb439.loopexit, label %cond_next433 - -cond_next433: ; preds = %bb420 - br i1 false, label %cond_next433.HRead.exit.loopexit_crit_edge, label %cond_next.i - -cond_next433.HRead.exit.loopexit_crit_edge: ; preds = %cond_next433 - br label %HRead.exit.loopexit - -cond_next.i: ; preds = %cond_next433 - br i1 false, label %cond_true9.i, label %cond_false223.i - -cond_true9.i: ; preds = %cond_next.i - switch i32 0, label %cond_false.i [ - i32 1, label %cond_true9.i.cond_true15.i_crit_edge - i32 5, label %cond_true9.i.cond_true15.i_crit_edge9 - ] - -cond_true9.i.cond_true15.i_crit_edge9: ; preds = %cond_true9.i - br label %cond_true15.i - -cond_true9.i.cond_true15.i_crit_edge: ; preds = %cond_true9.i - br label %cond_true15.i - -cond_true15.i: ; preds = %cond_true9.i.cond_true15.i_crit_edge, %cond_true9.i.cond_true15.i_crit_edge9 - br i1 false, label %cond_true15.i.cond_true44.i_crit_edge, label %cond_true15.i.cond_false49.i_crit_edge - -cond_true15.i.cond_false49.i_crit_edge: ; preds = %cond_true15.i - br label %cond_false49.i - -cond_true15.i.cond_true44.i_crit_edge: ; preds = %cond_true15.i - br label %cond_true44.i - -cond_false.i: ; preds = %cond_true9.i - br i1 false, label %cond_false.i.cond_next39.i_crit_edge, label %cond_true30.i - -cond_false.i.cond_next39.i_crit_edge: ; preds = %cond_false.i - br label %cond_next39.i - -cond_true30.i: ; preds = %cond_false.i - br label %cond_next39.i - -cond_next39.i: ; preds = %cond_true30.i, %cond_false.i.cond_next39.i_crit_edge - br i1 false, label %cond_next39.i.cond_true44.i_crit_edge, label %cond_next39.i.cond_false49.i_crit_edge - -cond_next39.i.cond_false49.i_crit_edge: ; preds = %cond_next39.i - br label %cond_false49.i - -cond_next39.i.cond_true44.i_crit_edge: ; preds = %cond_next39.i - br label %cond_true44.i - -cond_true44.i: ; preds = %cond_next39.i.cond_true44.i_crit_edge, 
%cond_true15.i.cond_true44.i_crit_edge - br i1 false, label %cond_true44.i.cond_next70.i_crit_edge, label %cond_true44.i.cond_true61.i_crit_edge - -cond_true44.i.cond_true61.i_crit_edge: ; preds = %cond_true44.i - br label %cond_true61.i - -cond_true44.i.cond_next70.i_crit_edge: ; preds = %cond_true44.i - br label %cond_next70.i - -cond_false49.i: ; preds = %cond_next39.i.cond_false49.i_crit_edge, %cond_true15.i.cond_false49.i_crit_edge - br i1 false, label %cond_false49.i.cond_next70.i_crit_edge, label %cond_false49.i.cond_true61.i_crit_edge - -cond_false49.i.cond_true61.i_crit_edge: ; preds = %cond_false49.i - br label %cond_true61.i - -cond_false49.i.cond_next70.i_crit_edge: ; preds = %cond_false49.i - br label %cond_next70.i - -cond_true61.i: ; preds = %cond_false49.i.cond_true61.i_crit_edge, %cond_true44.i.cond_true61.i_crit_edge - br i1 false, label %cond_true61.i.cond_next70.i_crit_edge, label %cond_true67.i - -cond_true61.i.cond_next70.i_crit_edge: ; preds = %cond_true61.i - br label %cond_next70.i - -cond_true67.i: ; preds = %cond_true61.i - br label %cond_next70.i - -cond_next70.i: ; preds = %cond_true67.i, %cond_true61.i.cond_next70.i_crit_edge, %cond_false49.i.cond_next70.i_crit_edge, %cond_true44.i.cond_next70.i_crit_edge - br i1 false, label %cond_true77.i, label %cond_next81.i - -cond_true77.i: ; preds = %cond_next70.i - br label %bb442.outer.backedge - -cond_next81.i: ; preds = %cond_next70.i - br i1 false, label %cond_true87.i, label %cond_false94.i - -cond_true87.i: ; preds = %cond_next81.i - br i1 false, label %cond_true87.i.cond_true130.i_crit_edge, label %cond_true87.i.cond_next135.i_crit_edge - -cond_true87.i.cond_next135.i_crit_edge: ; preds = %cond_true87.i - br label %cond_next135.i - -cond_true87.i.cond_true130.i_crit_edge: ; preds = %cond_true87.i - br label %cond_true130.i - -cond_false94.i: ; preds = %cond_next81.i - switch i32 0, label %cond_false94.i.cond_next125.i_crit_edge [ - i32 1, label %cond_false94.i.cond_true100.i_crit_edge - 
i32 5, label %cond_false94.i.cond_true100.i_crit_edge10 - ] - -cond_false94.i.cond_true100.i_crit_edge10: ; preds = %cond_false94.i - br label %cond_true100.i - -cond_false94.i.cond_true100.i_crit_edge: ; preds = %cond_false94.i - br label %cond_true100.i - -cond_false94.i.cond_next125.i_crit_edge: ; preds = %cond_false94.i - br label %cond_next125.i - -cond_true100.i: ; preds = %cond_false94.i.cond_true100.i_crit_edge, %cond_false94.i.cond_true100.i_crit_edge10 - br i1 false, label %cond_true107.i, label %cond_true100.i.cond_next109.i_crit_edge - -cond_true100.i.cond_next109.i_crit_edge: ; preds = %cond_true100.i - br label %cond_next109.i - -cond_true107.i: ; preds = %cond_true100.i - br label %cond_next109.i - -cond_next109.i: ; preds = %cond_true107.i, %cond_true100.i.cond_next109.i_crit_edge - br i1 false, label %cond_next109.i.cond_next125.i_crit_edge, label %cond_true116.i - -cond_next109.i.cond_next125.i_crit_edge: ; preds = %cond_next109.i - br label %cond_next125.i - -cond_true116.i: ; preds = %cond_next109.i - br label %cond_next125.i - -cond_next125.i: ; preds = %cond_true116.i, %cond_next109.i.cond_next125.i_crit_edge, %cond_false94.i.cond_next125.i_crit_edge - br i1 false, label %cond_next125.i.cond_true130.i_crit_edge, label %cond_next125.i.cond_next135.i_crit_edge - -cond_next125.i.cond_next135.i_crit_edge: ; preds = %cond_next125.i - br label %cond_next135.i - -cond_next125.i.cond_true130.i_crit_edge: ; preds = %cond_next125.i - br label %cond_true130.i - -cond_true130.i: ; preds = %cond_next125.i.cond_true130.i_crit_edge, %cond_true87.i.cond_true130.i_crit_edge - br label %cond_next135.i - -cond_next135.i: ; preds = %cond_true130.i, %cond_next125.i.cond_next135.i_crit_edge, %cond_true87.i.cond_next135.i_crit_edge - br i1 false, label %cond_true142.i, label %cond_next135.i.cond_next149.i_crit_edge - -cond_next135.i.cond_next149.i_crit_edge: ; preds = %cond_next135.i - br label %cond_next149.i - -cond_true142.i: ; preds = %cond_next135.i - br label 
%cond_next149.i - -cond_next149.i: ; preds = %cond_true142.i, %cond_next135.i.cond_next149.i_crit_edge - br i1 false, label %cond_true156.i, label %cond_next149.i.cond_next163.i_crit_edge - -cond_next149.i.cond_next163.i_crit_edge: ; preds = %cond_next149.i - br label %cond_next163.i - -cond_true156.i: ; preds = %cond_next149.i - br label %cond_next163.i - -cond_next163.i: ; preds = %cond_true156.i, %cond_next149.i.cond_next163.i_crit_edge - br i1 false, label %cond_true182.i, label %cond_next163.i.cond_next380.i_crit_edge - -cond_next163.i.cond_next380.i_crit_edge: ; preds = %cond_next163.i - br label %cond_next380.i - -cond_true182.i: ; preds = %cond_next163.i - br i1 false, label %cond_true182.i.cond_next380.i_crit_edge, label %cond_true196.i - -cond_true182.i.cond_next380.i_crit_edge: ; preds = %cond_true182.i - br label %cond_next380.i - -cond_true196.i: ; preds = %cond_true182.i - br i1 false, label %cond_true210.i, label %cond_true196.i.cond_next380.i_crit_edge - -cond_true196.i.cond_next380.i_crit_edge: ; preds = %cond_true196.i - br label %cond_next380.i - -cond_true210.i: ; preds = %cond_true196.i - br i1 false, label %cond_true216.i, label %cond_true210.i.cond_next380.i_crit_edge - -cond_true210.i.cond_next380.i_crit_edge: ; preds = %cond_true210.i - br label %cond_next380.i - -cond_true216.i: ; preds = %cond_true210.i - br label %cond_next380.i - -cond_false223.i: ; preds = %cond_next.i - br i1 false, label %cond_true229.i, label %cond_false355.i - -cond_true229.i: ; preds = %cond_false223.i - br i1 false, label %cond_true229.i.HRead.exit.loopexit_crit_edge, label %cond_next243.i - -cond_true229.i.HRead.exit.loopexit_crit_edge: ; preds = %cond_true229.i - br label %HRead.exit.loopexit - -cond_next243.i: ; preds = %cond_true229.i - br i1 false, label %cond_true248.i, label %cond_false255.i - -cond_true248.i: ; preds = %cond_next243.i - br label %cond_next260.i - -cond_false255.i: ; preds = %cond_next243.i - br label %cond_next260.i - -cond_next260.i: ; 
preds = %cond_false255.i, %cond_true248.i - br i1 false, label %cond_true267.i, label %cond_next273.i - -cond_true267.i: ; preds = %cond_next260.i - br label %bb442.backedge - -bb442.backedge: ; preds = %bb.i, %cond_true267.i - br label %bb442 - -cond_next273.i: ; preds = %cond_next260.i - br i1 false, label %cond_true281.i, label %cond_next273.i.cond_next288.i_crit_edge - -cond_next273.i.cond_next288.i_crit_edge: ; preds = %cond_next273.i - br label %cond_next288.i - -cond_true281.i: ; preds = %cond_next273.i - br label %cond_next288.i - -cond_next288.i: ; preds = %cond_true281.i, %cond_next273.i.cond_next288.i_crit_edge - br i1 false, label %cond_true295.i, label %cond_next288.i.cond_next302.i_crit_edge - -cond_next288.i.cond_next302.i_crit_edge: ; preds = %cond_next288.i - br label %cond_next302.i - -cond_true295.i: ; preds = %cond_next288.i - br label %cond_next302.i - -cond_next302.i: ; preds = %cond_true295.i, %cond_next288.i.cond_next302.i_crit_edge - br i1 false, label %cond_next302.i.cond_next380.i_crit_edge, label %cond_true328.i - -cond_next302.i.cond_next380.i_crit_edge: ; preds = %cond_next302.i - br label %cond_next380.i - -cond_true328.i: ; preds = %cond_next302.i - br i1 false, label %cond_true343.i, label %cond_true328.i.cond_next380.i_crit_edge - -cond_true328.i.cond_next380.i_crit_edge: ; preds = %cond_true328.i - br label %cond_next380.i - -cond_true343.i: ; preds = %cond_true328.i - br i1 false, label %cond_true349.i, label %cond_true343.i.cond_next380.i_crit_edge - -cond_true343.i.cond_next380.i_crit_edge: ; preds = %cond_true343.i - br label %cond_next380.i - -cond_true349.i: ; preds = %cond_true343.i - br label %cond_next380.i - -cond_false355.i: ; preds = %cond_false223.i - br i1 false, label %cond_false355.i.bb.i_crit_edge, label %cond_next363.i - -cond_false355.i.bb.i_crit_edge: ; preds = %cond_false355.i - br label %bb.i - -cond_next363.i: ; preds = %cond_false355.i - br i1 false, label %bb377.i, label %cond_next363.i.bb.i_crit_edge - 
-cond_next363.i.bb.i_crit_edge: ; preds = %cond_next363.i - br label %bb.i - -bb.i: ; preds = %cond_next363.i.bb.i_crit_edge, %cond_false355.i.bb.i_crit_edge - br label %bb442.backedge - -bb377.i: ; preds = %cond_next363.i - br label %cond_next380.i - -cond_next380.i: ; preds = %bb377.i, %cond_true349.i, %cond_true343.i.cond_next380.i_crit_edge, %cond_true328.i.cond_next380.i_crit_edge, %cond_next302.i.cond_next380.i_crit_edge, %cond_true216.i, %cond_true210.i.cond_next380.i_crit_edge, %cond_true196.i.cond_next380.i_crit_edge, %cond_true182.i.cond_next380.i_crit_edge, %cond_next163.i.cond_next380.i_crit_edge - br i1 false, label %cond_next380.i.HRead.exit_crit_edge, label %cond_true391.i - -cond_next380.i.HRead.exit_crit_edge: ; preds = %cond_next380.i - br label %HRead.exit - -cond_true391.i: ; preds = %cond_next380.i - br label %bb442.outer.backedge - -bb442.outer.backedge: ; preds = %bb439, %cond_true391.i, %cond_true77.i - br label %bb442.outer - -HRead.exit.loopexit: ; preds = %cond_true229.i.HRead.exit.loopexit_crit_edge, %cond_next433.HRead.exit.loopexit_crit_edge - br label %HRead.exit - -HRead.exit: ; preds = %HRead.exit.loopexit, %cond_next380.i.HRead.exit_crit_edge - br label %bb439 - -bb439.loopexit: ; preds = %bb420 - br label %bb439 - -bb439: ; preds = %bb439.loopexit, %HRead.exit - br label %bb442.outer.backedge - -bb442.outer: ; preds = %bb442.outer.backedge, %bb442.preheader - br label %bb442 - -bb442: ; preds = %bb442.outer, %bb442.backedge - br i1 false, label %bb420, label %bb.loopexit -} - -define void @Invalidate() { -entry: - br i1 false, label %cond_false, label %cond_true - -cond_true: ; preds = %entry - br i1 false, label %cond_true40, label %cond_true.cond_next_crit_edge - -cond_true.cond_next_crit_edge: ; preds = %cond_true - br label %cond_next - -cond_true40: ; preds = %cond_true - br label %cond_next - -cond_next: ; preds = %cond_true40, %cond_true.cond_next_crit_edge - br i1 false, label %cond_true68, label 
%cond_next.cond_next73_crit_edge - -cond_next.cond_next73_crit_edge: ; preds = %cond_next - br label %cond_next73 - -cond_true68: ; preds = %cond_next - br label %cond_next73 - -cond_next73: ; preds = %cond_true68, %cond_next.cond_next73_crit_edge - br i1 false, label %cond_true91, label %cond_next73.cond_next96_crit_edge - -cond_next73.cond_next96_crit_edge: ; preds = %cond_next73 - br label %cond_next96 - -cond_true91: ; preds = %cond_next73 - br label %cond_next96 - -cond_next96: ; preds = %cond_true91, %cond_next73.cond_next96_crit_edge - br i1 false, label %cond_next96.cond_next112_crit_edge, label %cond_true105 - -cond_next96.cond_next112_crit_edge: ; preds = %cond_next96 - br label %cond_next112 - -cond_true105: ; preds = %cond_next96 - br label %cond_next112 - -cond_next112: ; preds = %cond_true105, %cond_next96.cond_next112_crit_edge - br i1 false, label %cond_next112.cond_next127_crit_edge, label %cond_true119 - -cond_next112.cond_next127_crit_edge: ; preds = %cond_next112 - br label %cond_next127 - -cond_true119: ; preds = %cond_next112 - br label %cond_next127 - -cond_next127: ; preds = %cond_true119, %cond_next112.cond_next127_crit_edge - br i1 false, label %cond_next141, label %cond_true134 - -cond_true134: ; preds = %cond_next127 - br i1 false, label %cond_true134.bb161_crit_edge, label %cond_true134.bb_crit_edge - -cond_true134.bb_crit_edge: ; preds = %cond_true134 - br label %bb - -cond_true134.bb161_crit_edge: ; preds = %cond_true134 - br label %bb161 - -cond_next141: ; preds = %cond_next127 - br label %bb154 - -bb: ; preds = %bb154.bb_crit_edge, %cond_true134.bb_crit_edge - br label %bb154 - -bb154: ; preds = %bb, %cond_next141 - br i1 false, label %bb154.bb161_crit_edge, label %bb154.bb_crit_edge - -bb154.bb_crit_edge: ; preds = %bb154 - br label %bb - -bb154.bb161_crit_edge: ; preds = %bb154 - br label %bb161 - -bb161: ; preds = %bb154.bb161_crit_edge, %cond_true134.bb161_crit_edge - br i1 false, label %bb161.cond_next201_crit_edge, label 
%cond_true198 - -bb161.cond_next201_crit_edge: ; preds = %bb161 - br label %cond_next201 - -cond_true198: ; preds = %bb161 - br label %cond_next201 - -cond_next201: ; preds = %cond_true198, %bb161.cond_next201_crit_edge - br i1 false, label %cond_next212, label %cond_true206 - -cond_true206: ; preds = %cond_next201 - br label %UnifiedReturnBlock - -cond_false: ; preds = %entry - br label %UnifiedReturnBlock - -cond_next212: ; preds = %cond_next201 - br label %UnifiedReturnBlock - -UnifiedReturnBlock: ; preds = %cond_next212, %cond_false, %cond_true206 - ret void -} diff --git a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll b/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll deleted file mode 100644 index 767e5db94ce8..000000000000 --- a/test/Analysis/PostDominators/2007-04-20-PostDom-Reset.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: opt < %s -postdomfrontier -disable-output - -define void @args_out_of_range() { -entry: - br label %bb - -bb: ; preds = %bb, %entry - br label %bb -} - -define void @args_out_of_range_3() { -entry: - br label %bb - -bb: ; preds = %bb, %entry - br label %bb -} - -define void @Feq() { -entry: - br i1 false, label %cond_true, label %cond_next - -cond_true: ; preds = %entry - unreachable - -cond_next: ; preds = %entry - unreachable -} diff --git a/test/Analysis/RegionInfo/next.ll b/test/Analysis/RegionInfo/next.ll index d986387099c3..377a84d389c1 100644 --- a/test/Analysis/RegionInfo/next.ll +++ b/test/Analysis/RegionInfo/next.ll @@ -32,8 +32,8 @@ __label_000020: ; preds = %__label_002001, %bb ; CHECK-NOT: => ; CHECK: [0] entry => ; CHECK-NEXT: [1] __label_002001.outer => __label_000020 -; CHECK-NEXT; [2] bb197 => bb229 -; CHECK-NEXT; [3] bb224 => bb229 +; CHECK-NEXT: [2] bb197 => bb229 +; CHECK-NEXT: [3] bb224 => bb229 ; STAT: 4 region - The # of regions ; STAT: 1 region - The # of simple regions diff --git a/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll 
b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll new file mode 100644 index 000000000000..9f17e27577c2 --- /dev/null +++ b/test/Analysis/ScalarEvolution/2011-03-09-ExactNoMaxBECount.ll @@ -0,0 +1,34 @@ +; RUN: opt -indvars %s +; PR9424: Attempt to use a SCEVCouldNotCompute object! +; The inner loop computes the Step and Start of the outer loop. +; Call that Vexit. The outer End value is max(2,Vexit), because +; the condition "icmp %4 < 2" does not guard the outer loop. +; SCEV knows that Vexit has range [2,4], so End == Vexit == Start. +; Now we have ExactBECount == 0. However, MinStart == 2 and MaxEnd == 4. +; Since the stride is variable and may wrap, we cannot compute +; MaxBECount. SCEV should override MaxBECount with ExactBECount. + +define void @bar() nounwind { +entry: + %. = select i1 undef, i32 2, i32 1 + br label %"5.preheader" + +"4": ; preds = %"5.preheader", %"4" + %0 = phi i32 [ 0, %"5.preheader" ], [ %1, %"4" ] + %1 = add nsw i32 %0, 1 + %2 = icmp sgt i32 %., %1 + br i1 %2, label %"4", label %"9" + +"9": ; preds = %"4" + %3 = add i32 %6, 1 + %4 = add i32 %3, %1 + %5 = icmp slt i32 %4, 2 + br i1 %5, label %"5.preheader", label %return + +"5.preheader": ; preds = %"9", %entry + %6 = phi i32 [ 0, %entry ], [ %4, %"9" ] + br label %"4" + +return: ; preds = %"9" + ret void +} diff --git a/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll new file mode 100644 index 000000000000..1600d5f05a18 --- /dev/null +++ b/test/Analysis/ScalarEvolution/2011-04-26-FoldAddRec.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -analyze -iv-users +; PR9633: Tests that SCEV handles the mul.i2 recurrence being folded to +; constant zero. 
+ +define signext i8 @func_14(i8 signext %p_18) nounwind readnone ssp { +entry: + br label %for.inc + +for.inc: + %p_17.addr.012 = phi i32 [ 0, %entry ], [ %add, %for.inc ] + %add = add nsw i32 %p_17.addr.012, 1 + br i1 false, label %for.inc, label %for.cond + +for.cond: + %tobool.i = icmp ult i32 %add, 8192 + %shl.i = select i1 %tobool.i, i32 13, i32 0 + %shl.left.i = shl i32 %add, %shl.i + %conv.i4 = trunc i32 %shl.left.i to i8 + br i1 undef, label %for.inc9, label %if.then + +for.inc9: + %p_18.addr.011 = phi i8 [ %add12, %for.inc9 ], [ %p_18, %for.cond ] + %add12 = add i8 %p_18.addr.011, 1 + %mul.i2 = mul i8 %add12, %conv.i4 + %mul.i2.lobit = lshr i8 %mul.i2, 7 + %lor.ext.shr.i = select i1 undef, i8 %mul.i2.lobit, i8 %mul.i2 + %tobool = icmp eq i8 %lor.ext.shr.i, 0 + br i1 %tobool, label %for.inc9, label %if.then + +if.then: + ret i8 0 + +} \ No newline at end of file diff --git a/test/Analysis/ScalarEvolution/nsw-offset.ll b/test/Analysis/ScalarEvolution/nsw-offset.ll index 4cd9a6de48c5..8969a5ad4ceb 100644 --- a/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -19,11 +19,11 @@ bb: ; preds = %bb.nph, %bb1 %i.01 = phi i32 [ %16, %bb1 ], [ 0, %bb.nph ] ; [#uses=5] ; CHECK: %1 = sext i32 %i.01 to i64 -; CHECK: --> {0,+,2}<%bb> +; CHECK: --> {0,+,2}<%bb> %1 = sext i32 %i.01 to i64 ; [#uses=1] ; CHECK: %2 = getelementptr inbounds double* %d, i64 %1 -; CHECK: --> {%d,+,16}<%bb> +; CHECK: --> {%d,+,16}<%bb> %2 = getelementptr inbounds double* %d, i64 %1 ; [#uses=1] %3 = load double* %2, align 8 ; [#uses=1] @@ -33,11 +33,11 @@ bb: ; preds = %bb.nph, %bb1 %7 = or i32 %i.01, 1 ; [#uses=1] ; CHECK: %8 = sext i32 %7 to i64 -; CHECK: --> {1,+,2}<%bb> +; CHECK: --> {1,+,2}<%bb> %8 = sext i32 %7 to i64 ; [#uses=1] ; CHECK: %9 = getelementptr inbounds double* %q, i64 %8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %9 = getelementptr inbounds double* %q, i64 %8 ; [#uses=1] ; Artificially repeat the above three 
instructions, this time using @@ -45,11 +45,11 @@ bb: ; preds = %bb.nph, %bb1 %t7 = add nsw i32 %i.01, 1 ; [#uses=1] ; CHECK: %t8 = sext i32 %t7 to i64 -; CHECK: --> {1,+,2}<%bb> +; CHECK: --> {1,+,2}<%bb> %t8 = sext i32 %t7 to i64 ; [#uses=1] ; CHECK: %t9 = getelementptr inbounds double* %q, i64 %t8 -; CHECK: {(8 + %q),+,16}<%bb> +; CHECK: {(8 + %q),+,16}<%bb> %t9 = getelementptr inbounds double* %q, i64 %t8 ; [#uses=1] %10 = load double* %9, align 8 ; [#uses=1] diff --git a/test/Analysis/ScalarEvolution/nsw.ll b/test/Analysis/ScalarEvolution/nsw.ll index 9d8e2b62a9d2..da35a6cf7ae2 100644 --- a/test/Analysis/ScalarEvolution/nsw.ll +++ b/test/Analysis/ScalarEvolution/nsw.ll @@ -35,7 +35,7 @@ bb: ; preds = %bb1, %bb.nph bb1: ; preds = %bb %phitmp = sext i32 %tmp8 to i64 ; [#uses=1] ; CHECK: %phitmp -; CHECK-NEXT: --> {1,+,1}<%bb> +; CHECK-NEXT: --> {1,+,1}<%bb> %tmp9 = getelementptr double* %p, i64 %phitmp ; [#uses=1] ; CHECK: %tmp9 ; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> @@ -62,11 +62,11 @@ for.body.lr.ph.i.i: ; preds = %entry for.body.i.i: ; preds = %for.body.i.i, %for.body.lr.ph.i.i %__first.addr.02.i.i = phi i32* [ %begin, %for.body.lr.ph.i.i ], [ %ptrincdec.i.i, %for.body.i.i ] ; CHECK: %__first.addr.02.i.i -; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> +; CHECK-NEXT: --> {%begin,+,4}<%for.body.i.i> store i32 0, i32* %__first.addr.02.i.i, align 4 %ptrincdec.i.i = getelementptr inbounds i32* %__first.addr.02.i.i, i64 1 ; CHECK: %ptrincdec.i.i -; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> +; CHECK-NEXT: --> {(4 + %begin),+,4}<%for.body.i.i> %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %for.cond.for.end_crit_edge.i.i, label %for.body.i.i @@ -88,7 +88,7 @@ for.body.i.i: ; preds = %entry, %for.body.i. 
; CHECK: %indvar.i.i ; CHECK: {0,+,1}<%for.body.i.i> %tmp = add nsw i64 %indvar.i.i, 1 -; CHECK: %tmp = +; CHECK: %tmp = ; CHECK: {1,+,1}<%for.body.i.i> %ptrincdec.i.i = getelementptr inbounds i32* %begin, i64 %tmp ; CHECK: %ptrincdec.i.i = @@ -99,8 +99,8 @@ for.body.i.i: ; preds = %entry, %for.body.i. store i32 0, i32* %__first.addr.08.i.i, align 4 %cmp.i.i = icmp eq i32* %ptrincdec.i.i, %end br i1 %cmp.i.i, label %_ZSt4fillIPiiEvT_S1_RKT0_.exit, label %for.body.i.i -; CHECK: Loop %for.body.i.i: Unpredictable backedge-taken count. -; CHECK: Loop %for.body.i.i: Unpredictable max backedge-taken count. +; CHECK: Loop %for.body.i.i: backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4) +; CHECK: Loop %for.body.i.i: max backedge-taken count is ((-4 + (-1 * %begin) + %end) /u 4) _ZSt4fillIPiiEvT_S1_RKT0_.exit: ; preds = %for.body.i.i, %entry ret void } \ No newline at end of file diff --git a/test/Analysis/ScalarEvolution/sext-iv-0.ll b/test/Analysis/ScalarEvolution/sext-iv-0.ll index 2af794fbbc3e..d5d32689e17c 100644 --- a/test/Analysis/ScalarEvolution/sext-iv-0.ll +++ b/test/Analysis/ScalarEvolution/sext-iv-0.ll @@ -1,5 +1,4 @@ -; RUN: opt < %s -scalar-evolution -analyze \ -; RUN: | grep { --> \{-128,+,1\}<%bb1> Exits: 127} | count 5 +; RUN: opt < %s -scalar-evolution -analyze | FileCheck %s ; Convert (sext {-128,+,1}) to {sext(-128),+,sext(1)}, since the ; trip count is within range where this is safe. 
@@ -13,9 +12,17 @@ bb1.thread: bb1: ; preds = %bb1, %bb1.thread %i.0.reg2mem.0 = phi i64 [ -128, %bb1.thread ], [ %8, %bb1 ] ; [#uses=3] +; CHECK: %i.0.reg2mem.0 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %0 = trunc i64 %i.0.reg2mem.0 to i8 ; [#uses=1] +; CHECK: %0 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %1 = trunc i64 %i.0.reg2mem.0 to i9 ; [#uses=1] +; CHECK: %1 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %2 = sext i9 %1 to i64 ; [#uses=1] +; CHECK: %2 +; CHECK-NEXT: --> {-128,+,1}<%bb1> Exits: 127 %3 = getelementptr double* %x, i64 %2 ; [#uses=1] %4 = load double* %3, align 8 ; [#uses=1] %5 = fmul double %4, 3.900000e+00 ; [#uses=1] diff --git a/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll new file mode 100644 index 000000000000..8f080e2108bd --- /dev/null +++ b/test/Analysis/TypeBasedAliasAnalysis/intrinsics.ll @@ -0,0 +1,27 @@ +; RUN: opt -tbaa -basicaa -gvn -S < %s | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" + +; TBAA should prove that these calls don't interfere, since they are +; IntrArgReadMem and have TBAA metadata. 
+ +; CHECK: define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind +; CHECK-NEXT: call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16) +; CHECK-NEXT: %c = add <8 x i16> %a, %a +define <8 x i16> @test0(i8* %p, i8* %q, <8 x i16> %y) { +entry: + %a = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2 + call void @llvm.arm.neon.vst1.v8i16(i8* %q, <8 x i16> %y, i32 16), !tbaa !1 + %b = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i8* %p, i32 16) nounwind, !tbaa !2 + %c = add <8 x i16> %a, %b + ret <8 x i16> %c +} + +declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*, i32) nounwind readonly +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind + +!0 = metadata !{metadata !"tbaa root", null} +!1 = metadata !{metadata !"A", metadata !0} +!2 = metadata !{metadata !"B", metadata !0} diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll index 6752bd8281bd..e4e2d3a56e04 100644 --- a/test/Assembler/AutoUpgradeIntrinsics.ll +++ b/test/Assembler/AutoUpgradeIntrinsics.ll @@ -7,6 +7,8 @@ ; RUN: llvm-as < %s | llvm-dis | \ ; RUN: not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*} ; RUN: llvm-as < %s | llvm-dis | \ +; RUN: not grep {llvm\\.x86\\.sse2\\.loadu} +; RUN: llvm-as < %s | llvm-dis | \ ; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16 declare i32 @llvm.ctpop.i28(i28 %val) @@ -79,3 +81,13 @@ define void @sh64(<1 x i64> %A, <2 x i32> %B) { %r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0] ret void } + +declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone +declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone +declare <2 x double> @llvm.x86.sse2.loadu.pd(double*) nounwind readnone +define void @test_loadu(i8* %a, double* %b) { + %v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a) + %v1 = call <16 x i8> 
@llvm.x86.sse2.loadu.dq(i8* %a) + %v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b) + ret void +} diff --git a/test/Assembler/aggregate-return-single-value.ll b/test/Assembler/aggregate-return-single-value.ll index 02fb59f8a0a4..04540b54af1f 100644 --- a/test/Assembler/aggregate-return-single-value.ll +++ b/test/Assembler/aggregate-return-single-value.ll @@ -1,14 +1,8 @@ ; RUN: llvm-as < %s | llvm-dis -define { i32 } @fooa() nounwind { - ret i32 0 -} define { i32 } @foob() nounwind { ret {i32}{ i32 0 } } -define [1 x i32] @fooc() nounwind { - ret i32 0 -} define [1 x i32] @food() nounwind { ret [1 x i32][ i32 0 ] } diff --git a/test/Assembler/comment.ll b/test/Assembler/comment.ll index fe23d26fbeb4..16362abc716f 100644 --- a/test/Assembler/comment.ll +++ b/test/Assembler/comment.ll @@ -7,7 +7,7 @@ ; BARE: } @B = external global i32 -; ANNOT: @B = external global i32 ; [#uses=0] +; ANNOT: @B = external global i32 ; [#uses=0 type=i32*] define <4 x i1> @foo(<4 x float> %a, <4 x float> %b) nounwind { entry: @@ -15,6 +15,5 @@ entry: ret <4 x i1> %cmp } -; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1] - +; ANNOT: %cmp = fcmp olt <4 x float> %a, %b ; [#uses=1 type=<4 x i1>] diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll index 272cd424e2a2..feb2d74138d0 100644 --- a/test/Bitcode/neon-intrinsics.ll +++ b/test/Bitcode/neon-intrinsics.ll @@ -76,20 +76,13 @@ ; CHECK: zext <4 x i16> ; CHECK-NEXT: sub <4 x i32> -; vmull should be auto-upgraded to multiply with sext/zext -; (but vmullp should remain an intrinsic) +; vmull* intrinsics will remain intrinsics ; CHECK: vmulls8 -; CHECK-NOT: arm.neon.vmulls.v8i16 -; CHECK: sext <8 x i8> -; CHECK-NEXT: sext <8 x i8> -; CHECK-NEXT: mul <8 x i16> +; CHECK: arm.neon.vmulls.v8i16 ; CHECK: vmullu16 -; CHECK-NOT: arm.neon.vmullu.v4i32 -; CHECK: zext <4 x i16> -; CHECK-NEXT: zext <4 x i16> -; CHECK-NEXT: mul <4 x i32> +; CHECK: arm.neon.vmullu.v4i32 ; CHECK: vmullp8 ; CHECK: 
arm.neon.vmullp.v8i16 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b696682c13fa..993b6e274443 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -37,14 +37,32 @@ if(PYTHONINTERP_FOUND) foreach(INC_DIR ${INC_DIRS}) set(IDIRS "${IDIRS} -I${INC_DIR}") endforeach() - string(REPLACE "" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + + if( MSVC ) + # The compiler's path may contain white space. Wrap it: + string(REPLACE "" "\\\"${CMAKE_CXX_COMPILER}\\\"" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + # Eliminate continuation lines from NMake flow. PR9680 + string(REPLACE "@<<\n" " " TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + string(REPLACE "\n<<" " " TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + else() + string(REPLACE "" "${CMAKE_CXX_COMPILER}" TEST_COMPILE_CXX_CMD ${CMAKE_CXX_COMPILE_OBJECT}) + endif() + string(REPLACE "" "${DEFS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) string(REPLACE "" "${CMAKE_CXX_FLAGS}" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) - string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + if (MSVC) # PR9680 + # Eliminate MSVC equivalent of -o + string(REPLACE "/Fo" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + # Eliminate "how to rename program database" argument + string(REPLACE "/Fd" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + else() + string(REPLACE "-o" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) + endif(MSVC) string(REGEX REPLACE "<[^>]+>" "" TEST_COMPILE_CXX_CMD ${TEST_COMPILE_CXX_CMD}) set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} ${IDIRS}") if(NOT MSVC) set(TEST_COMPILE_CXX_CMD "${TEST_COMPILE_CXX_CMD} -x c++") + # MSVC already has /TP to indicate a C++ source file endif() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/site.exp.in diff --git a/test/CodeGen/ARM/2009-10-27-double-align.ll b/test/CodeGen/ARM/2009-10-27-double-align.ll index c31b116c55b2..b37de9dbbdfd 100644 --- a/test/CodeGen/ARM/2009-10-27-double-align.ll +++ 
b/test/CodeGen/ARM/2009-10-27-double-align.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=arm-linux-gnueabi -regalloc=basic | FileCheck %s @.str = private constant [1 x i8] zeroinitializer, align 1 diff --git a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll index d9e1a1486a3c..fee86008ad71 100644 --- a/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll +++ b/test/CodeGen/ARM/2010-03-18-ldm-rtrn.ll @@ -6,7 +6,7 @@ define i32 @bar(i32 %a) nounwind { entry: %0 = tail call i32 @foo(i32 %a) nounwind ; [#uses=1] %1 = add nsw i32 %0, 3 ; [#uses=1] -; CHECK: ldmia sp!, {r11, pc} +; CHECK: pop {r11, pc} ; V4: pop ; V4-NEXT: mov pc, lr ret i32 %1 diff --git a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll index 5ad1c09eda4a..df9dbca313f2 100644 --- a/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll +++ b/test/CodeGen/ARM/2010-05-18-PostIndexBug.ll @@ -7,13 +7,13 @@ define zeroext i8 @t(%struct.foo* %this) noreturn optsize { entry: ; ARM: t: -; ARM: str r0, [r1], r0 +; ARM: str r2, [r1], r0 ; THUMB: t: ; THUMB-NOT: str r0, [r1], r0 -; THUMB: str r0, [r1] +; THUMB: str r2, [r1] %0 = getelementptr inbounds %struct.foo* %this, i32 0, i32 1 ; [#uses=1] - store i32 undef, i32* inttoptr (i32 8 to i32*), align 8 + store i32 0, i32* inttoptr (i32 8 to i32*), align 8 br i1 undef, label %bb.nph96, label %bb3 bb3: ; preds = %entry diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll index f077d04803bd..25d38ed77425 100644 --- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -1,5 +1,5 @@ -; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_fbreg -; Use DW_OP_fbreg in variable's location expression if the variable is in a stack slot. 
+; RUN: llc -O0 -mtriple=arm-apple-darwin < %s | grep DW_OP_breg +; Use DW_OP_breg in variable's location expression if the variable is in a stack slot. %struct.SVal = type { i8*, i32 } diff --git a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll index 163c9b030ec8..32d350e9c8b1 100644 --- a/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll +++ b/test/CodeGen/ARM/2010-10-25-ifcvt-ldm.ll @@ -4,9 +4,9 @@ ; was being treated as an instruction count. ; CHECK: push -; CHECK: ldmia -; CHECK: ldmia -; CHECK: ldmia +; CHECK: pop +; CHECK: pop +; CHECK: pop define i32 @test(i32 %x) { entry: diff --git a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll index 8d7541feae94..e3c18cefd51d 100644 --- a/test/CodeGen/ARM/2010-11-29-PrologueBug.ll +++ b/test/CodeGen/ARM/2010-11-29-PrologueBug.ll @@ -10,7 +10,7 @@ entry: ; ARM: bl _foo ; ARM: bl _foo ; ARM: bl _foo -; ARM: ldmia sp!, {r7, pc} +; ARM: pop {r7, pc} ; THUMB2: t: ; THUMB2: push diff --git a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll b/test/CodeGen/ARM/2010-12-13-reloc-pic.ll deleted file mode 100644 index d5aefbee197c..000000000000 --- a/test/CodeGen/ARM/2010-12-13-reloc-pic.ll +++ /dev/null @@ -1,100 +0,0 @@ -; RUN: llc %s -mtriple=armv7-linux-gnueabi -relocation-model=pic -filetype=obj -o - | \ -; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=PIC01 %s - -;; FIXME: Reduce this test further, or even better, -;; redo as .s -> .o test once ARM AsmParser is working better - -; ModuleID = 'large2.pnacl.bc' -target triple = "armv7-none-linux-gnueabi" - -%struct._Bigint = type { %struct._Bigint*, i32, i32, i32, i32, [1 x i32] } -%struct.__FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, %struct._reent*, i8*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i8*, i32)*, i32 (%struct._reent*, i8*, i32, i32)*, i32 (%struct._reent*, i8*)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i32, %struct._flock_t, 
%struct._mbstate_t, i32 } -%struct.__sbuf = type { i8*, i32 } -%struct.__tm = type { i32, i32, i32, i32, i32, i32, i32, i32, i32 } -%struct._atexit = type { %struct._atexit*, i32, [32 x void ()*], %struct._on_exit_args* } -%struct._flock_t = type { i32, i32, i32, i32, i32 } -%struct._glue = type { %struct._glue*, i32, %struct.__FILE* } -%struct._mbstate_t = type { i32, %union.anon } -%struct._misc_reent = type { i8*, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, [8 x i8], i32, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t, %struct._mbstate_t } -%struct._mprec = type { %struct._Bigint*, i32, %struct._Bigint*, %struct._Bigint** } -%struct._on_exit_args = type { [32 x i8*], [32 x i8*], i32, i32 } -%struct._rand48 = type { [3 x i16], [3 x i16], i16, i64 } -%struct._reent = type { %struct.__FILE*, %struct.__FILE*, %struct.__FILE*, i32, i32, i8*, i32, i32, i8*, %struct._mprec*, void (%struct._reent*)*, i32, i32, i8*, %struct._rand48*, %struct.__tm*, i8*, void (i32)**, %struct._atexit*, %struct._atexit, %struct._glue, %struct.__FILE*, %struct._misc_reent*, i8* } -%union.anon = type { i32 } - -@buf = constant [2 x i8] c"x\00", align 4 -@_impure_ptr = external thread_local global %struct._reent* -@.str = private constant [22 x i8] c"This should fault...\0A\00", align 4 -@.str1 = private constant [40 x i8] c"We're still running. 
This is not good.\0A\00", align 4 - -define i32 @main() nounwind { -entry: - %0 = load %struct._reent** @_impure_ptr, align 4 - %1 = getelementptr inbounds %struct._reent* %0, i32 0, i32 1 - %2 = load %struct.__FILE** %1, align 4 - %3 = bitcast %struct.__FILE* %2 to i8* - %4 = tail call i32 @fwrite(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 1, i32 21, i8* %3) nounwind - %5 = load %struct._reent** @_impure_ptr, align 4 - %6 = getelementptr inbounds %struct._reent* %5, i32 0, i32 1 - %7 = load %struct.__FILE** %6, align 4 - %8 = tail call i32 @fflush(%struct.__FILE* %7) nounwind - store i8 121, i8* getelementptr inbounds ([2 x i8]* @buf, i32 0, i32 0), align 4 - %9 = load %struct._reent** @_impure_ptr, align 4 - %10 = getelementptr inbounds %struct._reent* %9, i32 0, i32 1 - %11 = load %struct.__FILE** %10, align 4 - %12 = bitcast %struct.__FILE* %11 to i8* - %13 = tail call i32 @fwrite(i8* getelementptr inbounds ([40 x i8]* @.str1, i32 0, i32 0), i32 1, i32 39, i8* %12) nounwind - ret i32 1 -} - - -; PIC01: Relocation 0x00000000 -; PIC01-NEXT: 'r_offset', 0x0000001c -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - - -; PIC01: Relocation 0x00000001 -; PIC01-NEXT: 'r_offset', 0x00000038 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - -; PIC01: Relocation 0x00000002 -; PIC01-NEXT: 'r_offset', 0x00000044 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - -; PIC01: Relocation 0x00000003 -; PIC01-NEXT: 'r_offset', 0x00000070 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001b - -; PIC01: Relocation 0x00000004 -; PIC01-NEXT: 'r_offset', 0x0000007c -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000019 - - -; PIC01: Relocation 0x00000005 -; PIC01-NEXT: 'r_offset', 0x00000080 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000018 - -; PIC01: Relocation 0x00000006 -; PIC01-NEXT: 'r_offset', 0x00000084 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000068 - -; PIC01: Relocation 0x00000007 -; 
PIC01-NEXT: 'r_offset', 0x00000088 -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x0000001a - -; PIC01: Relocation 0x00000008 -; PIC01-NEXT: 'r_offset', 0x0000008c -; PIC01-NEXT: 'r_sym' -; PIC01-NEXT: 'r_type', 0x00000018 - -declare i32 @fwrite(i8* nocapture, i32, i32, i8* nocapture) nounwind - -declare i32 @fflush(%struct.__FILE* nocapture) nounwind diff --git a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll index eaa34e7960fb..69d4a1482299 100644 --- a/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll +++ b/test/CodeGen/ARM/2010-12-15-elf-lcomm.ll @@ -10,12 +10,12 @@ @STRIDE = internal global i32 8 ; ASM: .type array00,%object @ @array00 -; ASM-NEXT: .lcomm array00,80 @ @array00 +; ASM-NEXT: .lcomm array00,80 ; ASM-NEXT: .type _MergedGlobals,%object @ @_MergedGlobals -; OBJ: Section 0x00000003 +; OBJ: Section 0x00000004 ; OBJ-NEXT: '.bss' ; OBJ: 'array00' @@ -24,7 +24,7 @@ ; OBJ-NEXT: 'st_bind', 0x00000000 ; OBJ-NEXT: 'st_type', 0x00000001 ; OBJ-NEXT: 'st_other', 0x00000000 -; OBJ-NEXT: 'st_shndx', 0x00000003 +; OBJ-NEXT: 'st_shndx', 0x00000004 define i32 @main(i32 %argc) nounwind { %1 = load i32* @sum, align 4 diff --git a/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll new file mode 100644 index 000000000000..81babe0b4b19 --- /dev/null +++ b/test/CodeGen/ARM/2011-03-10-DAGCombineCrash.ll @@ -0,0 +1,47 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 + +; rdar://9117613 + +%struct.mo = type { i32, %struct.mo_pops* } +%struct.mo_pops = type { void (%struct.mo*)*, void (%struct.mo*)*, i32 (%struct.mo*, i32*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i64, i32, i32, i32*, i64, i32)*, i32 (%struct.mo*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%struct.mo*, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i64, i64, i32)*, i32 (%struct.mo*, i32)*, i32 (%struct.mo*)*, i32 (%struct.mo*, i32)*, i8* } +%struct.ui = 
type { %struct.mo*, i32*, i32, i32*, i32*, i64, i32*, i32*, i32* } + + +define internal fastcc i32 @t(i32* %vp, i32 %withfsize, i64 %filesize) nounwind { +entry: + br i1 undef, label %bb1, label %bb + +bb: ; preds = %entry + unreachable + +bb1: ; preds = %entry + %0 = call %struct.ui* @vn_pp_to_ui(i32* undef) nounwind + call void @llvm.memset.p0i8.i32(i8* undef, i8 0, i32 40, i32 4, i1 false) + %1 = getelementptr inbounds %struct.ui* %0, i32 0, i32 0 + store %struct.mo* undef, %struct.mo** %1, align 4 + %2 = getelementptr inbounds %struct.ui* %0, i32 0, i32 5 + %3 = load i64* %2, align 4 + %4 = call i32 @mo_create_nnm(%struct.mo* undef, i64 %3, i32** undef) nounwind + br i1 undef, label %bb3, label %bb2 + +bb2: ; preds = %bb1 + unreachable + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb6 + +bb4: ; preds = %bb3 + %5 = call i32 @vn_size(i32* %vp, i64* %2, i32* undef) nounwind + unreachable + +bb6: ; preds = %bb3 + ret i32 0 +} + +declare %struct.ui* @vn_pp_to_ui(i32*) + +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind + +declare i32 @mo_create_nnm(%struct.mo*, i64, i32**) + +declare i32 @vn_size(i32*, i64*, i32*) diff --git a/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll new file mode 100644 index 000000000000..ccda281e901e --- /dev/null +++ b/test/CodeGen/ARM/2011-03-15-LdStMultipleBug.ll @@ -0,0 +1,55 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; Do not form Thumb2 ldrd / strd if the offset is not multiple of 4. 
+; rdar://9133587 + +%struct.Outer = type { i32, [2 x %"struct.Outer::Inner"] } +%"struct.Outer::Inner" = type { i32, i32, i8, i8 } + +@oStruct = external global %struct.Outer, align 4 + +define void @main() nounwind { +; CHECK: main: +; CHECK-NOT: ldrd +; CHECK: mul +for.body.lr.ph: + br label %for.body + +for.body: ; preds = %_Z14printIsNotZeroi.exit17.for.body_crit_edge, %for.body.lr.ph + %tmp3 = phi i1 [ false, %for.body.lr.ph ], [ %phitmp27, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ] + %i.022 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %_Z14printIsNotZeroi.exit17.for.body_crit_edge ] + %x = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 0 + %y = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %i.022, i32 1 + %inc = add i32 %i.022, 1 + br i1 %tmp3, label %_Z14printIsNotZeroi.exit, label %if.then.i + +if.then.i: ; preds = %for.body + unreachable + +_Z14printIsNotZeroi.exit: ; preds = %for.body + %tmp8 = load i32* %x, align 4, !tbaa !0 + %tmp11 = load i32* %y, align 4, !tbaa !0 + %mul = mul nsw i32 %tmp11, %tmp8 + %tobool.i14 = icmp eq i32 %mul, 0 + br i1 %tobool.i14, label %_Z14printIsNotZeroi.exit17, label %if.then.i16 + +if.then.i16: ; preds = %_Z14printIsNotZeroi.exit + unreachable + +_Z14printIsNotZeroi.exit17: ; preds = %_Z14printIsNotZeroi.exit + br i1 undef, label %_Z14printIsNotZeroi.exit17.for.body_crit_edge, label %for.end + +_Z14printIsNotZeroi.exit17.for.body_crit_edge: ; preds = %_Z14printIsNotZeroi.exit17 + %b.phi.trans.insert = getelementptr %struct.Outer* @oStruct, i32 0, i32 1, i32 %inc, i32 3 + %tmp3.pre = load i8* %b.phi.trans.insert, align 1, !tbaa !3 + %phitmp27 = icmp eq i8 undef, 0 + br label %for.body + +for.end: ; preds = %_Z14printIsNotZeroi.exit17 + ret void +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"bool", metadata !1} diff --git 
a/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll new file mode 100644 index 000000000000..7c9af6f5e590 --- /dev/null +++ b/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic -disable-fp-elim -mcpu=cortex-a8 | FileCheck %s + +; subs r4, #1 +; cmp r4, 0 +; bgt +; cmp cannot be optimized away since it will clear the overflow bit. +; gt / ge, lt, le conditions all depend on V bit. +; rdar://9172742 + +define i32 @t() nounwind { +; CHECK: t: +entry: + br label %bb2 + +bb: ; preds = %bb2 + %0 = tail call i32 @rand() nounwind + %1 = icmp eq i32 %0, 50 + br i1 %1, label %bb3, label %bb1 + +bb1: ; preds = %bb + %tmp = tail call i32 @puts() nounwind + %indvar.next = add i32 %indvar, 1 + br label %bb2 + +bb2: ; preds = %bb1, %entry +; CHECK: bb2 +; CHECK: subs [[REG:r[0-9]+]], #1 +; CHECK: cmp [[REG]], #0 +; CHECK: bgt + %indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ] + %tries.0 = sub i32 2147483647, %indvar + %tmp1 = icmp sgt i32 %tries.0, 0 + br i1 %tmp1, label %bb, label %bb3 + +bb3: ; preds = %bb2, %bb + ret i32 0 +} + +declare i32 @rand() + +declare i32 @puts() nounwind diff --git a/test/CodeGen/ARM/2011-04-07-schediv.ll b/test/CodeGen/ARM/2011-04-07-schediv.ll new file mode 100644 index 000000000000..a61908fd7c45 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-07-schediv.ll @@ -0,0 +1,31 @@ +; RUN: llc < %s -mcpu=cortex-a8 | FileCheck %s +; Tests preRAsched support for VRegCycle interference. 
+ +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +define void @t(i32 %src_width, float* nocapture %src_copy_start, float* nocapture %dst_copy_start, i32 %src_copy_start_index) nounwind optsize { +entry: + %src_copy_start6 = bitcast float* %src_copy_start to i8* + %0 = icmp eq i32 %src_width, 0 + br i1 %0, label %return, label %bb + +; Make sure the scheduler schedules all uses of the preincrement +; induction variable before defining the postincrement value. +; CHECK: t: +; CHECK-NOT: mov +bb: ; preds = %entry, %bb + %j.05 = phi i32 [ %2, %bb ], [ 0, %entry ] + %tmp = mul i32 %j.05, %src_copy_start_index + %uglygep = getelementptr i8* %src_copy_start6, i32 %tmp + %src_copy_start_addr.04 = bitcast i8* %uglygep to float* + %dst_copy_start_addr.03 = getelementptr float* %dst_copy_start, i32 %j.05 + %1 = load float* %src_copy_start_addr.04, align 4 + store float %1, float* %dst_copy_start_addr.03, align 4 + %2 = add i32 %j.05, 1 + %exitcond = icmp eq i32 %2, %src_width + br i1 %exitcond, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} diff --git a/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll new file mode 100644 index 000000000000..a9dd97182a4c --- /dev/null +++ b/test/CodeGen/ARM/2011-04-11-MachineLICMBug.ll @@ -0,0 +1,34 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s + +; Overly aggressive LICM simply adds copies of constants +; rdar://9266679 + +define zeroext i1 @t(i32* nocapture %A, i32 %size, i32 %value) nounwind readonly ssp { +; CHECK: t: +entry: + br label %for.cond + +for.cond: + %0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] + %cmp = icmp ult i32 %0, %size + br i1 %cmp, label %for.body, label %return + +for.body: +; CHECK: %for.body +; CHECK: movs r{{[0-9]+}}, #1 + %arrayidx = getelementptr i32* %A, i32 %0 + %tmp4 = load 
i32* %arrayidx, align 4 + %cmp6 = icmp eq i32 %tmp4, %value + br i1 %cmp6, label %return, label %for.inc + +; CHECK: %for.cond +; CHECK: movs r{{[0-9]+}}, #0 + +for.inc: + %inc = add i32 %0, 1 + br label %for.cond + +return: + %retval.0 = phi i1 [ true, %for.body ], [ false, %for.cond ] + ret i1 %retval.0 +} diff --git a/test/CodeGen/ARM/2011-04-12-AlignBug.ll b/test/CodeGen/ARM/2011-04-12-AlignBug.ll new file mode 100644 index 000000000000..317be94e86b0 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-12-AlignBug.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +; CHECK: align 3 +@.v = linker_private unnamed_addr constant <4 x i32> , align 8 +; CHECK: align 2 +@.strA = linker_private unnamed_addr constant [4 x i8] c"bar\00" +; CHECK-NOT: align +@.strB = linker_private unnamed_addr constant [4 x i8] c"foo\00", align 1 +@.strC = linker_private unnamed_addr constant [4 x i8] c"baz\00", section "__TEXT,__cstring,cstring_literals", align 1 diff --git a/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll new file mode 100644 index 000000000000..eb23de0b9716 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-12-FastRegAlloc.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -O0 -verify-machineinstrs -regalloc=fast +; Previously we'd crash as out of registers on this input by clobbering all of +; the aliases. 
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +define void @_Z8TestCasev() nounwind ssp { +entry: + %a = alloca float, align 4 + %tmp = load float* %a, align 4 + call void asm sideeffect "", "w,~{s0},~{s16}"(float %tmp) nounwind, !srcloc !0 + ret void +} + +!0 = metadata !{i32 109} diff --git a/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll new file mode 100644 index 000000000000..e712e08ddb6a --- /dev/null +++ b/test/CodeGen/ARM/2011-04-15-AndVFlagPeepholeBug.ll @@ -0,0 +1,22 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +; CHECK: _f +; CHECK-NOT: ands +; CHECK: cmp +; CHECK: blxle _g + +define i32 @f(i32 %a, i32 %b) nounwind ssp { +entry: + %and = and i32 %b, %a + %cmp = icmp slt i32 %and, 1 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @g(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %and +} + +declare void @g(...) 
diff --git a/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll new file mode 100644 index 000000000000..5404cf57a59f --- /dev/null +++ b/test/CodeGen/ARM/2011-04-15-RegisterCmpPeephole.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s + +; CHECK: _f +; CHECK: adds +; CHECK-NOT: cmp +; CHECK: blxeq _h + +define i32 @f(i32 %a, i32 %b) nounwind ssp { +entry: + %add = add nsw i32 %b, %a + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @h(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %add +} + +; CHECK: _g +; CHECK: orrs +; CHECK-NOT: cmp +; CHECK: blxeq _h + +define i32 @g(i32 %a, i32 %b) nounwind ssp { +entry: + %add = or i32 %b, %a + %cmp = icmp eq i32 %add, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void (...)* @h(i32 %a, i32 %b) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret i32 %add +} + +declare void @h(...) diff --git a/test/CodeGen/ARM/2011-04-26-SchedTweak.ll b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll new file mode 100644 index 000000000000..ed7dd0332046 --- /dev/null +++ b/test/CodeGen/ARM/2011-04-26-SchedTweak.ll @@ -0,0 +1,70 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s + +; Do not move the umull above previous call which would require use of +; more callee-saved registers and introduce copies. 
+; rdar://9329627 + +%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* } +%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 } + +@FuncPtr = external hidden unnamed_addr global %struct.FF* +@.str1 = external hidden unnamed_addr constant [6 x i8], align 4 +@G = external unnamed_addr global i32 +@.str2 = external hidden unnamed_addr constant [58 x i8], align 4 +@.str3 = external hidden unnamed_addr constant [58 x i8], align 4 + +define i32 @test() nounwind optsize ssp { +entry: +; CHECK: test: +; CHECK: push +; CHECK-NOT: push + %block_size = alloca i32, align 4 + %block_count = alloca i32, align 4 + %index_cache = alloca i32, align 4 + store i32 0, i32* %index_cache, align 4 + %tmp = load i32* @G, align 4 + %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind + switch i32 %tmp1, label %bb8 [ + i32 0, label %bb + i32 536870913, label %bb4 + i32 536870914, label %bb6 + ] + +bb: + %tmp2 = load i32* @G, align 4 + %tmp4 = icmp eq i32 %tmp2, 0 + br i1 %tmp4, label %bb1, label %bb8 + +bb1: +; CHECK: %bb1 +; CHECK-NOT: umull +; CHECK: blx _Get +; CHECK: umull +; CHECK: blx _foo + %tmp5 = load i32* %block_size, align 4 + %tmp6 = load i32* %block_count, align 4 + %tmp7 = call %struct.FF* @Get() nounwind + store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4 + %tmp10 = zext i32 %tmp6 to i64 + %tmp11 = zext i32 %tmp5 to i64 + %tmp12 = mul nsw i64 %tmp10, %tmp11 + %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind + br label %bb8 + +bb4: + ret i32 0 + +bb6: + ret i32 1 + +bb8: + ret i32 -1 +} + +declare i32 @printf(i8*, ...) 
+ +declare %struct.FF* @Get() + +declare i32 @foo(i8*, i64, i32) + +declare i32 @bar(i32, i32, i32) diff --git a/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll b/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll new file mode 100644 index 000000000000..0741049cffdd --- /dev/null +++ b/test/CodeGen/ARM/2011-04-27-IfCvtBug.ll @@ -0,0 +1,59 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-ios + +; If converter was being too cute. It look for root BBs (which don't have +; successors) and use inverse depth first search to traverse the BBs. However +; that doesn't work when the CFG has infinite loops. Simply do a linear +; traversal of all BBs work just fine. + +; rdar://9344645 + +%struct.hc = type { i32, i32, i32, i32 } + +define i32 @t(i32 %type) optsize { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: + unreachable + +if.else: + br i1 undef, label %if.then15, label %if.else18 + +if.then15: + unreachable + +if.else18: + switch i32 %type, label %if.else173 [ + i32 3, label %if.then115 + i32 1, label %if.then102 + ] + +if.then102: + br i1 undef, label %cond.true10.i, label %t.exit + +cond.true10.i: + br label %t.exit + +t.exit: + unreachable + +if.then115: + br i1 undef, label %if.else163, label %if.else145 + +if.else145: + %call150 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34865152) optsize + br label %while.body172 + +if.else163: + %call168 = call fastcc %struct.hc* @foo(%struct.hc* undef, i32 34078720) optsize + br label %while.body172 + +while.body172: + br label %while.body172 + +if.else173: + ret i32 -1 +} + +declare hidden fastcc %struct.hc* @foo(%struct.hc* nocapture, i32) nounwind optsize + diff --git a/test/CodeGen/ARM/align.ll b/test/CodeGen/ARM/align.ll index d57c159b85cb..9589e72df2f5 100644 --- a/test/CodeGen/ARM/align.ll +++ b/test/CodeGen/ARM/align.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s -check-prefix=ELF -; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=DARWIN +; RUN: llc < %s 
-mtriple=arm-apple-darwin10 | FileCheck %s -check-prefix=DARWIN @a = global i1 true ; no alignment diff --git a/test/CodeGen/ARM/arguments.ll b/test/CodeGen/ARM/arguments.ll index c7fcb9755d9e..a8b42e63b71f 100644 --- a/test/CodeGen/ARM/arguments.ll +++ b/test/CodeGen/ARM/arguments.ll @@ -14,7 +14,7 @@ define i32 @f1(i32 %a, i64 %b) { define i32 @f2() nounwind optsize { ; ELF: f2: ; ELF: mov [[REGISTER:(r[0-9]+)]], #128 -; ELF: str [[REGISTER]], [sp] +; ELF: str [[REGISTER]], [ ; DARWIN: f2: ; DARWIN: mov r3, #128 entry: diff --git a/test/CodeGen/ARM/arm-and-tst-peephole.ll b/test/CodeGen/ARM/arm-and-tst-peephole.ll index 50c638b73931..07620700aedb 100644 --- a/test/CodeGen/ARM/arm-and-tst-peephole.ll +++ b/test/CodeGen/ARM/arm-and-tst-peephole.ll @@ -23,15 +23,15 @@ tailrecurse: ; preds = %sw.bb, %entry %tmp2 = load i8** %scevgep5 %0 = ptrtoint i8* %tmp2 to i32 -; ARM: ands r12, r12, #3 +; ARM: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 ; ARM-NEXT: beq -; THUMB: movs r5, #3 -; THUMB-NEXT: ands r5, r4 -; THUMB-NEXT: cmp r5, #0 +; THUMB: movs r[[R0:[0-9]+]], #3 +; THUMB-NEXT: ands r[[R0]], r +; THUMB-NEXT: cmp r[[R0]], #0 ; THUMB-NEXT: beq -; T2: ands r12, r12, #3 +; T2: ands {{r[0-9]+}}, {{r[0-9]+}}, #3 ; T2-NEXT: beq %and = and i32 %0, 3 diff --git a/test/CodeGen/ARM/arm-returnaddr.ll b/test/CodeGen/ARM/arm-returnaddr.ll index 382a18334600..95edaad47e5f 100644 --- a/test/CodeGen/ARM/arm-returnaddr.ll +++ b/test/CodeGen/ARM/arm-returnaddr.ll @@ -1,5 +1,7 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s ; rdar://8015977 ; rdar://8020118 @@ -16,7 +18,7 @@ define i8* @rt2() nounwind readnone { entry: ; CHECK: rt2: ; CHECK: {r7, lr} -; CHECK: ldr r0, [r7] +; CHECK: ldr r[[R0:[0-9]+]], [r7] ; CHECK: ldr r0, [r0] ; CHECK: ldr r0, [r0, #4] %0 = tail call i8* 
@llvm.returnaddress(i32 2) diff --git a/test/CodeGen/ARM/avoid-cpsr-rmw.ll b/test/CodeGen/ARM/avoid-cpsr-rmw.ll new file mode 100644 index 000000000000..d0c4f3ae9d67 --- /dev/null +++ b/test/CodeGen/ARM/avoid-cpsr-rmw.ll @@ -0,0 +1,16 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a9 | FileCheck %s +; Avoid some 's' 16-bit instruction which partially update CPSR (and add false +; dependency) when it isn't dependent on last CPSR defining instruction. +; rdar://8928208 + +define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind readnone { + entry: +; CHECK: t: +; CHECK: muls r2, r3, r2 +; CHECK-NEXT: mul r0, r0, r1 +; CHECK-NEXT: muls r0, r2, r0 + %0 = mul nsw i32 %a, %b + %1 = mul nsw i32 %c, %d + %2 = mul nsw i32 %0, %1 + ret i32 %2 +} diff --git a/test/CodeGen/ARM/bx_fold.ll b/test/CodeGen/ARM/bx_fold.ll index 09f1aae0a9f0..5533038fb828 100644 --- a/test/CodeGen/ARM/bx_fold.ll +++ b/test/CodeGen/ARM/bx_fold.ll @@ -24,7 +24,7 @@ bb1: ; preds = %bb, %entry bb18: ; preds = %bb1 ; CHECK-NOT: bx -; CHECK: ldmia sp! 
+; CHECK: pop ret void } diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll index a77aba037be5..4dc37aa27558 100644 --- a/test/CodeGen/ARM/call-tc.ll +++ b/test/CodeGen/ARM/call-tc.ll @@ -74,7 +74,7 @@ entry: ; CHECKT2: t7: ; CHECKT2: blxeq _foo ; CHECKT2-NEXT: pop.w -; CHECKT2-NEXT: b.w _foo +; CHECKT2-NEXT: b _foo br i1 undef, label %bb, label %bb1.lr.ph bb1.lr.ph: diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll index a6a7ed6af184..9b90408cc4db 100644 --- a/test/CodeGen/ARM/carry.ll +++ b/test/CodeGen/ARM/carry.ll @@ -19,3 +19,20 @@ entry: %tmp2 = sub i64 %tmp1, %b ret i64 %tmp2 } + +; add with live carry +define i64 @f3(i32 %al, i32 %bl) { +; CHECK: f3: +; CHECK: adds r +; CHECK: adcs r +; CHECK: adc r +entry: + ; unsigned wide add + %aw = zext i32 %al to i64 + %bw = zext i32 %bl to i64 + %cw = add i64 %aw, %bw + ; ch == carry bit + %ch = lshr i64 %cw, 32 + %dw = add i64 %ch, %bw + ret i64 %dw +} diff --git a/test/CodeGen/ARM/code-placement.ll b/test/CodeGen/ARM/code-placement.ll index 845be8c20ea5..91ef65925221 100644 --- a/test/CodeGen/ARM/code-placement.ll +++ b/test/CodeGen/ARM/code-placement.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=armv7-apple-darwin -cgp-critical-edge-splitting=0 | FileCheck %s +; RUN: llc < %s -mtriple=armv7-apple-darwin | FileCheck %s ; PHI elimination shouldn't break backedge. ; rdar://8263994 @@ -72,7 +72,7 @@ bb2.preheader: ; preds = %bb3, %bb.nph15 br i1 %4, label %bb1, label %bb3 ; CHECK: LBB1_[[RET]]: @ %bb5 -; CHECK: ldmia sp! 
+; CHECK: pop bb5: ; preds = %bb3, %entry %sum.1.lcssa = phi i32 [ 0, %entry ], [ %sum.0.lcssa, %bb3 ] ; [#uses=1] ret i32 %sum.1.lcssa diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll index 542cf02f2a90..7893df782054 100644 --- a/test/CodeGen/ARM/constants.ll +++ b/test/CodeGen/ARM/constants.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -march=arm -disable-cgp-branch-opts | FileCheck %s define i32 @f1() { ; CHECK: f1 @@ -14,31 +14,31 @@ define i32 @f2() { define i32 @f3() { ; CHECK: f3 -; CHECK: mov r0, #1, 24 +; CHECK: mov r0, #1, #24 ret i32 256 } define i32 @f4() { ; CHECK: f4 -; CHECK: orr{{.*}}#1, 24 +; CHECK: orr{{.*}}#1, #24 ret i32 257 } define i32 @f5() { ; CHECK: f5 -; CHECK: mov r0, #255, 2 +; CHECK: mov r0, #255, #2 ret i32 -1073741761 } define i32 @f6() { ; CHECK: f6 -; CHECK: mov r0, #63, 28 +; CHECK: mov r0, #63, #28 ret i32 1008 } define void @f7(i32 %a) { ; CHECK: f7 -; CHECK: cmp r0, #1, 16 +; CHECK: cmp r0, #1, #16 %b = icmp ugt i32 %a, 65536 br i1 %b, label %r, label %r r: diff --git a/test/CodeGen/ARM/crash-greedy.ll b/test/CodeGen/ARM/crash-greedy.ll new file mode 100644 index 000000000000..8a865e23d0a4 --- /dev/null +++ b/test/CodeGen/ARM/crash-greedy.ll @@ -0,0 +1,84 @@ +; RUN: llc < %s -regalloc=greedy -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -verify-machineinstrs | FileCheck %s +; +; ARM tests that crash or fail with the greedy register allocator. 
+ +target triple = "thumbv7-apple-darwin" + +declare double @exp(double) + +; CHECK: remat_subreg +define void @remat_subreg(float* nocapture %x, i32* %y, i32 %n, i32 %z, float %c, float %lambda, float* nocapture %ret_f, float* nocapture %ret_df) nounwind { +entry: + %conv16 = fpext float %lambda to double + %mul17 = fmul double %conv16, -1.000000e+00 + br i1 undef, label %cond.end.us, label %cond.end + +cond.end.us: ; preds = %entry + unreachable + +cond.end: ; preds = %cond.end, %entry + %mul = fmul double undef, 0.000000e+00 + %add = fadd double undef, %mul + %add46 = fadd double undef, undef + %add75 = fadd double 0.000000e+00, undef + br i1 undef, label %for.end, label %cond.end + +for.end: ; preds = %cond.end + %conv78 = sitofp i32 %z to double + %conv83 = fpext float %c to double + %mul84 = fmul double %mul17, %conv83 + %call85 = tail call double @exp(double %mul84) nounwind + %mul86 = fmul double %conv78, %call85 + %add88 = fadd double 0.000000e+00, %mul86 +; CHECK: blx _exp + %call100 = tail call double @exp(double %mul84) nounwind + %mul101 = fmul double undef, %call100 + %add103 = fadd double %add46, %mul101 + %mul111 = fmul double undef, %conv83 + %mul119 = fmul double %mul111, undef + %add121 = fadd double undef, %mul119 + %div = fdiv double 1.000000e+00, %conv16 + %div126 = fdiv double %add, %add75 + %sub = fsub double %div, %div126 + %div129 = fdiv double %add103, %add88 + %add130 = fadd double %sub, %div129 + %conv131 = fptrunc double %add130 to float + store float %conv131, float* %ret_f, align 4 + %mul139 = fmul double %div129, %div129 + %div142 = fdiv double %add121, %add88 + %sub143 = fsub double %mul139, %div142 +; %lambda is passed on the stack, and the stack slot load is rematerialized. +; The rematted load of a float constrains the D register used for the mul. 
+; CHECK: vldr + %mul146 = fmul float %lambda, %lambda + %conv147 = fpext float %mul146 to double + %div148 = fdiv double 1.000000e+00, %conv147 + %sub149 = fsub double %sub143, %div148 + %conv150 = fptrunc double %sub149 to float + store float %conv150, float* %ret_df, align 4 + ret void +} + +; CHECK: insert_elem +; This test has a sub-register copy with a kill flag: +; %vreg6:ssub_3 = COPY %vreg6:ssub_2; QPR_VFP2:%vreg6 +; The rewriter must do something sensible with that, or the scavenger crashes. +define void @insert_elem() nounwind { +entry: + br i1 undef, label %if.end251, label %if.then84 + +if.then84: ; preds = %entry + br i1 undef, label %if.end251, label %if.then195 + +if.then195: ; preds = %if.then84 + %div = fdiv float 1.000000e+00, undef + %vecinit207 = insertelement <4 x float> undef, float %div, i32 1 + %vecinit208 = insertelement <4 x float> %vecinit207, float 1.000000e+00, i32 2 + %vecinit209 = insertelement <4 x float> %vecinit208, float 1.000000e+00, i32 3 + %mul216 = fmul <4 x float> zeroinitializer, %vecinit209 + store <4 x float> %mul216, <4 x float>* undef, align 16 + br label %if.end251 + +if.end251: ; preds = %if.then195, %if.then84, %entry + ret void +} diff --git a/test/CodeGen/ARM/debug-info-d16-reg.ll b/test/CodeGen/ARM/debug-info-d16-reg.ll new file mode 100644 index 000000000000..8c9095e3a9ea --- /dev/null +++ b/test/CodeGen/ARM/debug-info-d16-reg.ll @@ -0,0 +1,105 @@ +; RUN: llc < %s - | FileCheck %s +; Radar 9309221 +; Test dwarf reg no for d16 +;CHECK: DW_OP_regx +;CHECK-NEXT: 272 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00", align 4 +@.str1 = private unnamed_addr constant [6 x i8] c"point\00", align 4 + +define i32 @inlineprinter(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize { +entry: + tail call void 
@llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !19), !dbg !26 + tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !20), !dbg !26 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !21), !dbg !26 + %0 = zext i8 %c to i32, !dbg !27 + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !27 + ret i32 0, !dbg !29 +} + +define i32 @printer(i8* %ptr, double %val, i8 zeroext %c) nounwind optsize noinline { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !16), !dbg !30 + tail call void @llvm.dbg.value(metadata !{double %val}, i64 0, metadata !17), !dbg !30 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !18), !dbg !30 + %0 = zext i8 %c to i32, !dbg !31 + %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %val, i32 %0) nounwind, !dbg !31 + ret i32 0, !dbg !33 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !22), !dbg !34 + tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !23), !dbg !34 + %0 = sitofp i32 %argc to double, !dbg !35 + %1 = fadd double %0, 5.555552e+05, !dbg !35 + tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !24), !dbg !35 + %2 = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind, !dbg !36 + %3 = getelementptr inbounds i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !37 + %4 = trunc i32 %argc to i8, !dbg !37 + %5 = add i8 %4, 97, !dbg !37 + tail call void @llvm.dbg.value(metadata !{i8* %3}, i64 0, metadata !19) nounwind, !dbg !38 + tail call void @llvm.dbg.value(metadata !{double %1}, i64 0, metadata !20) nounwind, !dbg !38 + tail call void @llvm.dbg.value(metadata !{i8 %5}, i64 0, metadata !21) nounwind, !dbg !38 + %6 = zext i8 %5 to i32, !dbg !39 + %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %3, double %1, i32 %6) nounwind, !dbg !39 + %8 = tail call i32 @printer(i8* %3, double %1, i8 zeroext %5) nounwind, !dbg !40 + ret i32 0, !dbg !41 +} + +declare i32 @puts(i8* nocapture) nounwind + +!llvm.dbg.sp = !{!0, !9, !10} +!llvm.dbg.lv.printer = !{!16, !17, !18} +!llvm.dbg.lv.inlineprinter = !{!19, !20, !21} +!llvm.dbg.lv.main = !{!22, !23, !24} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"printer", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @printer} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/tmp/", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 1, metadata !"/tmp/a.c", 
metadata !"/tmp", metadata !"(LLVM build 00)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0, null} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5, metadata !6, metadata !7, metadata !8} +!5 = metadata !{i32 589860, metadata !1, metadata !"int", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!7 = metadata !{i32 589860, metadata !1, metadata !"double", metadata !1, i32 0, i64 64, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{i32 589860, metadata !1, metadata !"unsigned char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!9 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"inlineprinter", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i8*, double, i8)* @inlineprinter} ; [ DW_TAG_subprogram ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"main", metadata !1, i32 18, metadata !11, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, i32 (i32, i8**)* @main} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !12, i32 0, null} ; [ DW_TAG_subroutine_type ] +!12 = metadata !{metadata !5, metadata !5, metadata !13} +!13 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ] +!14 = metadata !{i32 589839, metadata !1, metadata !"", metadata !1, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !15} ; [ DW_TAG_pointer_type ] +!15 = metadata !{i32 589860, metadata 
!1, metadata !"char", metadata !1, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!16 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 11, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 11, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 11, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !9, metadata !"ptr", metadata !1, i32 4, metadata !6, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 590081, metadata !9, metadata !"val", metadata !1, i32 4, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 590081, metadata !9, metadata !"c", metadata !1, i32 4, metadata !8, i32 0} ; [ DW_TAG_arg_variable ] +!22 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 17, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!23 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 17, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!24 = metadata !{i32 590080, metadata !25, metadata !"dval", metadata !1, i32 19, metadata !7, i32 0} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 589835, metadata !10, i32 18, i32 0, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 4, i32 0, metadata !9, null} +!27 = metadata !{i32 6, i32 0, metadata !28, null} +!28 = metadata !{i32 589835, metadata !9, i32 5, i32 0, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 7, i32 0, metadata !28, null} +!30 = metadata !{i32 11, i32 0, metadata !0, null} +!31 = metadata !{i32 13, i32 0, metadata !32, null} +!32 = metadata !{i32 589835, metadata !0, i32 12, i32 0, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!33 = metadata !{i32 14, i32 0, metadata !32, null} +!34 = metadata !{i32 17, i32 0, metadata !10, null} +!35 = metadata !{i32 19, i32 0, metadata !25, null} +!36 = metadata !{i32 
20, i32 0, metadata !25, null} +!37 = metadata !{i32 21, i32 0, metadata !25, null} +!38 = metadata !{i32 4, i32 0, metadata !9, metadata !37} +!39 = metadata !{i32 6, i32 0, metadata !28, metadata !37} +!40 = metadata !{i32 22, i32 0, metadata !25, null} +!41 = metadata !{i32 23, i32 0, metadata !25, null} diff --git a/test/CodeGen/ARM/debug-info-qreg.ll b/test/CodeGen/ARM/debug-info-qreg.ll new file mode 100644 index 000000000000..e83a83d1f10a --- /dev/null +++ b/test/CodeGen/ARM/debug-info-qreg.ll @@ -0,0 +1,94 @@ +; RUN: llc < %s - | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +;CHECK: DW_OP_regx for Q register: D1 +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: byte 8 +;CHECK-NEXT: DW_OP_regx for Q register: D2 +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_piece 8 +;CHECK-NEXT: byte 8 + +@.str = external constant [13 x i8] + +declare <4 x float> @test0001(float) nounwind readnone ssp + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp { +entry: + br label %for.body9 + +for.body9: ; preds = %for.body9, %entry + %add19 = fadd <4 x float> undef, , !dbg !39 + br i1 undef, label %for.end54, label %for.body9, !dbg !44 + +for.end54: ; preds = %for.body9 + tail call void @llvm.dbg.value(metadata !{<4 x float> %add19}, i64 0, metadata !27), !dbg !39 + %tmp115 = extractelement <4 x float> %add19, i32 1 + %conv6.i75 = fpext float %tmp115 to double, !dbg !45 + %call.i82 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13 x i8]* @.str, i32 0, i32 0), double undef, double %conv6.i75, double undef, double undef) nounwind, !dbg !45 + ret i32 0, !dbg !49 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !10, !14} +!llvm.dbg.lv.test0001 = !{!18} +!llvm.dbg.lv.main = !{!19, !20, !24, !26, !27, !28, !29} +!llvm.dbg.lv.printFV = !{!30} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"test0001", metadata !"test0001", metadata !"", metadata !1, i32 3, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, <4 x float> (float)* @test0001, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"build2.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"build2.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589846, metadata !2, metadata !"v4f32", metadata !1, i32 14, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!6 = metadata !{i32 590083, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 128, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_vector_type ] +!7 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!8 = metadata !{metadata !9} +!9 = metadata !{i32 589857, i64 0, i64 3} ; [ DW_TAG_subrange_type ] +!10 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 59, metadata !11, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!11 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !12, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!12 = metadata 
!{metadata !13} +!13 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 589870, i32 0, metadata !15, metadata !"printFV", metadata !"printFV", metadata !"", metadata !15, i32 41, metadata !16, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, null, null} ; [ DW_TAG_subprogram ] +!15 = metadata !{i32 589865, metadata !"/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/UnitTests/Vector/helpers.h", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!16 = metadata !{i32 589845, metadata !15, metadata !"", metadata !15, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !17, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!17 = metadata !{null} +!18 = metadata !{i32 590081, metadata !0, metadata !"a", metadata !1, i32 16777219, metadata !7, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 590081, metadata !10, metadata !"argc", metadata !1, i32 16777275, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!20 = metadata !{i32 590081, metadata !10, metadata !"argv", metadata !1, i32 33554491, metadata !21, i32 0} ; [ DW_TAG_arg_variable ] +!21 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !22} ; [ DW_TAG_pointer_type ] +!22 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !23} ; [ DW_TAG_pointer_type ] +!23 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!24 = metadata !{i32 590080, metadata !25, metadata !"i", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!25 = metadata !{i32 589835, metadata !10, i32 59, i32 33, metadata !1, i32 14} ; [ DW_TAG_lexical_block ] +!26 = metadata !{i32 590080, metadata !25, metadata !"j", metadata !1, i32 60, metadata !13, i32 0} ; [ DW_TAG_auto_variable ] +!27 = metadata !{i32 590080, metadata !25, 
metadata !"x", metadata !1, i32 61, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!28 = metadata !{i32 590080, metadata !25, metadata !"y", metadata !1, i32 62, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!29 = metadata !{i32 590080, metadata !25, metadata !"z", metadata !1, i32 63, metadata !5, i32 0} ; [ DW_TAG_auto_variable ] +!30 = metadata !{i32 590081, metadata !14, metadata !"F", metadata !15, i32 16777257, metadata !31, i32 0} ; [ DW_TAG_arg_variable ] +!31 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ] +!32 = metadata !{i32 589846, metadata !2, metadata !"FV", metadata !15, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !33} ; [ DW_TAG_typedef ] +!33 = metadata !{i32 589847, metadata !2, metadata !"", metadata !15, i32 22, i64 128, i64 128, i64 0, i32 0, i32 0, metadata !34, i32 0, i32 0} ; [ DW_TAG_union_type ] +!34 = metadata !{metadata !35, metadata !37} +!35 = metadata !{i32 589837, metadata !15, metadata !"V", metadata !15, i32 23, i64 128, i64 128, i64 0, i32 0, metadata !36} ; [ DW_TAG_member ] +!36 = metadata !{i32 589846, metadata !2, metadata !"v4sf", metadata !15, i32 3, i64 0, i64 0, i64 0, i32 0, metadata !6} ; [ DW_TAG_typedef ] +!37 = metadata !{i32 589837, metadata !15, metadata !"A", metadata !15, i32 24, i64 128, i64 32, i64 0, i32 0, metadata !38} ; [ DW_TAG_member ] +!38 = metadata !{i32 589825, metadata !2, metadata !"", metadata !2, i32 0, i64 128, i64 32, i32 0, i32 0, metadata !7, metadata !8, i32 0, i32 0} ; [ DW_TAG_array_type ] +!39 = metadata !{i32 79, i32 7, metadata !40, null} +!40 = metadata !{i32 589835, metadata !41, i32 75, i32 35, metadata !1, i32 18} ; [ DW_TAG_lexical_block ] +!41 = metadata !{i32 589835, metadata !42, i32 75, i32 5, metadata !1, i32 17} ; [ DW_TAG_lexical_block ] +!42 = metadata !{i32 589835, metadata !43, i32 71, i32 32, metadata !1, i32 16} ; [ DW_TAG_lexical_block ] +!43 = metadata !{i32 589835, metadata !25, 
i32 71, i32 3, metadata !1, i32 15} ; [ DW_TAG_lexical_block ] +!44 = metadata !{i32 75, i32 5, metadata !42, null} +!45 = metadata !{i32 42, i32 2, metadata !46, metadata !48} +!46 = metadata !{i32 589835, metadata !47, i32 42, i32 2, metadata !15, i32 20} ; [ DW_TAG_lexical_block ] +!47 = metadata !{i32 589835, metadata !14, i32 41, i32 28, metadata !15, i32 19} ; [ DW_TAG_lexical_block ] +!48 = metadata !{i32 95, i32 3, metadata !25, null} +!49 = metadata !{i32 99, i32 3, metadata !25, null} diff --git a/test/CodeGen/ARM/debug-info-s16-reg.ll b/test/CodeGen/ARM/debug-info-s16-reg.ll new file mode 100644 index 000000000000..548c9bdebf02 --- /dev/null +++ b/test/CodeGen/ARM/debug-info-s16-reg.ll @@ -0,0 +1,116 @@ +; RUN: llc < %s - | FileCheck %s +; Radar 9309221 +; Test dwarf reg no for s16 +;CHECK: DW_OP_regx for S register +;CHECK-NEXT: byte +;CHECK-NEXT: byte +;CHECK-NEXT: DW_OP_bit_piece 32 0 + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32" +target triple = "thumbv7-apple-macosx10.6.7" + +@.str = private unnamed_addr constant [11 x i8] c"%p %lf %c\0A\00" +@.str1 = private unnamed_addr constant [6 x i8] c"point\00" + +define i32 @inlineprinter(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !8), !dbg !24 + tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !10), !dbg !25 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !12), !dbg !26 + %conv = fpext float %val to double, !dbg !27 + %conv3 = zext i8 %c to i32, !dbg !27 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !27 + ret i32 0, !dbg !29 +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind optsize + +define i32 @printer(i8* %ptr, float %val, i8 zeroext %c) nounwind optsize noinline ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 0, metadata !14), !dbg !30 + tail call void @llvm.dbg.value(metadata !{float %val}, i64 0, metadata !15), !dbg !31 + tail call void @llvm.dbg.value(metadata !{i8 %c}, i64 0, metadata !16), !dbg !32 + %conv = fpext float %val to double, !dbg !33 + %conv3 = zext i8 %c to i32, !dbg !33 + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %ptr, double %conv, i32 %conv3) nounwind optsize, !dbg !33 + ret i32 0, !dbg !35 +} + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind optsize ssp { +entry: + tail call void @llvm.dbg.value(metadata !{i32 %argc}, i64 0, metadata !17), !dbg !36 + tail call void @llvm.dbg.value(metadata !{i8** %argv}, i64 0, metadata !18), !dbg !37 + %conv = sitofp i32 %argc to double, !dbg !38 + %add = fadd double %conv, 5.555552e+05, !dbg !38 + %conv1 = fptrunc double %add to float, !dbg !38 + tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !22), !dbg !38 + %call = tail call i32 @puts(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0)) nounwind optsize, !dbg !39 + %add.ptr = getelementptr i8* bitcast (i32 (i32, i8**)* @main to i8*), i32 %argc, !dbg !40 + %add5 = add nsw i32 %argc, 97, !dbg !40 + %conv6 = trunc i32 %add5 to i8, !dbg !40 + tail call void @llvm.dbg.value(metadata !{i8* %add.ptr}, i64 0, metadata !8) nounwind, !dbg !41 + tail call void @llvm.dbg.value(metadata !{float %conv1}, i64 0, metadata !10) nounwind, !dbg !42 + tail call void @llvm.dbg.value(metadata !{i8 %conv6}, i64 0, metadata !12) nounwind, !dbg !43 + %conv.i = fpext float %conv1 to double, !dbg !44 + %conv3.i = and i32 %add5, 255, !dbg !44 + %call.i = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i8* %add.ptr, double %conv.i, i32 %conv3.i) nounwind optsize, 
!dbg !44 + %call14 = tail call i32 @printer(i8* %add.ptr, float %conv1, i8 zeroext %conv6) optsize, !dbg !45 + ret i32 0, !dbg !46 +} + +declare i32 @puts(i8* nocapture) nounwind optsize + +declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone + +!llvm.dbg.sp = !{!0, !6, !7} +!llvm.dbg.lv.inlineprinter = !{!8, !10, !12} +!llvm.dbg.lv.printer = !{!14, !15, !16} +!llvm.dbg.lv.main = !{!17, !18, !22} + +!0 = metadata !{i32 589870, i32 0, metadata !1, metadata !"inlineprinter", metadata !"inlineprinter", metadata !"", metadata !1, i32 5, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @inlineprinter, null} ; [ DW_TAG_subprogram ] +!1 = metadata !{i32 589865, metadata !"a.c", metadata !"/private/tmp", metadata !2} ; [ DW_TAG_file_type ] +!2 = metadata !{i32 589841, i32 0, i32 12, metadata !"a.c", metadata !"/private/tmp", metadata !"clang version 3.0 (trunk 129915)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ] +!3 = metadata !{i32 589845, metadata !1, metadata !"", metadata !1, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !4, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] +!4 = metadata !{metadata !5} +!5 = metadata !{i32 589860, metadata !2, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] +!6 = metadata !{i32 589870, i32 0, metadata !1, metadata !"printer", metadata !"printer", metadata !"", metadata !1, i32 12, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i8*, float, i8)* @printer, null} ; [ DW_TAG_subprogram ] +!7 = metadata !{i32 589870, i32 0, metadata !1, metadata !"main", metadata !"main", metadata !"", metadata !1, i32 18, metadata !3, i1 false, i1 true, i32 0, i32 0, i32 0, i32 256, i1 true, i32 (i32, i8**)* @main, null} ; [ DW_TAG_subprogram ] +!8 = metadata !{i32 590081, metadata !0, metadata !"ptr", metadata !1, i32 16777220, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] +!9 = metadata !{i32 589839, 
metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ] +!10 = metadata !{i32 590081, metadata !0, metadata !"val", metadata !1, i32 33554436, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] +!11 = metadata !{i32 589860, metadata !2, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ] +!12 = metadata !{i32 590081, metadata !0, metadata !"c", metadata !1, i32 50331652, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!13 = metadata !{i32 589860, metadata !2, metadata !"unsigned char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ] +!14 = metadata !{i32 590081, metadata !6, metadata !"ptr", metadata !1, i32 16777227, metadata !9, i32 0} ; [ DW_TAG_arg_variable ] +!15 = metadata !{i32 590081, metadata !6, metadata !"val", metadata !1, i32 33554443, metadata !11, i32 0} ; [ DW_TAG_arg_variable ] +!16 = metadata !{i32 590081, metadata !6, metadata !"c", metadata !1, i32 50331659, metadata !13, i32 0} ; [ DW_TAG_arg_variable ] +!17 = metadata !{i32 590081, metadata !7, metadata !"argc", metadata !1, i32 16777233, metadata !5, i32 0} ; [ DW_TAG_arg_variable ] +!18 = metadata !{i32 590081, metadata !7, metadata !"argv", metadata !1, i32 33554449, metadata !19, i32 0} ; [ DW_TAG_arg_variable ] +!19 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !20} ; [ DW_TAG_pointer_type ] +!20 = metadata !{i32 589839, metadata !2, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !21} ; [ DW_TAG_pointer_type ] +!21 = metadata !{i32 589860, metadata !2, metadata !"char", null, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ] +!22 = metadata !{i32 590080, metadata !23, metadata !"dval", metadata !1, i32 19, metadata !11, i32 0} ; [ DW_TAG_auto_variable ] +!23 = metadata !{i32 589835, metadata !7, i32 18, i32 1, metadata !1, i32 2} ; [ DW_TAG_lexical_block ] +!24 = metadata !{i32 4, i32 22, 
metadata !0, null} +!25 = metadata !{i32 4, i32 33, metadata !0, null} +!26 = metadata !{i32 4, i32 52, metadata !0, null} +!27 = metadata !{i32 6, i32 3, metadata !28, null} +!28 = metadata !{i32 589835, metadata !0, i32 5, i32 1, metadata !1, i32 0} ; [ DW_TAG_lexical_block ] +!29 = metadata !{i32 7, i32 3, metadata !28, null} +!30 = metadata !{i32 11, i32 42, metadata !6, null} +!31 = metadata !{i32 11, i32 53, metadata !6, null} +!32 = metadata !{i32 11, i32 72, metadata !6, null} +!33 = metadata !{i32 13, i32 3, metadata !34, null} +!34 = metadata !{i32 589835, metadata !6, i32 12, i32 1, metadata !1, i32 1} ; [ DW_TAG_lexical_block ] +!35 = metadata !{i32 14, i32 3, metadata !34, null} +!36 = metadata !{i32 17, i32 15, metadata !7, null} +!37 = metadata !{i32 17, i32 28, metadata !7, null} +!38 = metadata !{i32 19, i32 31, metadata !23, null} +!39 = metadata !{i32 20, i32 3, metadata !23, null} +!40 = metadata !{i32 21, i32 3, metadata !23, null} +!41 = metadata !{i32 4, i32 22, metadata !0, metadata !40} +!42 = metadata !{i32 4, i32 33, metadata !0, metadata !40} +!43 = metadata !{i32 4, i32 52, metadata !0, metadata !40} +!44 = metadata !{i32 6, i32 3, metadata !28, metadata !40} +!45 = metadata !{i32 22, i32 3, metadata !23, null} +!46 = metadata !{i32 23, i32 1, metadata !23, null} diff --git a/test/CodeGen/ARM/divmod.ll b/test/CodeGen/ARM/divmod.ll new file mode 100644 index 000000000000..34313aa89aae --- /dev/null +++ b/test/CodeGen/ARM/divmod.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mtriple=arm-apple-ios | FileCheck %s + +define void @foo(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { +entry: +; CHECK: foo: +; CHECK: bl ___divmodsi4 +; CHECK-NOT: bl ___divmodsi4 + %div = sdiv i32 %x, %y + store i32 %div, i32* %P, align 4 + %rem = srem i32 %x, %y + %arrayidx6 = getelementptr inbounds i32* %P, i32 1 + store i32 %rem, i32* %arrayidx6, align 4 + ret void +} + +define void @bar(i32 %x, i32 %y, i32* nocapture %P) nounwind ssp { +entry: +; CHECK: bar: +; 
CHECK: bl ___udivmodsi4 +; CHECK-NOT: bl ___udivmodsi4 + %div = udiv i32 %x, %y + store i32 %div, i32* %P, align 4 + %rem = urem i32 %x, %y + %arrayidx6 = getelementptr inbounds i32* %P, i32 1 + store i32 %rem, i32* %arrayidx6, align 4 + ret void +} + +; rdar://9280991 +@flags = external unnamed_addr global i32 +@tabsize = external unnamed_addr global i32 + +define void @do_indent(i32 %cols) nounwind { +entry: +; CHECK: do_indent: + %0 = load i32* @flags, align 4 + %1 = and i32 %0, 67108864 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %bb1, label %bb + +bb: +; CHECK: bl ___divmodsi4 + %3 = load i32* @tabsize, align 4 + %4 = srem i32 %cols, %3 + %5 = sdiv i32 %cols, %3 + %6 = tail call i32 @llvm.objectsize.i32(i8* null, i1 false) + %7 = tail call i8* @__memset_chk(i8* null, i32 9, i32 %5, i32 %6) nounwind + br label %bb1 + +bb1: + %line_indent_len.0 = phi i32 [ %4, %bb ], [ 0, %entry ] + %8 = getelementptr inbounds i8* null, i32 %line_indent_len.0 + store i8 0, i8* %8, align 1 + ret void +} + +declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readnone +declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index f03282bdab7f..51efe51bf152 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -24,4 +24,4 @@ declare float @fabsf(float) ; CORTEXA8: test: ; CORTEXA8: vabs.f32 d1, d1 ; CORTEXA9: test: -; CORTEXA9: vabs.f32 s1, s1 +; CORTEXA9: vabs.f32 s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fadds.ll b/test/CodeGen/ARM/fadds.ll index 749690e98d0f..e35103c045eb 100644 --- a/test/CodeGen/ARM/fadds.ll +++ b/test/CodeGen/ARM/fadds.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vadd.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vadd.f32 s0, s1, s0 +; CORTEXA9: vadd.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fast-isel-pred.ll b/test/CodeGen/ARM/fast-isel-pred.ll new file mode 100644 index 000000000000..8de54ad5332b --- /dev/null +++ 
b/test/CodeGen/ARM/fast-isel-pred.ll @@ -0,0 +1,58 @@ +; RUN: llc -O0 -mtriple=armv7-apple-darwin < %s + +define i32 @main() nounwind ssp { +entry: + %retval = alloca i32, align 4 + %X = alloca <4 x i32>, align 16 + %Y = alloca <4 x float>, align 16 + store i32 0, i32* %retval + %tmp = load <4 x i32>* %X, align 16 + call void @__aa(<4 x i32> %tmp, i8* null, i32 3, <4 x float>* %Y) + %0 = load i32* %retval + ret i32 %0 +} + +define internal void @__aa(<4 x i32> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp { +entry: + %__a.addr.i = alloca <4 x i32>, align 16 + %v.addr = alloca <4 x i32>, align 16 + %p.addr = alloca i8*, align 4 + %offset.addr = alloca i32, align 4 + %constants.addr = alloca <4 x float>*, align 4 + store <4 x i32> %v, <4 x i32>* %v.addr, align 16 + store i8* %p, i8** %p.addr, align 4 + store i32 %offset, i32* %offset.addr, align 4 + store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 + %tmp = load <4 x i32>* %v.addr, align 16 + store <4 x i32> %tmp, <4 x i32>* %__a.addr.i, align 16 + %tmp.i = load <4 x i32>* %__a.addr.i, align 16 + %0 = bitcast <4 x i32> %tmp.i to <16 x i8> + %1 = bitcast <16 x i8> %0 to <4 x i32> + %vcvt.i = sitofp <4 x i32> %1 to <4 x float> + %tmp1 = load i8** %p.addr, align 4 + %tmp2 = load i32* %offset.addr, align 4 + %tmp3 = load <4 x float>** %constants.addr, align 4 + call void @__bb(<4 x float> %vcvt.i, i8* %tmp1, i32 %tmp2, <4 x float>* %tmp3) + ret void +} + +define internal void @__bb(<4 x float> %v, i8* %p, i32 %offset, <4 x float>* %constants) nounwind inlinehint ssp { +entry: + %v.addr = alloca <4 x float>, align 16 + %p.addr = alloca i8*, align 4 + %offset.addr = alloca i32, align 4 + %constants.addr = alloca <4 x float>*, align 4 + %data = alloca i64, align 4 + store <4 x float> %v, <4 x float>* %v.addr, align 16 + store i8* %p, i8** %p.addr, align 4 + store i32 %offset, i32* %offset.addr, align 4 + store <4 x float>* %constants, <4 x float>** %constants.addr, align 4 + 
%tmp = load i64* %data, align 4 + %tmp1 = load i8** %p.addr, align 4 + %tmp2 = load i32* %offset.addr, align 4 + %add.ptr = getelementptr i8* %tmp1, i32 %tmp2 + %0 = bitcast i8* %add.ptr to i64* + %arrayidx = getelementptr inbounds i64* %0, i32 0 + store i64 %tmp, i64* %arrayidx + ret void +} diff --git a/test/CodeGen/ARM/fast-isel-redefinition.ll b/test/CodeGen/ARM/fast-isel-redefinition.ll new file mode 100644 index 000000000000..08dcc64c9c84 --- /dev/null +++ b/test/CodeGen/ARM/fast-isel-redefinition.ll @@ -0,0 +1,11 @@ +; RUN: llc -O0 -regalloc=linearscan < %s +; This isn't exactly a useful set of command-line options, but check that it +; doesn't crash. (It was crashing because a register was getting redefined.) + +target triple = "thumbv7-apple-macosx10.6.7" + +define i32 @f(i32* %x) nounwind ssp { + %y = getelementptr inbounds i32* %x, i32 5000 + %tmp103 = load i32* %y, align 4 + ret i32 %tmp103 +} diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll index 8f58480be164..2d79674028ca 100644 --- a/test/CodeGen/ARM/fast-isel-static.ll +++ b/test/CodeGen/ARM/fast-isel-static.ll @@ -24,7 +24,7 @@ entry: store float 0.000000e+00, float* %ztot, align 4 store float 1.000000e+00, float* %z, align 4 ; CHECK-LONG: blx r2 -; CHECK-NORM: blx _myadd +; CHECK-NORM: bl _myadd call void @myadd(float* %ztot, float* %z) ret i32 0 } diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll index dd806ec6f1ae..6aad92fbc6a4 100644 --- a/test/CodeGen/ARM/fast-isel.ll +++ b/test/CodeGen/ARM/fast-isel.ll @@ -1,8 +1,7 @@ -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin -; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=armv7-apple-darwin | FileCheck %s --check-prefix=ARM +; RUN: llc < %s -O0 -fast-isel-abort -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=THUMB ; Very basic fast-isel functionality. 
- define i32 @add(i32 %a, i32 %b) nounwind { entry: %a.addr = alloca i32, align 4 @@ -13,4 +12,52 @@ entry: %tmp1 = load i32* %b.addr %add = add nsw i32 %tmp, %tmp1 ret i32 %add -} \ No newline at end of file +} + +; Check truncate to bool +define void @test1(i32 %tmp) nounwind { +entry: +%tobool = trunc i32 %tmp to i1 +br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry +call void @test1(i32 0) +br label %if.end + +if.end: ; preds = %if.then, %entry +ret void +; ARM: test1: +; ARM: tst r0, #1 +; THUMB: test1: +; THUMB: tst.w r0, #1 +} + +; Check some simple operations with immediates +define void @test2(i32 %tmp, i32* %ptr) nounwind { +; THUMB: test2: +; ARM: test2: + +b1: + %a = add i32 %tmp, 4096 + store i32 %a, i32* %ptr + br label %b2 + +; THUMB: add.w {{.*}} #4096 +; ARM: add {{.*}} #1, #20 + +b2: + %b = add i32 %tmp, 4095 + store i32 %b, i32* %ptr + br label %b3 +; THUMB: addw {{.*}} #4095 +; ARM: movw {{.*}} #4095 +; ARM: add + +b3: + %c = or i32 %tmp, 4 + store i32 %c, i32* %ptr + ret void + +; THUMB: orr {{.*}} #4 +; ARM: orr {{.*}} #4 +} diff --git a/test/CodeGen/ARM/fcopysign.ll b/test/CodeGen/ARM/fcopysign.ll index d30e3ebf50a5..f241c2681cbf 100644 --- a/test/CodeGen/ARM/fcopysign.ll +++ b/test/CodeGen/ARM/fcopysign.ll @@ -40,5 +40,37 @@ entry: ret double %1 } +; rdar://9059537 +define i32 @test4() ssp { +entry: +; SOFT: test4: +; SOFT: vmov.f64 [[REG4:(d[0-9]+)]], #1.000000e+00 +; This S-reg must be the first sub-reg of the last D-reg on vbsl. 
+; SOFT: vcvt.f32.f64 {{s1?[02468]}}, [[REG4]] +; SOFT: vshr.u64 [[REG4]], [[REG4]], #32 +; SOFT: vmov.i32 [[REG5:(d[0-9]+)]], #0x80000000 +; SOFT: vbsl [[REG5]], [[REG4]], {{d[0-9]+}} + %call80 = tail call double @copysign(double 1.000000e+00, double undef) + %conv81 = fptrunc double %call80 to float + %tmp88 = bitcast float %conv81 to i32 + ret i32 %tmp88 +} + +; rdar://9287902 +define float @test5() nounwind { +entry: +; SOFT: test5: +; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000 +; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1 +; SOFT: vshr.u64 [[REG7]], [[REG7]], #32 +; SOFT: vbsl [[REG6]], [[REG7]], + %0 = tail call double (...)* @bar() nounwind + %1 = fptrunc double %0 to float + %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone + %3 = fadd float %1, %2 + ret float %3 +} + +declare double @bar(...) declare double @copysign(double, double) nounwind declare float @copysignf(float, float) nounwind diff --git a/test/CodeGen/ARM/fdivs.ll b/test/CodeGen/ARM/fdivs.ll index 0c3149579297..31c1ca940502 100644 --- a/test/CodeGen/ARM/fdivs.ll +++ b/test/CodeGen/ARM/fdivs.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vdiv.f32 s0, s1, s0 ; CORTEXA9: test: -; CORTEXA9: vdiv.f32 s0, s1, s0 +; CORTEXA9: vdiv.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fmacs.ll b/test/CodeGen/ARM/fmacs.ll index fb83ef626af6..b63f609e755a 100644 --- a/test/CodeGen/ARM/fmacs.ll +++ b/test/CodeGen/ARM/fmacs.ll @@ -1,6 +1,8 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -march=arm -mcpu=cortex-a9 | FileCheck %s -check-prefix=A9 +; RUN: llc < %s -mtriple=arm-linux-gnueabi -mcpu=cortex-a9 -float-abi=hard | FileCheck %s -check-prefix=HARD define float @t1(float %acc, float %a, float %b) { entry: @@ -49,3 +51,54 @@ entry: %1 = fadd float %0, %acc ret float %1 
} + +; It's possible to make use of fp vmla / vmls on Cortex-A9. +; rdar://8659675 +define void @t4(float %acc1, float %a, float %b, float %acc2, float %c, float* %P1, float* %P2) { +entry: +; A8: t4: +; A8: vmul.f32 +; A8: vmul.f32 +; A8: vadd.f32 +; A8: vadd.f32 + +; Two vmla with now RAW hazard +; A9: t4: +; A9: vmla.f32 +; A9: vmla.f32 + +; HARD: t4: +; HARD: vmla.f32 s0, s1, s2 +; HARD: vmla.f32 s3, s1, s4 + %0 = fmul float %a, %b + %1 = fadd float %acc1, %0 + %2 = fmul float %a, %c + %3 = fadd float %acc2, %2 + store float %1, float* %P1 + store float %3, float* %P2 + ret void +} + +define float @t5(float %a, float %b, float %c, float %d, float %e) { +entry: +; A8: t5: +; A8: vmul.f32 +; A8: vmul.f32 +; A8: vadd.f32 +; A8: vadd.f32 + +; A9: t5: +; A9: vmla.f32 +; A9: vmul.f32 +; A9: vadd.f32 + +; HARD: t5: +; HARD: vmla.f32 s4, s0, s1 +; HARD: vmul.f32 s0, s2, s3 +; HARD: vadd.f32 s0, s4, s0 + %0 = fmul float %a, %b + %1 = fadd float %e, %0 + %2 = fmul float %c, %d + %3 = fadd float %1, %2 + ret float %3 +} diff --git a/test/CodeGen/ARM/fmuls.ll b/test/CodeGen/ARM/fmuls.ll index ef4e3e52818e..bc118b8cb226 100644 --- a/test/CodeGen/ARM/fmuls.ll +++ b/test/CodeGen/ARM/fmuls.ll @@ -20,4 +20,4 @@ entry: ; CORTEXA8: test: ; CORTEXA8: vmul.f32 d0, d1, d0 ; CORTEXA9: test: -; CORTEXA9: vmul.f32 s0, s1, s0 +; CORTEXA9: vmul.f32 s{{.}}, s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 76c806761f75..9facf20fee7e 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -1,6 +1,7 @@ ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s -check-prefix=VFP2 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s -check-prefix=NEON ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s -check-prefix=A8 +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=A8 define float @t1(float %acc, float %a, float %b) nounwind { entry: @@ -11,8 +12,8 @@ entry: ; NEON: vnmla.f32 ; A8: t1: -; 
A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}} -; A8: vsub.f32 d0, d0, d1 +; A8: vnmul.f32 s{{[0-9]}}, s{{[0-9]}}, s{{[0-9]}} +; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} %0 = fmul float %a, %b %1 = fsub float -0.0, %0 %2 = fsub float %1, %acc @@ -28,8 +29,8 @@ entry: ; NEON: vnmla.f32 ; A8: t2: -; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}} -; A8: vsub.f32 d0, d0, d1 +; A8: vnmul.f32 s{{[0123]}}, s{{[0123]}}, s{{[0123]}} +; A8: vsub.f32 d{{[0-9]}}, d{{[0-9]}}, d{{[0-9]}} %0 = fmul float %a, %b %1 = fmul float -1.0, %0 %2 = fsub float %1, %acc @@ -45,8 +46,8 @@ entry: ; NEON: vnmla.f64 ; A8: t3: -; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}} -; A8: vsub.f64 d16, d16, d17 +; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} %0 = fmul double %a, %b %1 = fsub double -0.0, %0 %2 = fsub double %1, %acc @@ -62,8 +63,8 @@ entry: ; NEON: vnmla.f64 ; A8: t4: -; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}} -; A8: vsub.f64 d16, d16, d17 +; A8: vnmul.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} +; A8: vsub.f64 d1{{[67]}}, d1{{[67]}}, d1{{[67]}} %0 = fmul double %a, %b %1 = fmul double -1.0, %0 %2 = fsub double %1, %acc diff --git a/test/CodeGen/ARM/fp-arg-shuffle.ll b/test/CodeGen/ARM/fp-arg-shuffle.ll new file mode 100644 index 000000000000..ae02b792e4d6 --- /dev/null +++ b/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+neon -float-abi=soft | FileCheck %s + +; CHECK: function1 +; CHECK-NOT: vmov +define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { +entry: + %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind + ret double %call +} + +declare double @function2(double, double, double, double, double, double) diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll index b6e9c3c22e75..8ef45f2bbc97 100644 --- a/test/CodeGen/ARM/fp.ll +++ b/test/CodeGen/ARM/fp.ll @@ -51,7 +51,7 @@ 
entry: define float @h2() { ;CHECK: h2: -;CHECK: mov r0, #254, 10 +;CHECK: mov r0, #254, #10 entry: ret float 1.000000e+00 } diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 1ef9f7f32164..86c06f1ddd9e 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -5,7 +5,7 @@ define i32 @test1(float %a, float %b) { ; VFP2: test1: -; VFP2: vcvt.s32.f32 s0, s0 +; VFP2: vcvt.s32.f32 s{{.}}, s{{.}} ; NEON: test1: ; NEON: vcvt.s32.f32 d0, d0 entry: @@ -16,7 +16,7 @@ entry: define i32 @test2(float %a, float %b) { ; VFP2: test2: -; VFP2: vcvt.u32.f32 s0, s0 +; VFP2: vcvt.u32.f32 s{{.}}, s{{.}} ; NEON: test2: ; NEON: vcvt.u32.f32 d0, d0 entry: @@ -27,7 +27,7 @@ entry: define float @test3(i32 %a, i32 %b) { ; VFP2: test3: -; VFP2: vcvt.f32.u32 s0, s0 +; VFP2: vcvt.f32.u32 s{{.}}, s{{.}} ; NEON: test3: ; NEON: vcvt.f32.u32 d0, d0 entry: @@ -38,7 +38,7 @@ entry: define float @test4(i32 %a, i32 %b) { ; VFP2: test4: -; VFP2: vcvt.f32.s32 s0, s0 +; VFP2: vcvt.f32.s32 s{{.}}, s{{.}} ; NEON: test4: ; NEON: vcvt.f32.s32 d0, d0 entry: diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll index 65b921bdf655..7c0dd0e12a79 100644 --- a/test/CodeGen/ARM/fpcmp-opt.ll +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -37,7 +37,7 @@ define arm_apcscc i32 @t2(double* %a, double* %b) nounwind { entry: ; FINITE: t2: ; FINITE-NOT: vldr -; FINITE: ldrd r0, [r0] +; FINITE: ldrd r0, r1, [r0] ; FINITE-NOT: b LBB ; FINITE: cmp r0, #0 ; FINITE: cmpeq r1, #0 diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll index 75428ac21655..18f87bfc2e71 100644 --- a/test/CodeGen/ARM/ifcvt10.ll +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -9,9 +9,9 @@ entry: ; CHECK: t: ; CHECK: vpop {d8} ; CHECK-NOT: vpopne -; CHECK: ldmia sp!, {r7, pc} +; CHECK: pop {r7, pc} ; CHECK: vpop {d8} -; CHECK: ldmia sp!, {r7, pc} +; CHECK: pop {r7, pc} br i1 undef, label %if.else, label %if.then if.then: ; preds = %entry diff --git a/test/CodeGen/ARM/ifcvt5.ll 
b/test/CodeGen/ARM/ifcvt5.ll index bca2ae346a6f..3615055f8b29 100644 --- a/test/CodeGen/ARM/ifcvt5.ll +++ b/test/CodeGen/ARM/ifcvt5.ll @@ -11,7 +11,7 @@ entry: define i32 @t1(i32 %a, i32 %b) { ; CHECK: t1: -; CHECK: ldmialt sp!, {r7, pc} +; CHECK: poplt {r7, pc} entry: %tmp1 = icmp sgt i32 %a, 10 ; [#uses=1] br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock diff --git a/test/CodeGen/ARM/ifcvt6.ll b/test/CodeGen/ARM/ifcvt6.ll index 5edf32fd1af6..232765768550 100644 --- a/test/CodeGen/ARM/ifcvt6.ll +++ b/test/CodeGen/ARM/ifcvt6.ll @@ -3,7 +3,7 @@ define void @foo(i32 %X, i32 %Y) { entry: ; CHECK: cmpne -; CHECK: ldmiahi sp! +; CHECK: pophi %tmp1 = icmp ult i32 %X, 4 ; [#uses=1] %tmp4 = icmp eq i32 %Y, 0 ; [#uses=1] %tmp7 = or i1 %tmp4, %tmp1 ; [#uses=1] diff --git a/test/CodeGen/ARM/ifcvt7.ll b/test/CodeGen/ARM/ifcvt7.ll index 62e13557cfdc..476ed4d47c64 100644 --- a/test/CodeGen/ARM/ifcvt7.ll +++ b/test/CodeGen/ARM/ifcvt7.ll @@ -6,7 +6,7 @@ define fastcc i32 @CountTree(%struct.quad_struct* %tree) { ; CHECK: cmpeq ; CHECK: moveq -; CHECK: ldmiaeq sp! +; CHECK: popeq entry: br label %tailrecurse diff --git a/test/CodeGen/ARM/ifcvt8.ll b/test/CodeGen/ARM/ifcvt8.ll index 5fdfc4ea6805..ca9a5c63cda6 100644 --- a/test/CodeGen/ARM/ifcvt8.ll +++ b/test/CodeGen/ARM/ifcvt8.ll @@ -5,7 +5,7 @@ declare void @abort() define fastcc void @t(%struct.SString* %word, i8 signext %c) { -; CHECK: ldmiane sp! 
+; CHECK: popne entry: %tmp1 = icmp eq %struct.SString* %word, null ; [#uses=1] br i1 %tmp1, label %cond_true, label %cond_false diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll index 0aac9d16ec6c..f0ab9dd7ea00 100644 --- a/test/CodeGen/ARM/indirectbr.ll +++ b/test/CodeGen/ARM/indirectbr.ll @@ -14,15 +14,15 @@ entry: %1 = icmp eq i8* %0, null ; [#uses=1] ; indirect branch gets duplicated here ; ARM: bx -; THUMB: mov pc, r1 -; THUMB2: mov pc, r2 +; THUMB: mov pc, +; THUMB2: mov pc, br i1 %1, label %bb3, label %bb2 bb2: ; preds = %entry, %bb3 %gotovar.4.0 = phi i8* [ %gotovar.4.0.pre, %bb3 ], [ %0, %entry ] ; [#uses=1] ; ARM: bx -; THUMB: mov pc, r1 -; THUMB2: mov pc, r2 +; THUMB: mov pc, +; THUMB2: mov pc, indirectbr i8* %gotovar.4.0, [label %L5, label %L4, label %L3, label %L2, label %L1] bb3: ; preds = %entry @@ -42,20 +42,23 @@ L3: ; preds = %L4, %bb2 br label %L2 L2: ; preds = %L3, %bb2 +; THUMB: muls %res.2 = phi i32 [ %res.1, %L3 ], [ 1, %bb2 ] ; [#uses=1] %phitmp = mul i32 %res.2, 6 ; [#uses=1] br label %L1 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; [#uses=1] -; ARM: ldr r1, LCPI -; ARM: add r1, pc, r1 -; ARM: str r1 -; THUMB: ldr.n r2, LCPI -; THUMB: add r2, pc -; THUMB: str r2 -; THUMB2: ldr.n r2, LCPI -; THUMB2-NEXT: str r2 +; ARM: ldr [[R1:r[0-9]+]], LCPI +; ARM: add [[R1b:r[0-9]+]], pc, [[R1]] +; ARM: str [[R1b]] +; THUMB: ldr.n +; THUMB: add +; THUMB: ldr.n [[R2:r[0-9]+]], LCPI +; THUMB: add [[R2]], pc +; THUMB: str [[R2]] +; THUMB2: ldr.n [[R2:r[0-9]+]], LCPI +; THUMB2-NEXT: str{{(.w)?}} [[R2]] store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll index 9f77ad1f794c..9d6eba85301e 100644 --- a/test/CodeGen/ARM/inlineasm3.ll +++ b/test/CodeGen/ARM/inlineasm3.ll @@ -6,7 +6,7 @@ define void @t() nounwind { entry: ; CHECK: vmov.I64 q15, #0 -; CHECK: vmov.32 d30[0], r0 +; CHECK: vmov.32 d30[0], ; CHECK: 
vmov q8, q15 %tmp = alloca %struct.int32x4_t, align 16 call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind diff --git a/test/CodeGen/ARM/int-to-fp.ll b/test/CodeGen/ARM/int-to-fp.ll new file mode 100644 index 000000000000..889b14919840 --- /dev/null +++ b/test/CodeGen/ARM/int-to-fp.ll @@ -0,0 +1,19 @@ +; RUN: llc < %s | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10.0.0" + +; CHECK: sint_to_fp +; CHECK: vmovl.s16 +; CHECK: vcvt.f32.s32 +define <4 x float> @sint_to_fp(<4 x i16> %x) nounwind ssp { + %a = sitofp <4 x i16> %x to <4 x float> + ret <4 x float> %a +} + +; CHECK: uint_to_fp +; CHECK: vmovl.u16 +; CHECK: vcvt.f32.u32 +define <4 x float> @uint_to_fp(<4 x i16> %x) nounwind ssp { + %a = uitofp <4 x i16> %x to <4 x float> + ret <4 x float> %a +} diff --git a/test/CodeGen/ARM/ldm.ll b/test/CodeGen/ARM/ldm.ll index 2f1b85ebbb04..db78fd06ab2d 100644 --- a/test/CodeGen/ARM/ldm.ll +++ b/test/CodeGen/ARM/ldm.ll @@ -5,9 +5,9 @@ define i32 @t1() { ; CHECK: t1: -; CHECK: ldmia +; CHECK: pop ; V4T: t1: -; V4T: ldmia +; V4T: pop %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 0) ; [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 1) ; [#uses=1] %tmp4 = tail call i32 @f1( i32 %tmp, i32 %tmp3 ) ; [#uses=1] @@ -16,9 +16,9 @@ define i32 @t1() { define i32 @t2() { ; CHECK: t2: -; CHECK: ldmia +; CHECK: pop ; V4T: t2: -; V4T: ldmia +; V4T: pop %tmp = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 2) ; [#uses=1] %tmp3 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 3) ; [#uses=1] %tmp5 = load i32* getelementptr ([0 x i32]* @X, i32 0, i32 4) ; [#uses=1] @@ -29,7 +29,7 @@ define i32 @t2() { define i32 @t3() { ; CHECK: t3: ; CHECK: ldmib -; CHECK: ldmia sp! 
+; CHECK: pop ; V4T: t3: ; V4T: ldmib ; V4T: pop diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll index 895562a1d31e..8010f20689be 100644 --- a/test/CodeGen/ARM/ldrd.ll +++ b/test/CodeGen/ARM/ldrd.ll @@ -1,19 +1,21 @@ -; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=V6 -; RUN: llc < %s -mtriple=armv5-apple-darwin | FileCheck %s -check-prefix=V5 -; RUN: llc < %s -mtriple=armv6-eabi | FileCheck %s -check-prefix=EABI +; RUN: llc < %s -mtriple=armv6-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V6 +; RUN: llc < %s -mtriple=armv5-apple-darwin -regalloc=linearscan | FileCheck %s -check-prefix=V5 +; RUN: llc < %s -mtriple=armv6-eabi -regalloc=linearscan | FileCheck %s -check-prefix=EABI ; rdar://r6949835 +; Magic ARM pair hints works best with linearscan. + @b = external global i64* define i64 @t(i64 %a) nounwind readonly { entry: -;V6: ldrd r2, [r2] +;V6: ldrd r2, r3, [r2] -;V5: ldr r3, [r2] -;V5: ldr r2, [r2, #4] +;V5: ldr r{{[0-9]+}}, [r2] +;V5: ldr r{{[0-9]+}}, [r2, #4] -;EABI: ldr r3, [r2] -;EABI: ldr r2, [r2, #4] +;EABI: ldr r{{[0-9]+}}, [r2] +;EABI: ldr r{{[0-9]+}}, [r2, #4] %0 = load i64** @b, align 4 %1 = load i64* %0, align 4 diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll index 74f8d783377d..e401dca1ca80 100644 --- a/test/CodeGen/ARM/long.ll +++ b/test/CodeGen/ARM/long.ll @@ -14,14 +14,14 @@ entry: define i64 @f3() { ; CHECK: f3: -; CHECK: mvn r0, #2, 2 +; CHECK: mvn r0, #2, #2 entry: ret i64 2147483647 } define i64 @f4() { ; CHECK: f4: -; CHECK: mov r0, #2, 2 +; CHECK: mov r0, #2, #2 entry: ret i64 2147483648 } @@ -29,7 +29,7 @@ entry: define i64 @f5() { ; CHECK: f5: ; CHECK: mvn r0, #0 -; CHECK: mvn r1, #2, 2 +; CHECK: mvn r1, #2, #2 entry: ret i64 9223372036854775807 } diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll index 5e4f5730f8d2..d5aac2e3ddaf 100644 --- a/test/CodeGen/ARM/long_shift.ll +++ b/test/CodeGen/ARM/long_shift.ll @@ -24,9 +24,10 @@ define 
i32 @f2(i64 %x, i64 %y) { ; CHECK: f2 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 -; CHECK-NEXT: movge r0, r1, asr r2 +; CHECK-NEXT: asrge r0, r1, r2 %a = ashr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b @@ -36,9 +37,10 @@ define i32 @f3(i64 %x, i64 %y) { ; CHECK: f3 ; CHECK: lsr{{.*}}r2 ; CHECK-NEXT: rsb r3, r2, #32 -; CHECK-NEXT: subs r2, r2, #32 +; CHECK-NEXT: sub r2, r2, #32 +; CHECK-NEXT: cmp r2, #0 ; CHECK-NEXT: orr r0, r0, r1, lsl r3 -; CHECK-NEXT: movge r0, r1, lsr r2 +; CHECK-NEXT: lsrge r0, r1, r2 %a = lshr i64 %x, %y %b = trunc i64 %a to i32 ret i32 %b diff --git a/test/CodeGen/ARM/lsr-code-insertion.ll b/test/CodeGen/ARM/lsr-code-insertion.ll index 1bbb96deeefe..153fd8fe34e4 100644 --- a/test/CodeGen/ARM/lsr-code-insertion.ll +++ b/test/CodeGen/ARM/lsr-code-insertion.ll @@ -1,5 +1,4 @@ -; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed} -; RUN: llc < %s -stats |& not grep {.*Number of re-materialization} +; RUN: llc < %s | FileCheck %s ; This test really wants to check that the resultant "cond_true" block only ; has a single store in it, and that cond_true55 only has code to materialize ; the constant and do a store. 
We do *not* want something like this: @@ -8,6 +7,11 @@ ; add r8, r0, r6 ; str r10, [r8, #+4] ; +; CHECK: ldr [[R6:r[0-9*]+]], LCP +; CHECK: cmp {{.*}}, [[R6]] +; CHECK: ldrle +; CHECK-NEXT: strle + target triple = "arm-apple-darwin8" define void @foo(i32* %mc, i32* %mpp, i32* %ip, i32* %dpp, i32* %tpmm, i32 %M, i32* %tpim, i32* %tpdm, i32* %bp, i32* %ms, i32 %xmb) { diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 9882690da268..c1318ec31f58 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,11 +4,6 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-128] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-96] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-64] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #-32] -; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}] ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #32] ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #64] ; CHECK: vstr.32 s{{.*}}, [{{(r[0-9]+)|(lr)}}, #96] diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll index ed20c32dc0d5..5bae037cafb3 100644 --- a/test/CodeGen/ARM/memcpy-inline.ll +++ b/test/CodeGen/ARM/memcpy-inline.ll @@ -1,9 +1,11 @@ -; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldmia -; RUN: llc < %s -mtriple=arm-apple-darwin | grep stmia -; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrb -; RUN: llc < %s -mtriple=arm-apple-darwin | grep ldrh +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s - %struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } +; The ARM magic hinting works best with linear scan. 
+; CHECK: ldrd +; CHECK: strd +; CHECK: ldrb + +%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 } @src = external global %struct.x @dst = external global %struct.x diff --git a/test/CodeGen/ARM/neon_div.ll b/test/CodeGen/ARM/neon_div.ll index e33797079093..de48feeb9ec2 100644 --- a/test/CodeGen/ARM/neon_div.ll +++ b/test/CodeGen/ARM/neon_div.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -pre-RA-sched=source | FileCheck %s define <8 x i8> @sdivi8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vrecpe.f32 diff --git a/test/CodeGen/ARM/neon_shift.ll b/test/CodeGen/ARM/neon_shift.ll new file mode 100644 index 000000000000..340f220fb362 --- /dev/null +++ b/test/CodeGen/ARM/neon_shift.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +; +define <4 x i16> @t1(<4 x i32> %a) nounwind { +entry: +; CHECK: vqrshrn.s32 d{{[0-9]+}}, q{{[0-9]*}}, #13 + %x = tail call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> %a, <4 x i32> ) + ret <4 x i16> %x +} + +declare <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32>, <4 x i32>) nounwind readnone diff --git a/test/CodeGen/ARM/peephole-bitcast.ll b/test/CodeGen/ARM/peephole-bitcast.ll new file mode 100644 index 000000000000..e670a5be3bca --- /dev/null +++ b/test/CodeGen/ARM/peephole-bitcast.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=linearscan | FileCheck %s + +; vmov s0, r0 + vmov r0, s0 should have been optimized away. +; rdar://9104514 + +; Peephole leaves a dead vmovsr instruction behind, and depends on linear scan +; to remove it. 
+ +define void @t(float %x) nounwind ssp { +entry: +; CHECK: t: +; CHECK-NOT: vmov +; CHECK: bl + %0 = bitcast float %x to i32 + %cmp = icmp ult i32 %0, 2139095039 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @doSomething(float %x) nounwind + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +} + +declare void @doSomething(float) diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll index 895b27b749db..95f082aa9385 100644 --- a/test/CodeGen/ARM/prefetch.ll +++ b/test/CodeGen/ARM/prefetch.ll @@ -1,10 +1,15 @@ ; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld -; RUN: llc < %s -march=thumb -mattr=+v7a | FileCheck %s -check-prefix=THUMB2 -; RUN: llc < %s -march=arm -mattr=+v7a,+mp | FileCheck %s -check-prefix=ARM-MP +; RUN: llc < %s -march=thumb -mattr=+v7a | FileCheck %s -check-prefix=THUMB2 +; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s -check-prefix=ARM +; RUN: llc < %s -march=arm -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP ; rdar://8601536 define void @t1(i8* %ptr) nounwind { entry: +; ARM: t1: +; ARM-NOT: pldw [r0] +; ARM: pld [r0] + ; ARM-MP: t1: ; ARM-MP: pldw [r0] ; ARM-MP: pld [r0] @@ -19,8 +24,8 @@ entry: define void @t2(i8* %ptr) nounwind { entry: -; ARM-MP: t2: -; ARM-MP: pld [r0, #1023] +; ARM: t2: +; ARM: pld [r0, #1023] ; THUMB2: t2: ; THUMB2: pld [r0, #1023] @@ -31,8 +36,8 @@ entry: define void @t3(i32 %base, i32 %offset) nounwind { entry: -; ARM-MP: t3: -; ARM-MP: pld [r0, r1, lsr #2] +; ARM: t3: +; ARM: pld [r0, r1, lsr #2] ; THUMB2: t3: ; THUMB2: lsrs r1, r1, #2 @@ -46,8 +51,8 @@ entry: define void @t4(i32 %base, i32 %offset) nounwind { entry: -; ARM-MP: t4: -; ARM-MP: pld [r0, r1, lsl #2] +; ARM: t4: +; ARM: pld [r0, r1, lsl #2] ; THUMB2: t4: ; THUMB2: pld [r0, r1, lsl #2] diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll index 53214fd4c302..d350937c683e 100644 --- a/test/CodeGen/ARM/reg_sequence.ll +++ 
b/test/CodeGen/ARM/reg_sequence.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 -regalloc=basic | FileCheck %s ; Implementing vld / vst as REG_SEQUENCE eliminates the extra vmov's. %struct.int16x8_t = type { <8 x i16> } @@ -123,9 +124,9 @@ return1: return2: ; CHECK: %return2 ; CHECK: vadd.i32 -; CHECK: vmov q9, q11 +; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}} ; CHECK-NOT: vmov -; CHECK: vst2.32 {d16, d17, d18, d19} +; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}} %tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1] %tmp101 = extractvalue %struct.__neon_int32x4x2_t %tmp5, 1 ; <<4 x i32>> [#uses=1] %tmp102 = add <4 x i32> %tmp100, %tmp101 ; <<4 x i32>> [#uses=1] @@ -137,9 +138,10 @@ return2: define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { ; CHECK: t5: ; CHECK: vldmia -; CHECK: vmov q9, q8 +; How can FileCheck match Q and D registers? We need a lisp interpreter. +; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}} ; CHECK-NOT: vmov -; CHECK: vld2.16 {d16[1], d18[1]}, [r0] +; CHECK: vld2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0] ; CHECK-NOT: vmov ; CHECK: vadd.i16 %tmp0 = bitcast i16* %A to i8* ; [#uses=1] @@ -154,8 +156,8 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind { define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind { ; CHECK: t6: ; CHECK: vldr.64 -; CHECK: vmov d17, d16 -; CHECK-NEXT: vld2.8 {d16[1], d17[1]} +; CHECK: vmov d[[D0:[0-9]+]], d[[D1:[0-9]+]] +; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]} %tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2] %tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2] %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp2, 0 ; <<8 x i8>> [#uses=1] @@ -169,10 +171,10 @@ entry: ; CHECK: t7: ; CHECK: vld2.32 ; CHECK: vst2.32 -; CHECK: vld1.32 {d16, d17}, -; CHECK: vmov q9, q8 +; CHECK: vld1.32 {d{{[0-9]+}}, 
d{{[0-9]+}}}, +; CHECK: vmov q[[Q0:[0-9]+]], q[[Q1:[0-9]+]] ; CHECK-NOT: vmov -; CHECK: vuzp.32 q8, q9 +; CHECK: vuzp.32 q[[Q1]], q[[Q0]] ; CHECK: vst1.32 %0 = bitcast i32* %iptr to i8* ; [#uses=2] %1 = tail call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %0, i32 1) ; <%struct.__neon_int32x4x2_t> [#uses=2] @@ -271,7 +273,7 @@ define arm_aapcs_vfpcc i32 @t10() nounwind { entry: ; CHECK: t10: ; CHECK: vmul.f32 q8, q8, d0[0] -; CHECK: vmov.i32 q9, #0x3F000000 +; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000 ; CHECK: vadd.f32 q8, q8, q8 %0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1] %1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1] diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll index 687bf8834c9f..4170ff3071ad 100644 --- a/test/CodeGen/ARM/rev.ll +++ b/test/CodeGen/ARM/rev.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=arm -mattr=+v6 | FileCheck %s -define i32 @test1(i32 %X) { +define i32 @test1(i32 %X) nounwind { ; CHECK: test1 ; CHECK: rev16 r0, r0 %tmp1 = lshr i32 %X, 8 @@ -16,7 +16,7 @@ define i32 @test1(i32 %X) { ret i32 %tmp14 } -define i32 @test2(i32 %X) { +define i32 @test2(i32 %X) nounwind { ; CHECK: test2 ; CHECK: revsh r0, r0 %tmp1 = lshr i32 %X, 8 @@ -28,3 +28,29 @@ define i32 @test2(i32 %X) { %tmp5.upgrd.2 = sext i16 %tmp5 to i32 ret i32 %tmp5.upgrd.2 } + +; rdar://9147637 +define i32 @test3(i16 zeroext %a) nounwind { +entry: +; CHECK: test3: +; CHECK: revsh r0, r0 + %0 = tail call i16 @llvm.bswap.i16(i16 %a) + %1 = sext i16 %0 to i32 + ret i32 %1 +} + +declare i16 @llvm.bswap.i16(i16) nounwind readnone + +define i32 @test4(i16 zeroext %a) nounwind { +entry: +; CHECK: test4: +; CHECK: revsh r0, r0 + %conv = zext i16 %a to i32 + %shr9 = lshr i16 %a, 8 + %conv2 = zext i16 %shr9 to i32 + %shl = shl nuw nsw i32 %conv, 8 + %or = or i32 %conv2, %shl + %sext = shl i32 %or, 16 + %conv8 = ashr exact i32 %sext, 16 + ret i32 %conv8 +} diff 
--git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll index 578834ec93bc..82ed0184badd 100644 --- a/test/CodeGen/ARM/select-imm.ll +++ b/test/CodeGen/ARM/select-imm.ll @@ -6,7 +6,7 @@ define i32 @t1(i32 %c) nounwind readnone { entry: ; ARM: t1: ; ARM: mov r1, #101 -; ARM: orr r1, r1, #1, 24 +; ARM: orr r1, r1, #1, #24 ; ARM: movgt r0, #123 ; ARMT2: t1: @@ -27,7 +27,7 @@ entry: ; ARM: t2: ; ARM: mov r0, #123 ; ARM: movgt r0, #101 -; ARM: orrgt r0, r0, #1, 24 +; ARM: orrgt r0, r0, #1, #24 ; ARMT2: t2: ; ARMT2: mov r0, #123 diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 1aa0d3904125..d1493ee56e4b 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -90,3 +90,26 @@ define arm_apcscc float @f8(i32 %a) nounwind { %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000 ret float %tmp1 } + +; +; Glue values can only have a single use, but the following test exposed a +; case where a SELECT was lowered with 2 uses of a comparison, causing the +; scheduler to assert. +; CHECK-VFP: f9: + +declare i8* @objc_msgSend(i8*, i8*, ...) 
+define void @f9() optsize { +entry: + %cmp = icmp eq i8* undef, inttoptr (i32 4 to i8*) + %conv191 = select i1 %cmp, float -3.000000e+00, float 0.000000e+00 + %conv195 = select i1 %cmp, double -1.000000e+00, double 0.000000e+00 + %add = fadd double %conv195, 1.100000e+01 + %conv196 = fptrunc double %add to float + %add201 = fadd float undef, %conv191 + %tmp484 = bitcast float %conv196 to i32 + %tmp478 = bitcast float %add201 to i32 + %tmp490 = insertvalue [2 x i32] undef, i32 %tmp484, 0 + %tmp493 = insertvalue [2 x i32] %tmp490, i32 %tmp478, 1 + call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, [2 x i32], i32, float)*)(i8* undef, i8* undef, [2 x i32] %tmp493, i32 0, float 1.000000e+00) optsize + ret void +} diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll index 5dabfc3a82a3..4211797ef77e 100644 --- a/test/CodeGen/ARM/select_xform.ll +++ b/test/CodeGen/ARM/select_xform.ll @@ -4,7 +4,7 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind { ; ARM: t1: -; ARM: sub r0, r1, #6, 2 +; ARM: sub r0, r1, #6, #2 ; ARM: movgt r0, r1 ; T2: t1: diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 01e3a922f656..f0e2d102610d 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -51,19 +51,19 @@ entry: declare i8* @malloc(...) 
-define fastcc void @test4() nounwind { +define fastcc void @test4(i16 %addr) nounwind { entry: ; A8: test4: -; A8: ldr r1, [r0, r0, lsl #2] -; A8: str r1, [r0, r0, lsl #2] +; A8: ldr r2, [r0, r1, lsl #2] +; A8: str r2, [r0, r1, lsl #2] ; A9: test4: -; A9: add r0, r0, r0, lsl #2 +; A9: add r0, r0, r{{[0-9]+}}, lsl #2 ; A9: ldr r1, [r0] ; A9: str r1, [r0] %0 = tail call i8* (...)* @malloc(i32 undef) nounwind %1 = bitcast i8* %0 to i32* - %2 = sext i16 undef to i32 + %2 = sext i16 %addr to i32 %3 = getelementptr inbounds i32* %1, i32 %2 %4 = load i32* %3, align 4 %5 = add nsw i32 %4, 1 diff --git a/test/CodeGen/ARM/shuffle.ll b/test/CodeGen/ARM/shuffle.ll new file mode 100644 index 000000000000..7d6be4f5e6c3 --- /dev/null +++ b/test/CodeGen/ARM/shuffle.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin" + +define <8 x i8> @shuf(<8 x i8> %a) nounwind readnone optsize ssp { +entry: +; CHECK: vtbl + %shuffle = shufflevector <8 x i8> %a, <8 x i8> undef, <8 x i32> + ret <8 x i8> %shuffle +} + +define <8 x i8> @shuf2(<8 x i8> %a, <8 x i8> %b) nounwind readnone optsize ssp { +entry: +; CHECK: vtbl + %shuffle = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> + ret <8 x i8> %shuffle +} diff --git a/test/CodeGen/ARM/smul.ll b/test/CodeGen/ARM/smul.ll index b7ab2e796f8a..686d791ce60d 100644 --- a/test/CodeGen/ARM/smul.ll +++ b/test/CodeGen/ARM/smul.ll @@ -1,16 +1,12 @@ -; RUN: llc < %s -march=arm -; RUN: llc < %s -march=arm -mattr=+v5TE -; RUN: llc < %s -march=arm -mattr=+v5TE | \ -; RUN: grep smulbt | count 1 -; RUN: llc < %s -march=arm -mattr=+v5TE | \ -; RUN: grep smultt | count 1 -; RUN: llc < %s -march=arm -mattr=+v5TE | \ -; RUN: grep smlabt | count 1 +; RUN: llc < %s -march=arm -mcpu=generic +; RUN: llc < %s -march=arm 
-mcpu=cortex-a8 | FileCheck %s @x = weak global i16 0 ; [#uses=1] @y = weak global i16 0 ; [#uses=0] define i32 @f1(i32 %y) { +; CHECK: f1 +; CHECK: smulbt %tmp = load i16* @x ; [#uses=1] %tmp1 = add i16 %tmp, 2 ; [#uses=1] %tmp2 = sext i16 %tmp1 to i32 ; [#uses=1] @@ -20,6 +16,8 @@ define i32 @f1(i32 %y) { } define i32 @f2(i32 %x, i32 %y) { +; CHECK: f2 +; CHECK: smultt %tmp1 = ashr i32 %x, 16 ; [#uses=1] %tmp3 = ashr i32 %y, 16 ; [#uses=1] %tmp4 = mul i32 %tmp3, %tmp1 ; [#uses=1] @@ -27,6 +25,8 @@ define i32 @f2(i32 %x, i32 %y) { } define i32 @f3(i32 %a, i16 %x, i32 %y) { +; CHECK: f3 +; CHECK: smlabt %tmp = sext i16 %x to i32 ; [#uses=1] %tmp2 = ashr i32 %y, 16 ; [#uses=1] %tmp3 = mul i32 %tmp2, %tmp ; [#uses=1] diff --git a/test/CodeGen/ARM/str_pre-2.ll b/test/CodeGen/ARM/str_pre-2.ll index 465c7e676c56..b24f75a6e2b8 100644 --- a/test/CodeGen/ARM/str_pre-2.ll +++ b/test/CodeGen/ARM/str_pre-2.ll @@ -1,4 +1,7 @@ -; RUN: llc < %s -mtriple=armv6-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=linearscan | FileCheck %s +; RUN: llc < %s -mtriple=armv6-linux-gnu -regalloc=basic | FileCheck %s + +; The greedy register allocator uses a single CSR here, invalidating the test. 
@b = external global i64* diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll index 81513e23e807..555b18eb1e99 100644 --- a/test/CodeGen/ARM/sub.ll +++ b/test/CodeGen/ARM/sub.ll @@ -12,7 +12,7 @@ define i64 @f1(i64 %a) { ; 66846720 = 0x03fc0000 define i64 @f2(i64 %a) { ; CHECK: f2 -; CHECK: subs r0, r0, #255, 14 +; CHECK: subs r0, r0, #255, #14 ; CHECK: sbc r1, r1, #0 %tmp = sub i64 %a, 66846720 ret i64 %tmp @@ -27,3 +27,12 @@ define i64 @f3(i64 %a) { ret i64 %tmp } +define i32 @f4(i32 %x) { +entry: +; CHECK: f4 +; CHECK: rsbs + %sub = sub i32 1, %x + %cmp = icmp ugt i32 %sub, 0 + %sel = select i1 %cmp, i32 1, i32 %sub + ret i32 %sel +} diff --git a/test/CodeGen/ARM/thumb1-varalloc.ll b/test/CodeGen/ARM/thumb1-varalloc.ll index 25093fee225a..aa88ae0c1a86 100644 --- a/test/CodeGen/ARM/thumb1-varalloc.ll +++ b/test/CodeGen/ARM/thumb1-varalloc.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -regalloc=basic | FileCheck %s ; rdar://8819685 @__bar = external hidden global i8* @@ -12,12 +13,13 @@ entry: %0 = load i8** @__bar, align 4 %1 = icmp eq i8* %0, null br i1 %1, label %bb1, label %bb3 +; CHECK: bne bb1: store i32 1026, i32* %size, align 4 %2 = alloca [1026 x i8], align 1 -; CHECK: mov r0, sp -; CHECK: adds r4, r0, r4 +; CHECK: mov [[R0:r[0-9]+]], sp +; CHECK: adds {{r[0-9]+}}, [[R0]], {{r[0-9]+}} %3 = getelementptr inbounds [1026 x i8]* %2, i32 0, i32 0 %4 = call i32 @_called_func(i8* %3, i32* %size) nounwind %5 = icmp eq i32 %4, 0 diff --git a/test/CodeGen/ARM/trap.ll b/test/CodeGen/ARM/trap.ll index b2f6b6e69fa5..38842a9646ff 100644 --- a/test/CodeGen/ARM/trap.ll +++ b/test/CodeGen/ARM/trap.ll @@ -1,10 +1,15 @@ -; RUN: llc < %s -march=arm | FileCheck %s +; RUN: llc < %s -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=INSTR +; RUN: llc < %s -mtriple=arm-apple-darwin -trap-func=_trap | FileCheck %s -check-prefix=FUNC ; rdar://7961298 +; rdar://9249183 define void @t() 
nounwind { entry: -; CHECK: t: -; CHECK: trap +; INSTR: t: +; INSTR: trap + +; FUNC: t: +; FUNC: bl __trap call void @llvm.trap() unreachable } diff --git a/test/CodeGen/ARM/umulo-32.ll b/test/CodeGen/ARM/umulo-32.ll index aa7d28a62349..fa5c0168fefe 100644 --- a/test/CodeGen/ARM/umulo-32.ll +++ b/test/CodeGen/ARM/umulo-32.ll @@ -12,3 +12,30 @@ define i32 @func(i32 %a) nounwind { } declare %umul.ty @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone + +define i32 @f(i32 %argc, i8** %argv) ssp { +; CHECK: func +; CHECK: str r0 +; CHECK: movs r2 +; CHECK: mov r1 +; CHECK: mov r3 +; CHECK: muldi3 +%1 = alloca i32, align 4 +%2 = alloca i32, align 4 +%3 = alloca i8**, align 4 +%m_degree = alloca i32, align 4 +store i32 0, i32* %1 +store i32 %argc, i32* %2, align 4 +store i8** %argv, i8*** %3, align 4 +store i32 10, i32* %m_degree, align 4 +%4 = load i32* %m_degree, align 4 +%5 = call %umul.ty @llvm.umul.with.overflow.i32(i32 %4, i32 8) +%6 = extractvalue %umul.ty %5, 1 +%7 = extractvalue %umul.ty %5, 0 +%8 = select i1 %6, i32 -1, i32 %7 +%9 = call noalias i8* @_Znam(i32 %8) +%10 = bitcast i8* %9 to double* +ret i32 0 +} + +declare noalias i8* @_Znam(i32) diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll index b42e11f2c4ab..a8237c60e4e0 100644 --- a/test/CodeGen/ARM/unaligned_load_store.ll +++ b/test/CodeGen/ARM/unaligned_load_store.ll @@ -8,14 +8,14 @@ define void @t(i8* nocapture %a, i8* nocapture %b) nounwind { entry: ; GENERIC: t: -; GENERIC: ldrb r2 -; GENERIC: ldrb r3 -; GENERIC: ldrb r12 -; GENERIC: ldrb r1 -; GENERIC: strb r1 -; GENERIC: strb r12 -; GENERIC: strb r3 -; GENERIC: strb r2 +; GENERIC: ldrb [[R2:r[0-9]+]] +; GENERIC: ldrb [[R3:r[0-9]+]] +; GENERIC: ldrb [[R12:r[0-9]+]] +; GENERIC: ldrb [[R1:r[0-9]+]] +; GENERIC: strb [[R1]] +; GENERIC: strb [[R12]] +; GENERIC: strb [[R3]] +; GENERIC: strb [[R2]] ; DARWIN_V6: t: ; DARWIN_V6: ldr r1 diff --git a/test/CodeGen/ARM/undef-sext.ll 
b/test/CodeGen/ARM/undef-sext.ll new file mode 100644 index 000000000000..2c28da3b6461 --- /dev/null +++ b/test/CodeGen/ARM/undef-sext.ll @@ -0,0 +1,14 @@ +; RUN: llc < %s -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s + +; No need to sign-extend undef. + +define i32 @t(i32* %a) nounwind { +entry: +; CHECK: t: +; CHECK: ldr r0, [r0] +; CHECK: bx lr + %0 = sext i16 undef to i32 + %1 = getelementptr inbounds i32* %a, i32 %0 + %2 = load i32* %1, align 4 + ret i32 %2 +} diff --git a/test/CodeGen/ARM/va_arg.ll b/test/CodeGen/ARM/va_arg.ll index 7cb976236dc5..bb4045311624 100644 --- a/test/CodeGen/ARM/va_arg.ll +++ b/test/CodeGen/ARM/va_arg.ll @@ -1,10 +1,10 @@ -; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s +; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -pre-RA-sched=source | FileCheck %s ; Test that we correctly align elements when using va_arg ; CHECK: test1: ; CHECK-NOT: bfc -; CHECK: add r0, r0, #7 -; CHECK: bfc r0, #0, #3 +; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 +; CHECK: bfc [[REG]], #0, #3 ; CHECK-NOT: bfc define i64 @test1(i32 %i, ...) 
nounwind optsize { @@ -19,8 +19,8 @@ entry: ; CHECK: test2: ; CHECK-NOT: bfc -; CHECK: add r0, r0, #7 -; CHECK: bfc r0, #0, #3 +; CHECK: add [[REG:(r[0-9]+)|(lr)]], {{(r[0-9]+)|(lr)}}, #7 +; CHECK: bfc [[REG]], #0, #3 ; CHECK-NOT: bfc ; CHECK: bx lr diff --git a/test/CodeGen/ARM/vbsl-constant.ll b/test/CodeGen/ARM/vbsl-constant.ll new file mode 100644 index 000000000000..14e668efb1da --- /dev/null +++ b/test/CodeGen/ARM/vbsl-constant.ll @@ -0,0 +1,115 @@ +; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s + +define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind { +;CHECK: v_bsli8: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = load <8 x i8>* %C + %tmp4 = and <8 x i8> %tmp1, + %tmp6 = and <8 x i8> %tmp3, + %tmp7 = or <8 x i8> %tmp4, %tmp6 + ret <8 x i8> %tmp7 +} + +define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { +;CHECK: v_bsli16: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = load <4 x i16>* %C + %tmp4 = and <4 x i16> %tmp1, + %tmp6 = and <4 x i16> %tmp3, + %tmp7 = or <4 x i16> %tmp4, %tmp6 + ret <4 x i16> %tmp7 +} + +define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { +;CHECK: v_bsli32: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = load <2 x i32>* %C + %tmp4 = and <2 x i32> %tmp1, + %tmp6 = and <2 x i32> %tmp3, + %tmp7 = or <2 x i32> %tmp4, %tmp6 + ret <2 x i32> %tmp7 +} + +define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind { +;CHECK: v_bsli64: +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vldr.64 +;CHECK: vbsl + %tmp1 = load <1 x i64>* %A + %tmp2 = load <1 x i64>* %B + %tmp3 = load <1 x i64>* %C + %tmp4 = and <1 x i64> %tmp1, + %tmp6 = and <1 x i64> %tmp3, + %tmp7 = or <1 x i64> %tmp4, %tmp6 + ret <1 x i64> %tmp7 +} + +define <16 x i8> 
@v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind { +;CHECK: v_bslQi8: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <16 x i8>* %A + %tmp2 = load <16 x i8>* %B + %tmp3 = load <16 x i8>* %C + %tmp4 = and <16 x i8> %tmp1, + %tmp6 = and <16 x i8> %tmp3, + %tmp7 = or <16 x i8> %tmp4, %tmp6 + ret <16 x i8> %tmp7 +} + +define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind { +;CHECK: v_bslQi16: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = load <8 x i16>* %C + %tmp4 = and <8 x i16> %tmp1, + %tmp6 = and <8 x i16> %tmp3, + %tmp7 = or <8 x i16> %tmp4, %tmp6 + ret <8 x i16> %tmp7 +} + +define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind { +;CHECK: v_bslQi32: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <4 x i32>* %A + %tmp2 = load <4 x i32>* %B + %tmp3 = load <4 x i32>* %C + %tmp4 = and <4 x i32> %tmp1, + %tmp6 = and <4 x i32> %tmp3, + %tmp7 = or <4 x i32> %tmp4, %tmp6 + ret <4 x i32> %tmp7 +} + +define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind { +;CHECK: v_bslQi64: +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vldmia +;CHECK: vbsl + %tmp1 = load <2 x i64>* %A + %tmp2 = load <2 x i64>* %B + %tmp3 = load <2 x i64>* %C + %tmp4 = and <2 x i64> %tmp1, + %tmp6 = and <2 x i64> %tmp3, + %tmp7 = or <2 x i64> %tmp4, %tmp6 + ret <2 x i64> %tmp7 +} diff --git a/test/CodeGen/ARM/vcgt.ll b/test/CodeGen/ARM/vcgt.ll index c3c4cb356307..2243bac91fb1 100644 --- a/test/CodeGen/ARM/vcgt.ll +++ b/test/CodeGen/ARM/vcgt.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vcgts8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vcgts8: @@ -161,9 +162,9 @@ define <4 x i32> @vacgtQf32(<4 x float>* %A, <4 x float>* %B) nounwind { ; rdar://7923010 define <4 x i32> @vcgt_zext(<4 x float>* %A, <4 x 
float>* %B) nounwind { ;CHECK: vcgt_zext: -;CHECK: vmov.i32 q10, #0x1 -;CHECK: vcgt.f32 q8 -;CHECK: vand q8, q8, q10 +;CHECK: vmov.i32 [[Q0:q[0-9]+]], #0x1 +;CHECK: vcgt.f32 [[Q1:q[0-9]+]] +;CHECK: vand [[Q2:q[0-9]+]], [[Q1]], [[Q0]] %tmp1 = load <4 x float>* %A %tmp2 = load <4 x float>* %B %tmp3 = fcmp ogt <4 x float> %tmp1, %tmp2 diff --git a/test/CodeGen/ARM/vector-DAGCombine.ll b/test/CodeGen/ARM/vector-DAGCombine.ll index 3ab0cfcbbc77..81bdc44863b7 100644 --- a/test/CodeGen/ARM/vector-DAGCombine.ll +++ b/test/CodeGen/ARM/vector-DAGCombine.ll @@ -105,3 +105,21 @@ define void @i64_extractelement(i64* %ptr, <2 x i64>* %vp) nounwind { store i64 %t1, i64* %ptr ret void } + +; Test trying to do a AND Combine on illegal types. +define void @andVec(<3 x i8>* %A) nounwind { + %tmp = load <3 x i8>* %A, align 4 + %and = and <3 x i8> %tmp, + store <3 x i8> %and, <3 x i8>* %A + ret void +} + + +; Test trying to do an OR Combine on illegal types. +define void @orVec(<3 x i8>* %A) nounwind { + %tmp = load <3 x i8>* %A, align 4 + %or = or <3 x i8> %tmp, + store <3 x i8> %or, <3 x i8>* %A + ret void +} + diff --git a/test/CodeGen/ARM/vext.ll b/test/CodeGen/ARM/vext.ll index 55abefef0fa7..49a042b7e1f5 100644 --- a/test/CodeGen/ARM/vext.ll +++ b/test/CodeGen/ARM/vext.ll @@ -125,11 +125,11 @@ define <4 x i16> @test_largespan(<8 x i16>* %B) nounwind { ; The actual shuffle code only handles some cases, make sure we check ; this rather than blindly emitting a VECTOR_SHUFFLE (infinite ; lowering loop can result otherwise). 
-define <8 x i8> @test_illegal(<16 x i8>* %A, <16 x i8>* %B) nounwind { +define <8 x i16> @test_illegal(<8 x i16>* %A, <8 x i16>* %B) nounwind { ;CHECK: test_illegal: -;CHECK: vst1.8 - %tmp1 = load <16 x i8>* %A - %tmp2 = load <16 x i8>* %B - %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <8 x i32> - ret <8 x i8> %tmp3 +;CHECK: vst1.16 + %tmp1 = load <8 x i16>* %A + %tmp2 = load <8 x i16>* %B + %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> + ret <8 x i16> %tmp3 } diff --git a/test/CodeGen/ARM/vfp.ll b/test/CodeGen/ARM/vfp.ll index 44a44afe9af4..49a69827bc05 100644 --- a/test/CodeGen/ARM/vfp.ll +++ b/test/CodeGen/ARM/vfp.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+vfp2 -disable-post-ra -regalloc=basic | FileCheck %s define void @test(float* %P, double* %D) { %A = load float* %P ; [#uses=1] @@ -40,9 +41,9 @@ define void @test_ext_round(float* %P, double* %D) { ;CHECK: test_ext_round: %a = load float* %P ; [#uses=1] ;CHECK: vcvt.f64.f32 +;CHECK: vcvt.f32.f64 %b = fpext float %a to double ; [#uses=1] %A = load double* %D ; [#uses=1] -;CHECK: vcvt.f32.f64 %B = fptrunc double %A to float ; [#uses=1] store double %b, double* %D store float %B, float* %P diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll index c886125a2fb0..e524395c501a 100644 --- a/test/CodeGen/ARM/vld1.ll +++ b/test/CodeGen/ARM/vld1.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vld1i8(i8* %A) nounwind { ;CHECK: vld1i8: @@ -19,7 +20,7 @@ define <4 x i16> @vld1i16(i16* %A) nounwind { ;Check for a post-increment updating load. define <4 x i16> @vld1i16_update(i16** %ptr) nounwind { ;CHECK: vld1i16_update: -;CHECK: vld1.16 {d16}, [r1]! +;CHECK: vld1.16 {d16}, [{{r[0-9]+}}]! 
%A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i8* %tmp0, i32 1) @@ -39,7 +40,7 @@ define <2 x i32> @vld1i32(i32* %A) nounwind { ;Check for a post-increment updating load with register increment. define <2 x i32> @vld1i32_update(i32** %ptr, i32 %inc) nounwind { ;CHECK: vld1i32_update: -;CHECK: vld1.32 {d16}, [r2], r1 +;CHECK: vld1.32 {d16}, [{{r[0-9]+}}], {{r[0-9]+}} %A = load i32** %ptr %tmp0 = bitcast i32* %A to i8* %tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i8* %tmp0, i32 1) @@ -75,7 +76,7 @@ define <16 x i8> @vld1Qi8(i8* %A) nounwind { ;Check for a post-increment updating load. define <16 x i8> @vld1Qi8_update(i8** %ptr) nounwind { ;CHECK: vld1Qi8_update: -;CHECK: vld1.8 {d16, d17}, [r1, :64]! +;CHECK: vld1.8 {d16, d17}, [{{r[0-9]+}}, :64]! %A = load i8** %ptr %tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A, i32 8) %tmp2 = getelementptr i8* %A, i32 16 @@ -132,8 +133,6 @@ declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*, i32) nounwind readonly ; Do not crash if the vld1 result is not used. define void @unused_vld1_result() { entry: -;CHECK: unused_vld1_result -;CHECK: vld1.32 %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef, i32 1) call void @llvm.trap() unreachable diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll index dde530f6df1f..b495319830b0 100644 --- a/test/CodeGen/ARM/vld3.ll +++ b/test/CodeGen/ARM/vld3.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s %struct.__neon_int8x8x3_t = type { <8 x i8>, <8 x i8>, <8 x i8> } %struct.__neon_int16x4x3_t = type { <4 x i16>, <4 x i16>, <4 x i16> } @@ -36,7 +37,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind { ;Check for a post-increment updating load with register increment. 
define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind { ;CHECK: vld3i16_update: -;CHECK: vld3.16 {d16, d17, d18}, [r2], r1 +;CHECK: vld3.16 {d16, d17, d18}, [{{r[0-9]+}}], {{r[0-9]+}} %A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1) @@ -121,8 +122,8 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind { ;Check for a post-increment updating load. define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind { ;CHECK: vld3Qi32_update: -;CHECK: vld3.32 {d16, d18, d20}, [r1]! -;CHECK: vld3.32 {d17, d19, d21}, [r1]! +;CHECK: vld3.32 {d16, d18, d20}, [r[[R:[0-9]+]]]! +;CHECK: vld3.32 {d17, d19, d21}, [r[[R]]]! %A = load i32** %ptr %tmp0 = bitcast i32* %A to i8* %tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1) diff --git a/test/CodeGen/ARM/vldlane.ll b/test/CodeGen/ARM/vldlane.ll index 770ed071ac12..805aad51d4fd 100644 --- a/test/CodeGen/ARM/vldlane.ll +++ b/test/CodeGen/ARM/vldlane.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -regalloc=basic | FileCheck %s define <8 x i8> @vld1lanei8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vld1lanei8: @@ -279,7 +280,7 @@ define <8 x i16> @vld3laneQi16(i16* %A, <8 x i16>* %B) nounwind { ;Check for a post-increment updating load with register increment. define <8 x i16> @vld3laneQi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind { ;CHECK: vld3laneQi16_update: -;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [r2], r1 +;CHECK: vld3.16 {d16[1], d18[1], d20[1]}, [{{r[0-9]+}}], {{r[0-9]+}} %A = load i16** %ptr %tmp0 = bitcast i16* %A to i8* %tmp1 = load <8 x i16>* %B @@ -490,7 +491,7 @@ declare %struct.__neon_float32x4x4_t @llvm.arm.neon.vld4lane.v4f32(i8*, <4 x flo ; in the QPR_VFP2 regclass, it needs to be copied to a QPR regclass because ; we don't currently have a QQQQ_VFP2 super-regclass. 
(The "0" for the low ; part of %ins67 is supposed to be loaded by a VLDRS instruction in this test.) -define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { +define <8 x i16> @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { ;CHECK: test_qqqq_regsequence_subreg ;CHECK: vld3.16 %tmp63 = extractvalue [6 x i64] %b, 5 @@ -499,8 +500,12 @@ define void @test_qqqq_regsequence_subreg([6 x i64] %b) nounwind { %ins67 = or i128 %tmp65, 0 %tmp78 = bitcast i128 %ins67 to <8 x i16> %vld3_lane = tail call %struct.__neon_int16x8x3_t @llvm.arm.neon.vld3lane.v8i16(i8* undef, <8 x i16> undef, <8 x i16> undef, <8 x i16> %tmp78, i32 1, i32 2) - call void @llvm.trap() - unreachable + %tmp3 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 0 + %tmp4 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 1 + %tmp5 = extractvalue %struct.__neon_int16x8x3_t %vld3_lane, 2 + %tmp6 = add <8 x i16> %tmp3, %tmp4 + %tmp7 = add <8 x i16> %tmp5, %tmp6 + ret <8 x i16> %tmp7 } declare void @llvm.trap() nounwind diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll index ee033caa00d0..1fd6581ae081 100644 --- a/test/CodeGen/ARM/vmul.ll +++ b/test/CodeGen/ARM/vmul.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s define <8 x i8> @vmuli8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmuli8: @@ -158,6 +158,15 @@ define <8 x i16> @vmulls8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i16> %tmp5 } +define <8 x i16> @vmulls8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmulls8_int: +;CHECK: vmull.s8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmulls16: ;CHECK: vmull.s16 @@ -169,6 +178,15 @@ define <4 x i32> @vmulls16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ret <4 x i32> %tmp5 } +define <4 
x i32> @vmulls16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmulls16_int: +;CHECK: vmull.s16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmulls32: ;CHECK: vmull.s32 @@ -180,6 +198,15 @@ define <2 x i64> @vmulls32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ret <2 x i64> %tmp5 } +define <2 x i64> @vmulls32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmulls32_int: +;CHECK: vmull.s32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmullu8: ;CHECK: vmull.u8 @@ -191,6 +218,15 @@ define <8 x i16> @vmullu8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ret <8 x i16> %tmp5 } +define <8 x i16> @vmullu8_int(<8 x i8>* %A, <8 x i8>* %B) nounwind { +;CHECK: vmullu8_int: +;CHECK: vmull.u8 + %tmp1 = load <8 x i8>* %A + %tmp2 = load <8 x i8>* %B + %tmp3 = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %tmp1, <8 x i8> %tmp2) + ret <8 x i16> %tmp3 +} + define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ;CHECK: vmullu16: ;CHECK: vmull.u16 @@ -202,6 +238,15 @@ define <4 x i32> @vmullu16(<4 x i16>* %A, <4 x i16>* %B) nounwind { ret <4 x i32> %tmp5 } +define <4 x i32> @vmullu16_int(<4 x i16>* %A, <4 x i16>* %B) nounwind { +;CHECK: vmullu16_int: +;CHECK: vmull.u16 + %tmp1 = load <4 x i16>* %A + %tmp2 = load <4 x i16>* %B + %tmp3 = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %tmp1, <4 x i16> %tmp2) + ret <4 x i32> %tmp3 +} + define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ;CHECK: vmullu32: ;CHECK: vmull.u32 @@ -213,6 +258,15 @@ define <2 x i64> @vmullu32(<2 x i32>* %A, <2 x i32>* %B) nounwind { ret <2 x i64> %tmp5 } +define <2 x i64> 
@vmullu32_int(<2 x i32>* %A, <2 x i32>* %B) nounwind { +;CHECK: vmullu32_int: +;CHECK: vmull.u32 + %tmp1 = load <2 x i32>* %A + %tmp2 = load <2 x i32>* %B + %tmp3 = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %tmp1, <2 x i32> %tmp2) + ret <2 x i64> %tmp3 +} + define <8 x i16> @vmullp8(<8 x i8>* %A, <8 x i8>* %B) nounwind { ;CHECK: vmullp8: ;CHECK: vmull.p8 @@ -233,6 +287,15 @@ entry: ret <4 x i32> %3 } +define arm_aapcs_vfpcc <4 x i32> @test_vmull_lanes16_int(<4 x i16> %arg0_int16x4_t, <4 x i16> %arg1_int16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes16_int +; CHECK: vmull.s16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_int16x4_t, <4 x i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %arg0_int16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { entry: ; CHECK: test_vmull_lanes32 @@ -244,6 +307,15 @@ entry: ret <2 x i64> %3 } +define arm_aapcs_vfpcc <2 x i64> @test_vmull_lanes32_int(<2 x i32> %arg0_int32x2_t, <2 x i32> %arg1_int32x2_t) nounwind readnone { +entry: +; CHECK: test_vmull_lanes32_int +; CHECK: vmull.s32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_int32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %arg0_int32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { entry: ; CHECK: test_vmull_laneu16 @@ -255,6 +327,15 @@ entry: ret <4 x i32> %3 } +define arm_aapcs_vfpcc <4 x i32> @test_vmull_laneu16_int(<4 x i16> %arg0_uint16x4_t, <4 x i16> %arg1_uint16x4_t) nounwind readnone { +entry: +; CHECK: test_vmull_laneu16_int +; CHECK: vmull.u16 q0, d0, d1[1] + %0 = shufflevector <4 x i16> %arg1_uint16x4_t, <4 x 
i16> undef, <4 x i32> ; <<4 x i16>> [#uses=1] + %1 = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %arg0_uint16x4_t, <4 x i16> %0) ; <<4 x i32>> [#uses=1] + ret <4 x i32> %1 +} + define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { entry: ; CHECK: test_vmull_laneu32 @@ -266,6 +347,23 @@ entry: ret <2 x i64> %3 } +define arm_aapcs_vfpcc <2 x i64> @test_vmull_laneu32_int(<2 x i32> %arg0_uint32x2_t, <2 x i32> %arg1_uint32x2_t) nounwind readnone { +entry: +; CHECK: test_vmull_laneu32_int +; CHECK: vmull.u32 q0, d0, d1[1] + %0 = shufflevector <2 x i32> %arg1_uint32x2_t, <2 x i32> undef, <2 x i32> ; <<2 x i32>> [#uses=1] + %1 = tail call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %arg0_uint32x2_t, <2 x i32> %0) ; <<2 x i64>> [#uses=1] + ret <2 x i64> %1 +} + +declare <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + +declare <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8>, <8 x i8>) nounwind readnone +declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone +declare <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32>, <2 x i32>) nounwind readnone + declare <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8>, <8 x i8>) nounwind readnone @@ -339,3 +437,58 @@ define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind { %tmp4 = mul <2 x i64> %tmp3, ret <2 x i64> %tmp4 } + +; rdar://9197392 +define void @distribue(i16* %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK: distribue: +; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]] +; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]] + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 
x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = zext <8 x i8> %6 to <8 x i16> + %8 = zext <8 x i8> %2 to <8 x i16> + %9 = extractelement <2 x double> %4, i32 0 + %10 = bitcast double %9 to <8 x i8> + %11 = zext <8 x i8> %10 to <8 x i16> + %12 = add <8 x i16> %7, %11 + %13 = mul <8 x i16> %12, %8 + %14 = bitcast i16* %dst to i8* + tail call void @llvm.arm.neon.vst1.v8i16(i8* %14, <8 x i16> %13, i32 2) + ret void +} + +declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*, i32) nounwind readonly + +declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind + +; Take advantage of the Cortex-A8 multiplier accumulator forward. + +%struct.uint8x8_t = type { <8 x i8> } + +define void @distribue2(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind { +entry: +; CHECK: distribue2 +; CHECK-NOT: vadd.i8 +; CHECK: vmul.i8 +; CHECK: vmla.i8 + %0 = trunc i32 %mul to i8 + %1 = insertelement <8 x i8> undef, i8 %0, i32 0 + %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer + %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1) + %4 = bitcast <16 x i8> %3 to <2 x double> + %5 = extractelement <2 x double> %4, i32 1 + %6 = bitcast double %5 to <8 x i8> + %7 = extractelement <2 x double> %4, i32 0 + %8 = bitcast double %7 to <8 x i8> + %9 = add <8 x i8> %6, %8 + %10 = mul <8 x i8> %9, %2 + %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0 + store <8 x i8> %10, <8 x i8>* %11, align 8 + ret void +} diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll index d262303bc60e..e3372a03793d 100644 --- a/test/CodeGen/ARM/vst3.ll +++ b/test/CodeGen/ARM/vst3.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm -mattr=+neon -O0 | FileCheck %s +; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind { ;CHECK: vst3i8: 
diff --git a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll index 3ee5e8df9972..50fccb440990 100644 --- a/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll +++ b/test/CodeGen/Blackfin/2009-08-04-LowerExtract-Live.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs +; RUN: llc < %s -march=bfin -join-liveintervals=0 -verify-machineinstrs -regalloc=greedy ; Provoke an error in LowerSubregsPass::LowerExtract where the live range of a ; super-register is illegally extended. diff --git a/test/CodeGen/CellSPU/jumptable.ll b/test/CodeGen/CellSPU/jumptable.ll index 42b41b3bf29b..87376ef6ed53 100644 --- a/test/CodeGen/CellSPU/jumptable.ll +++ b/test/CodeGen/CellSPU/jumptable.ll @@ -1,4 +1,4 @@ -;RUN: llc --march=cellspu %s -o - | FileCheck %s +;RUN: llc --march=cellspu -disable-cgp-branch-opts %s -o - | FileCheck %s ; This is to check that emitting jumptables doesn't crash llc define i32 @test(i32 %param) { entry: diff --git a/test/CodeGen/CellSPU/loads.ll b/test/CodeGen/CellSPU/loads.ll index 03d7ad1153a1..4771752f5f4c 100644 --- a/test/CodeGen/CellSPU/loads.ll +++ b/test/CodeGen/CellSPU/loads.ll @@ -50,3 +50,10 @@ define i32 @load_misaligned( i32* %ptr ){ %rv = load i32* %ptr, align 2 ret i32 %rv } + +define <4 x i32> @load_null_vec( ) { +;CHECK: lqa +;CHECK: bi $lr + %rv = load <4 x i32>* null + ret <4 x i32> %rv +} diff --git a/test/CodeGen/CellSPU/rotate_ops.ll b/test/CodeGen/CellSPU/rotate_ops.ll index e1172089c703..b1219e6f56e5 100644 --- a/test/CodeGen/CellSPU/rotate_ops.ll +++ b/test/CodeGen/CellSPU/rotate_ops.ll @@ -3,9 +3,9 @@ ; RUN: grep roth %t1.s | count 8 ; RUN: grep roti.*5 %t1.s | count 1 ; RUN: grep roti.*27 %t1.s | count 1 -; RUN grep rothi.*5 %t1.s | count 2 -; RUN grep rothi.*11 %t1.s | count 1 -; RUN grep rothi.*,.3 %t1.s | count 1 +; RUN: grep rothi.*5 %t1.s | count 2 +; RUN: grep rothi.*11 %t1.s | count 1 +; RUN: grep rothi.*,.3 %t1.s | count 1 ; 
RUN: grep andhi %t1.s | count 4 ; RUN: grep shlhi %t1.s | count 4 ; RUN: cat %t1.s | FileCheck %s diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll index 92390abf9465..c4a5abd29042 100644 --- a/test/CodeGen/CellSPU/shift_ops.ll +++ b/test/CodeGen/CellSPU/shift_ops.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -march=cellspu > %t1.s -; RUN: grep {shlh } %t1.s | count 9 +; RUN: grep {shlh } %t1.s | count 10 ; RUN: grep {shlhi } %t1.s | count 3 -; RUN: grep {shl } %t1.s | count 9 +; RUN: grep {shl } %t1.s | count 11 ; RUN: grep {shli } %t1.s | count 3 ; RUN: grep {xshw } %t1.s | count 5 ; RUN: grep {and } %t1.s | count 14 @@ -14,15 +14,12 @@ ; RUN: grep {rotqbyi } %t1.s | count 1 ; RUN: grep {rotqbii } %t1.s | count 2 ; RUN: grep {rotqbybi } %t1.s | count 1 -; RUN: grep {sfi } %t1.s | count 4 +; RUN: grep {sfi } %t1.s | count 6 ; RUN: cat %t1.s | FileCheck %s target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" -; Vector shifts are not currently supported in gcc or llvm assembly. These are -; not tested. 
- ; Shift left i16 via register, note that the second operand to shl is promoted ; to a 32-bit type: @@ -293,3 +290,55 @@ define i128 @test_lshr_i128( i128 %val ) { %rv = lshr i128 %val, 64 ret i128 %rv } + +;Vector shifts +define <2 x i32> @shl_v2i32(<2 x i32> %val, <2 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <2 x i32> %val, %sh + ret <2 x i32> %rv +} + +define <4 x i32> @shl_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: shl +;CHECK: bi $lr + %rv = shl <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @shl_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: shlh +;CHECK: bi $lr + %rv = shl <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @lshr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotm +;CHECK: bi $lr + %rv = lshr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @lshr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rothm +;CHECK: bi $lr + %rv = lshr <8 x i16> %val, %sh + ret <8 x i16> %rv +} + +define <4 x i32> @ashr_v4i32(<4 x i32> %val, <4 x i32> %sh) { +;CHECK: rotma +;CHECK: bi $lr + %rv = ashr <4 x i32> %val, %sh + ret <4 x i32> %rv +} + +define <8 x i16> @ashr_v8i16(<8 x i16> %val, <8 x i16> %sh) { +;CHECK: sfhi +;CHECK: rotmah +;CHECK: bi $lr + %rv = ashr <8 x i16> %val, %sh + ret <8 x i16> %rv +} diff --git a/test/CodeGen/CellSPU/stores.ll b/test/CodeGen/CellSPU/stores.ll index 7e0bf06b4e45..6ca5b0892304 100644 --- a/test/CodeGen/CellSPU/stores.ll +++ b/test/CodeGen/CellSPU/stores.ll @@ -171,3 +171,11 @@ define void @store_v8( <8 x float> %val, <8 x float>* %ptr ) store <8 x float> %val, <8 x float>* %ptr ret void } + +define void @store_null_vec( <4 x i32> %val ) { +; FIXME - this is for some reason compiled into a il+stqd, not a sta. 
+;CHECK: stqd +;CHECK: bi $lr + store <4 x i32> %val, <4 x i32>* null + ret void +} diff --git a/test/CodeGen/CellSPU/v2f32.ll b/test/CodeGen/CellSPU/v2f32.ll index efd032031002..09e15ffbc75d 100644 --- a/test/CodeGen/CellSPU/v2f32.ll +++ b/test/CodeGen/CellSPU/v2f32.ll @@ -33,6 +33,7 @@ define %vec @test_mul(%vec %param) ret %vec %1 } +; CHECK: test_splat: define %vec @test_splat(float %param ) { ;CHECK: lqa ;CHECK: shufb @@ -43,16 +44,17 @@ define %vec @test_splat(float %param ) { } define void @test_store(%vec %val, %vec* %ptr){ - +; CHECK: test_store: ;CHECK: stqd - store %vec undef, %vec* null + store %vec zeroinitializer, %vec* null -;CHECK: stqd $3, 0(${{.}}) +;CHECK: stqd $3, 0(${{.*}}) ;CHECK: bi $lr store %vec %val, %vec* %ptr ret void } +; CHECK: test_insert: define %vec @test_insert(){ ;CHECK: cwd ;CHECK: shufb $3 @@ -61,6 +63,8 @@ define %vec @test_insert(){ ret %vec %rv } +; CHECK: test_unaligned_store: + define void @test_unaligned_store() { ;CHECK: cdd ;CHECK: shufb @@ -68,7 +72,7 @@ define void @test_unaligned_store() { %data = alloca [4 x float], align 16 ; <[4 x float]*> [#uses=1] %ptr = getelementptr [4 x float]* %data, i32 0, i32 2 ; [#uses=1] %vptr = bitcast float* %ptr to <2 x float>* ; <[1 x <2 x float>]*> [#uses=1] - store <2 x float> undef, <2 x float>* %vptr + store <2 x float> zeroinitializer, <2 x float>* %vptr ret void } diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll index 042739884df7..e7cc7e339406 100644 --- a/test/CodeGen/Generic/crash.ll +++ b/test/CodeGen/Generic/crash.ll @@ -38,3 +38,31 @@ unreachable declare void @Parse_Vector(double*) declare i32 @llvm.objectsize.i32(i8*, i1) + +; PR9578 +%struct.S0 = type { i32, i8, i32 } + +define void @func_82() nounwind optsize { +entry: + br label %for.body.i + +for.body.i: ; preds = %for.body.i, %entry + br i1 undef, label %func_74.exit.for.cond29.thread_crit_edge, label %for.body.i + +func_74.exit.for.cond29.thread_crit_edge: ; preds = %for.body.i + 
%f13576.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1 + store i8 0, i8* %f13576.pre, align 4, !tbaa !0 + br label %lbl_468 + +lbl_468: ; preds = %lbl_468, %func_74.exit.for.cond29.thread_crit_edge + %f13577.ph = phi i8* [ %f13576.pre, %func_74.exit.for.cond29.thread_crit_edge ], [ %f135.pre, %lbl_468 ] + store i8 1, i8* %f13577.ph, align 1 + %f135.pre = getelementptr inbounds %struct.S0* undef, i64 0, i32 1 + br i1 undef, label %lbl_468, label %for.end74 + +for.end74: ; preds = %lbl_468 + ret void +} + +!0 = metadata !{metadata !"omnipotent char", metadata !1} +!1 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/MBlaze/fsl.ll b/test/CodeGen/MBlaze/fsl.ll index f9c6205bc19f..5444f82dd63c 100644 --- a/test/CodeGen/MBlaze/fsl.ll +++ b/test/CodeGen/MBlaze/fsl.ll @@ -3,7 +3,7 @@ ; dynamic version of the instructions and that constant values use the ; constant version of the instructions. ; -; RUN: llc < %s -march=mblaze | FileCheck %s +; RUN: llc -O3 < %s -march=mblaze | FileCheck %s declare i32 @llvm.mblaze.fsl.get(i32 %port) declare i32 @llvm.mblaze.fsl.aget(i32 %port) @@ -55,8 +55,7 @@ declare void @llvm.mblaze.fsl.tnaput(i32 %port) declare void @llvm.mblaze.fsl.tncput(i32 %port) declare void @llvm.mblaze.fsl.tncaput(i32 %port) -define i32 @fsl_get(i32 %port) -{ +define void @fsl_get(i32 %port) { ; CHECK: fsl_get: %v0 = call i32 @llvm.mblaze.fsl.get(i32 %port) ; CHECK: getd @@ -122,12 +121,11 @@ define i32 @fsl_get(i32 %port) ; CHECK-NEXT: tnecgetd %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 %port) ; CHECK-NEXT: tnecagetd - ret i32 1 + ret void ; CHECK: rtsd } -define i32 @fslc_get() -{ +define void @fslc_get() { ; CHECK: fslc_get: %v0 = call i32 @llvm.mblaze.fsl.get(i32 1) ; CHECK: get @@ -224,12 +222,11 @@ define i32 @fslc_get() %v31 = call i32 @llvm.mblaze.fsl.tnecaget(i32 1) ; CHECK-NOT: tnecagetd ; CHECK: tnecaget - ret i32 1 + ret void ; CHECK: rtsd } -define void @putfsl(i32 %value, i32 %port) -{ +define void 
@putfsl(i32 %value, i32 %port) { ; CHECK: putfsl: call void @llvm.mblaze.fsl.put(i32 %value, i32 %port) ; CHECK: putd @@ -267,8 +264,7 @@ define void @putfsl(i32 %value, i32 %port) ; CHECK: rtsd } -define void @putfsl_const(i32 %value) -{ +define void @putfsl_const(i32 %value) { ; CHECK: putfsl_const: call void @llvm.mblaze.fsl.put(i32 %value, i32 1) ; CHECK-NOT: putd diff --git a/test/CodeGen/MBlaze/loop.ll b/test/CodeGen/MBlaze/loop.ll index 8973f75aa1dc..7439d0b6fe22 100644 --- a/test/CodeGen/MBlaze/loop.ll +++ b/test/CodeGen/MBlaze/loop.ll @@ -29,14 +29,12 @@ loop_inner_finish: %inner.5 = add i32 %inner.2, 1 call i32 (i8*,...)* @printf( i8* getelementptr([19 x i8]* @MSG,i32 0,i32 0), i32 %inner.0, i32 %inner.1, i32 %inner.2 ) - ; CHECK: brlid - ; CHECK: addik {{.*, 1}} %inner.6 = icmp eq i32 %inner.5, 100 - ; CHECK: cmp + ; CHECK: cmp [[REG:r[0-9]*]] br i1 %inner.6, label %loop_inner, label %loop_outer_finish - ; CHECK: {{beq|bne}} + ; CHECK: {{beqid|bneid}} [[REG]] loop_outer_finish: %outer.1 = add i32 %outer.0, 1 diff --git a/test/CodeGen/Mips/2008-07-22-Cstpool.ll b/test/CodeGen/Mips/2008-07-22-Cstpool.ll index 20bd88889061..94dfe35faba1 100644 --- a/test/CodeGen/Mips/2008-07-22-Cstpool.ll +++ b/test/CodeGen/Mips/2008-07-22-Cstpool.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -march=mips -o %t ; RUN: grep {CPI\[01\]_\[01\]:} %t | count 2 -; RUN: grep {rodata.cst4,"aM",@progbits} %t | count 1 +; RUN: grep {.rodata.cst4,"aM",@progbits} %t | count 1 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2008-07-23-fpcmp.ll b/test/CodeGen/Mips/2008-07-23-fpcmp.ll index ca837ffd2a50..519e4b93a72b 100644 --- a/test/CodeGen/Mips/2008-07-23-fpcmp.ll +++ b/test/CodeGen/Mips/2008-07-23-fpcmp.ll @@ -2,6 +2,10 @@ ; RUN: grep {c\\..*\\.s} %t | count 3 ; RUN: grep {bc1\[tf\]} %t | count 3 +; FIXME: Disabled because branch 
instructions are generated where +; conditional move instructions are expected. +; REQUIRES: disabled + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2008-07-29-icmp.ll b/test/CodeGen/Mips/2008-07-29-icmp.ll index 52a4b081ddb3..e85a749f7dcd 100644 --- a/test/CodeGen/Mips/2008-07-29-icmp.ll +++ b/test/CodeGen/Mips/2008-07-29-icmp.ll @@ -1,5 +1,9 @@ ; RUN: llc < %s -march=mips | grep {b\[ne\]\[eq\]} | count 1 +; FIXME: Disabled because branch instructions are generated where +; conditional move instructions are expected. +; REQUIRES: disabled + target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2008-08-06-Alloca.ll b/test/CodeGen/Mips/2008-08-06-Alloca.ll index 7be7974e0ffe..6dd4af111cd9 100644 --- a/test/CodeGen/Mips/2008-08-06-Alloca.ll +++ b/test/CodeGen/Mips/2008-08-06-Alloca.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=mips | grep {subu.*sp} | count 2 +; RUN: llc < %s -march=mips -regalloc=basic | grep {subu.*sp} | count 2 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64" target triple = "mipsallegrexel-unknown-psp-elf" diff --git a/test/CodeGen/Mips/2010-07-20-Select.ll b/test/CodeGen/Mips/2010-07-20-Select.ll index 891b5d9e1884..e5e2c5473770 100644 --- a/test/CodeGen/Mips/2010-07-20-Select.ll +++ b/test/CodeGen/Mips/2010-07-20-Select.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=mips -relocation-model=static | FileCheck %s +; RUN: llc < %s -march=mips -relocation-model=static -regalloc=basic | FileCheck %s ; Fix PR7473 define i32 @main() nounwind readnone { @@ -9,12 +10,12 @@ entry: volatile store i32 0, i32* %c, align 4 %0 = volatile load i32* %a, align 4 ; [#uses=1] 
%1 = icmp eq i32 %0, 0 ; [#uses=1] -; CHECK: addiu $3, $zero, 0 +; CHECK: addiu $[[R1:[0-9]+]], $zero, 0 %iftmp.0.0 = select i1 %1, i32 3, i32 0 ; [#uses=1] %2 = volatile load i32* %c, align 4 ; [#uses=1] %3 = icmp eq i32 %2, 0 ; [#uses=1] -; CHECK: addiu $3, $zero, 3 -; CHECK: addu $2, $5, $3 +; CHECK: addiu $[[R1]], $zero, 3 +; CHECK: addu $2, ${{.}}, $[[R1]] %iftmp.2.0 = select i1 %3, i32 0, i32 5 ; [#uses=1] %4 = add nsw i32 %iftmp.2.0, %iftmp.0.0 ; [#uses=1] ret i32 %4 diff --git a/test/CodeGen/Mips/addc.ll b/test/CodeGen/Mips/addc.ll new file mode 100644 index 000000000000..e5d05b1d6dbb --- /dev/null +++ b/test/CodeGen/Mips/addc.ll @@ -0,0 +1,13 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s +; RUN: llc < %s -march=mips | FileCheck %s + +define void @f(i64 %l, i64* nocapture %p) nounwind { +entry: +; CHECK: lui +; CHECK: ori +; CHECK: addu + %add = add i64 %l, 1311768467294899695 + store i64 %add, i64* %p, align 4 + ret void +} + diff --git a/test/CodeGen/Mips/analyzebranch.ll b/test/CodeGen/Mips/analyzebranch.ll new file mode 100644 index 000000000000..8f0bdf286c52 --- /dev/null +++ b/test/CodeGen/Mips/analyzebranch.ll @@ -0,0 +1,46 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +define double @foo(double %a, double %b) nounwind readnone { +entry: +; CHECK: bc1f $BB0_2 +; CHECK: nop +; CHECK: # BB#1: + + %cmp = fcmp ogt double %a, 0.000000e+00 + br i1 %cmp, label %if.end6, label %if.else + +if.else: ; preds = %entry + %cmp3 = fcmp ogt double %b, 0.000000e+00 + br i1 %cmp3, label %if.end6, label %return + +if.end6: ; preds = %if.else, %entry + %c.0 = phi double [ %a, %entry ], [ 0.000000e+00, %if.else ] + %sub = fsub double %b, %c.0 + %mul = fmul double %sub, 2.000000e+00 + br label %return + +return: ; preds = %if.else, %if.end6 + %retval.0 = phi double [ %mul, %if.end6 ], [ 0.000000e+00, %if.else ] + ret double %retval.0 +} + +define void @f1(float %f) nounwind { +entry: +; CHECK: bc1t $BB1_2 +; CHECK: nop +; CHECK: # BB#1: + %cmp = fcmp une float %f, 
0.000000e+00 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + tail call void @abort() noreturn + unreachable + +if.end: ; preds = %entry + tail call void (...)* @f2() nounwind + ret void +} + +declare void @abort() noreturn nounwind + +declare void @f2(...) diff --git a/test/CodeGen/Mips/blockaddr.ll b/test/CodeGen/Mips/blockaddr.ll new file mode 100644 index 000000000000..e9af3045e15f --- /dev/null +++ b/test/CodeGen/Mips/blockaddr.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=mipsel -relocation-model=pic < %s | FileCheck %s -check-prefix=CHECK-PIC +; RUN: llc -march=mipsel -relocation-model=static < %s | FileCheck %s -check-prefix=CHECK-STATIC + +@reg = common global i8* null, align 4 + +define i8* @dummy(i8* %x) nounwind readnone noinline { +entry: + ret i8* %x +} + +; CHECK-PIC: lw $[[R0:[0-9]+]], %got($tmp1)($gp) +; CHECK-PIC: addiu ${{[0-9]+}}, $[[R0]], %lo($tmp1) +; CHECK-PIC: lw $[[R1:[0-9]+]], %got($tmp2)($gp) +; CHECK-PIC: addiu ${{[0-9]+}}, $[[R1]], %lo($tmp2) +; CHECK-STATIC: lui $[[R2:[0-9]+]], %hi($tmp1) +; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R2]], %lo($tmp1) +; CHECK-STATIC: lui $[[R3:[0-9]+]], %hi($tmp2) +; CHECK-STATIC: addiu ${{[0-9]+}}, $[[R3]], %lo($tmp2) +define void @f() nounwind { +entry: + %call = tail call i8* @dummy(i8* blockaddress(@f, %baz)) + indirectbr i8* %call, [label %baz, label %foo] + +foo: ; preds = %foo, %entry + store i8* blockaddress(@f, %foo), i8** @reg, align 4 + br label %foo + +baz: ; preds = %entry + store i8* null, i8** @reg, align 4 + ret void +} diff --git a/test/CodeGen/Mips/buildpairextractelementf64.ll b/test/CodeGen/Mips/buildpairextractelementf64.ll new file mode 100644 index 000000000000..585bc250fb8c --- /dev/null +++ b/test/CodeGen/Mips/buildpairextractelementf64.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s +; RUN: llc < %s -march=mips | FileCheck %s +@a = external global i32 + +define double @f(i32 %a1, double %d) nounwind { +entry: +; CHECK: mtc1 +; CHECK: mtc1 + store 
i32 %a1, i32* @a, align 4 + %add = fadd double %d, 2.000000e+00 + ret double %add +} + +define void @f3(double %d, i32 %a1) nounwind { +entry: +; CHECK: mfc1 +; CHECK: mfc1 + tail call void @f2(i32 %a1, double %d) nounwind + ret void +} + +declare void @f2(i32, double) + diff --git a/test/CodeGen/Mips/cmov.ll b/test/CodeGen/Mips/cmov.ll index 7d3e0252e3c9..8329c891f0c2 100755 --- a/test/CodeGen/Mips/cmov.ll +++ b/test/CodeGen/Mips/cmov.ll @@ -1,10 +1,11 @@ ; RUN: llc -march=mips -mcpu=4ke < %s | FileCheck %s +; RUN: llc -march=mips -mcpu=4ke -regalloc=basic < %s | FileCheck %s @i1 = global [3 x i32] [i32 1, i32 2, i32 3], align 4 @i3 = common global i32* null, align 4 -; CHECK: lw $3, %got(i3)($gp) -; CHECK: addiu $5, $gp, %got(i1) +; CHECK: lw ${{[0-9]+}}, %got(i3)($gp) +; CHECK: addiu ${{[0-9]+}}, $gp, %got(i1) define i32* @cmov1(i32 %s) nounwind readonly { entry: %tobool = icmp ne i32 %s, 0 diff --git a/test/CodeGen/Mips/divrem.ll b/test/CodeGen/Mips/divrem.ll new file mode 100644 index 000000000000..398d1b78bd43 --- /dev/null +++ b/test/CodeGen/Mips/divrem.ll @@ -0,0 +1,51 @@ +; RUN: llc -march=mips < %s | FileCheck %s + +; CHECK: div $zero, +define i32 @sdiv1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %div = sdiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: div $zero, +define i32 @srem1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %rem = srem i32 %a0, %a1 + ret i32 %rem +} + +; CHECK: divu $zero, +define i32 @udiv1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %div = udiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: divu $zero, +define i32 @urem1(i32 %a0, i32 %a1) nounwind readnone { +entry: + %rem = urem i32 %a0, %a1 + ret i32 %rem +} + +; CHECK: div $zero, +define i32 @sdivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +entry: + %rem = srem i32 %a0, %a1 + store i32 %rem, i32* %r, align 4, !tbaa !0 + %div = sdiv i32 %a0, %a1 + ret i32 %div +} + +; CHECK: divu $zero, +define i32 @udivrem1(i32 %a0, i32 %a1, i32* nocapture %r) nounwind { +entry: + 
%rem = urem i32 %a0, %a1 + store i32 %rem, i32* %r, align 4, !tbaa !0 + %div = udiv i32 %a0, %a1 + ret i32 %div +} + +!0 = metadata !{metadata !"int", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/Mips/fpbr.ll b/test/CodeGen/Mips/fpbr.ll new file mode 100644 index 000000000000..0a6478b0f8f0 --- /dev/null +++ b/test/CodeGen/Mips/fpbr.ll @@ -0,0 +1,119 @@ +; RUN: llc < %s -march=mipsel | FileCheck %s + +define void @func0(float %f2, float %f3) nounwind { +entry: +; CHECK: c.eq.s +; CHECK: bc1f + %cmp = fcmp oeq float %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +declare void @g0(...) + +declare void @g1(...) + +define void @func1(float %f2, float %f3) nounwind { +entry: +; CHECK: c.olt.s +; CHECK: bc1f + %cmp = fcmp olt float %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func2(float %f2, float %f3) nounwind { +entry: +; CHECK: c.ole.s +; CHECK: bc1f + %cmp = fcmp ugt float %f2, %f3 + br i1 %cmp, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func3(double %f2, double %f3) nounwind { +entry: +; CHECK: c.eq.d +; CHECK: bc1f + %cmp = fcmp oeq double %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() 
nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func4(double %f2, double %f3) nounwind { +entry: +; CHECK: c.olt.d +; CHECK: bc1f + %cmp = fcmp olt double %f2, %f3 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} + +define void @func5(double %f2, double %f3) nounwind { +entry: +; CHECK: c.ole.d +; CHECK: bc1f + %cmp = fcmp ugt double %f2, %f3 + br i1 %cmp, label %if.else, label %if.then + +if.then: ; preds = %entry + tail call void (...)* @g0() nounwind + br label %if.end + +if.else: ; preds = %entry + tail call void (...)* @g1() nounwind + br label %if.end + +if.end: ; preds = %if.else, %if.then + ret void +} diff --git a/test/CodeGen/Mips/fpcmp.ll b/test/CodeGen/Mips/fpcmp.ll new file mode 100644 index 000000000000..c89ffe67f1b9 --- /dev/null +++ b/test/CodeGen/Mips/fpcmp.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-MIPS32R2 +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS1 + +@g1 = external global i32 + +define i32 @f(float %f0, float %f1) nounwind { +entry: +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1t +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1t + %cmp = fcmp olt float %f0, %f1 + %conv = zext i1 %cmp to i32 + %tmp2 = load i32* @g1, align 4 + %add = add nsw i32 %tmp2, %conv + store i32 %add, i32* @g1, align 4 + %cond = select i1 %cmp, i32 10, i32 20 + ret i32 %cond +} diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll new file mode 100644 index 000000000000..fdfa01a9e0f7 --- /dev/null +++ 
b/test/CodeGen/Mips/internalfunc.ll @@ -0,0 +1,52 @@ +; RUN: llc < %s -march=mips | FileCheck %s + +@caller.sf1 = internal unnamed_addr global void (...)* null, align 4 +@gf1 = external global void (...)* +@.str = private unnamed_addr constant [3 x i8] c"f2\00" + +define i32 @main(i32 %argc, i8** nocapture %argv) nounwind { +entry: +; CHECK: lw $[[R0:[0-9]+]], %got(f2)($gp) +; CHECK: addiu $25, $[[R0]], %lo(f2) + tail call fastcc void @f2() + ret i32 0 +} + +define void @caller(i32 %a0, i32 %a1) nounwind { +entry: +; CHECK: lw $[[R1:[0-9]+]], %got(caller.sf1)($gp) +; CHECK: addiu ${{[0-9]+}}, $[[R1]], %lo(caller.sf1) + %tobool = icmp eq i32 %a1, 0 + br i1 %tobool, label %if.end, label %if.then + +if.then: ; preds = %entry + %tmp1 = load void (...)** @caller.sf1, align 4 + tail call void (...)* %tmp1() nounwind + br label %if.end + +if.end: ; preds = %entry, %if.then +; CHECK: lw $[[R2:[0-9]+]], %got(sf2)($gp) +; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp) +; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2) +; CHECK: addiu ${{[0-9]+}}, $[[R3]], %lo(caller.sf1) + %tobool3 = icmp ne i32 %a0, 0 + %tmp4 = load void (...)** @gf1, align 4 + %cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*) + store void (...)* %cond, void (...)** @caller.sf1, align 4 + ret void +} + +define internal void @sf2() nounwind { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind + ret void +} + +declare i32 @printf(i8* nocapture, ...) 
nounwind + +define internal fastcc void @f2() nounwind noinline { +entry: + %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([3 x i8]* @.str, i32 0, i32 0)) nounwind + ret void +} + diff --git a/test/CodeGen/Mips/largeimm1.ll b/test/CodeGen/Mips/largeimm1.ll new file mode 100644 index 000000000000..d65cc025d085 --- /dev/null +++ b/test/CodeGen/Mips/largeimm1.ll @@ -0,0 +1,13 @@ +; RUN: llc -march=mipsel < %s | FileCheck %s + +; CHECK: lui $at, 49152 +; CHECK: lui $at, 16384 +define void @f() nounwind { +entry: + %a1 = alloca [1073741824 x i8], align 1 + %arrayidx = getelementptr inbounds [1073741824 x i8]* %a1, i32 0, i32 1048676 + call void @f2(i8* %arrayidx) nounwind + ret void +} + +declare void @f2(i8*) diff --git a/test/CodeGen/Mips/o32_cc.ll b/test/CodeGen/Mips/o32_cc.ll index b6df62be6603..3974cd4a6a76 100644 --- a/test/CodeGen/Mips/o32_cc.ll +++ b/test/CodeGen/Mips/o32_cc.ll @@ -61,8 +61,8 @@ entry: declare void @f4(i32, i32, i32, i32) ; $f12, $6, stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: ldc1 $f12, %lo ; CHECK: addiu $6, $zero, 23 define void @testlowercall5() nounwind { @@ -98,8 +98,8 @@ entry: declare void @f7(float, i32, i32) ; $4, $5, $6, stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: addiu $4, $zero, 22 ; CHECK: addiu $5, $zero, 53 ; CHECK: addiu $6, $zero, 44 @@ -115,7 +115,7 @@ declare void @f8(i32, i32, i32, double) ; CHECK: addiu $4, $zero, 32 ; CHECK: addiu $5, $zero, 63 ; CHECK: addiu $6, $zero, 54 -; CHECK: ori $7, $2, 0 +; CHECK: ori $7 define void @testlowercall9() nounwind { entry: tail call void @f9(i32 32, i32 63, i32 54, float 1.100000e+01) nounwind @@ -128,7 +128,7 @@ declare void @f9(i32, i32, i32, float) ; CHECK: addiu $4, $zero, 42 ; CHECK: addiu $5, $zero, 73 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $2, 0 +; CHECK: ori $7 define void @testlowercall10() nounwind { entry: tail call void @f10(i32 42, i32 73, 
double 2.700000e+01) nounwind @@ -140,7 +140,7 @@ declare void @f10(i32, i32, double) ; $4, ($6, $7) ; CHECK: addiu $4, $zero, 52 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $2, 0 +; CHECK: ori $7 define void @testlowercall11() nounwind { entry: tail call void @f11(i32 52, double 1.600000e+01) nounwind @@ -152,8 +152,8 @@ declare void @f11(i32, double) ; $f12, $f14, $6, $7 ; CHECK: lwc1 $f12, %lo ; CHECK: lwc1 $f14, %lo -; CHECK: ori $6, $4, 0 -; CHECK: ori $7, $5, 0 +; CHECK: ori $6 +; CHECK: ori $7 define void @testlowercall12() nounwind { entry: tail call void @f12(float 2.800000e+01, float 1.900000e+01, float 1.000000e+01, float 2.100000e+01) nounwind @@ -165,7 +165,7 @@ declare void @f12(float, float, float, float) ; $f12, $5, $6, $7 ; CHECK: lwc1 $f12, %lo ; CHECK: addiu $5, $zero, 83 -; CHECK: ori $6, $3, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 25 define void @testlowercall13() nounwind { entry: @@ -179,7 +179,7 @@ declare void @f13(float, i32, float, i32) ; $f12, $f14, $7 ; CHECK: ldc1 $f12, %lo ; CHECK: lwc1 $f14, %lo -; CHECK: ori $7, $4, 0 +; CHECK: ori $7 define void @testlowercall14() nounwind { entry: tail call void @f14(double 3.500000e+01, float 2.900000e+01, float 3.000000e+01) nounwind @@ -192,7 +192,7 @@ declare void @f14(double, float, float) ; CHECK: lwc1 $f12, %lo ; CHECK: lwc1 $f14, %lo ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $4, 32768 +; CHECK: ori $7 define void @testlowercall15() nounwind { entry: tail call void @f15(float 4.800000e+01, float 3.900000e+01, double 3.700000e+01) nounwind @@ -203,9 +203,9 @@ declare void @f15(float, float, double) ; $4, $5, $6, $7 ; CHECK: addiu $4, $zero, 62 -; CHECK: ori $5, $2, 0 +; CHECK: ori $5 ; CHECK: addiu $6, $zero, 64 -; CHECK: ori $7, $3, 0 +; CHECK: ori $7 define void @testlowercall16() nounwind { entry: tail call void @f16(i32 62, float 4.900000e+01, i32 64, float 3.100000e+01) nounwind @@ -216,7 +216,7 @@ declare void @f16(i32, float, i32, float) ; $4, $5, $6, $7 ; CHECK: addiu $4, 
$zero, 72 -; CHECK: ori $5, $2, 0 +; CHECK: ori $5 ; CHECK: addiu $6, $zero, 74 ; CHECK: addiu $7, $zero, 35 define void @testlowercall17() nounwind { @@ -230,7 +230,7 @@ declare void @f17(i32, float, i32, i32) ; $4, $5, $6, $7 ; CHECK: addiu $4, $zero, 82 ; CHECK: addiu $5, $zero, 93 -; CHECK: ori $6, $2, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 45 define void @testlowercall18() nounwind { entry: @@ -242,11 +242,11 @@ declare void @f18(i32, i32, float, i32) ; $4, ($6, $7), stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: addiu $4, $zero, 92 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $3, 0 +; CHECK: ori $7 define void @testlowercall20() nounwind { entry: tail call void @f20(i32 92, double 2.600000e+01, double 4.700000e+01) nounwind @@ -270,7 +270,7 @@ declare void @f21(float, i32) ; CHECK: lwc1 $f12, %lo ; CHECK: addiu $5, $zero, 113 ; CHECK: addiu $6, $zero, 0 -; CHECK: ori $7, $3, 32768 +; CHECK: ori $7 define void @testlowercall22() nounwind { entry: tail call void @f22(float 6.800000e+01, i32 113, double 5.700000e+01) nounwind @@ -291,8 +291,8 @@ entry: declare void @f23(double, i32) ; $f12,$6, stack -; CHECK: sw $2, 16($sp) -; CHECK: sw $zero, 20($sp) +; CHECK: sw +; CHECK: sw ; CHECK: ldc1 $f12, %lo ; CHECK: addiu $6, $zero, 133 define void @testlowercall24() nounwind { @@ -306,15 +306,15 @@ declare void @f24(double, i32, double) ; CHECK: lwc1 $f12, %lo ; lwc1 $f12, %lo ; CHECK: lwc1 $f14, %lo -; CHECK: ori $6, $4, 0 -; CHECK: ori $7, $5, 0 +; CHECK: ori $6 +; CHECK: ori $7 ; CHECK: lwc1 $f12, %lo ; CHECK: addiu $5, $zero, 83 -; CHECK: ori $6, $3, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 25 ; CHECK: addiu $4, $zero, 82 ; CHECK: addiu $5, $zero, 93 -; CHECK: ori $6, $2, 0 +; CHECK: ori $6 ; CHECK: addiu $7, $zero, 45 define void @testlowercall25() nounwind { entry: diff --git a/test/CodeGen/Mips/o32_cc_vararg.ll b/test/CodeGen/Mips/o32_cc_vararg.ll new file mode 100644 index 000000000000..1f71ed2640eb --- 
/dev/null +++ b/test/CodeGen/Mips/o32_cc_vararg.ll @@ -0,0 +1,278 @@ +; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s | FileCheck %s +; RUN: llc -march=mipsel -mcpu=mips2 -pre-RA-sched=source < %s -regalloc=basic | FileCheck %s + + +; All test functions do the same thing - they return the first variable +; argument. + +; All CHECK's do the same thing - they check whether variable arguments from +; registers are placed on correct stack locations, and whether the first +; variable argument is returned from the correct stack location. + + +declare void @llvm.va_start(i8*) nounwind +declare void @llvm.va_end(i8*) nounwind + +; return int +define i32 @va1(i32 %a, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %b = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %b, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %b, align 4 + ret i32 %tmp + +; CHECK: va1: +; CHECK: addiu $sp, $sp, -32 +; CHECK: sw $7, 44($sp) +; CHECK: sw $6, 40($sp) +; CHECK: sw $5, 36($sp) +; CHECK: lw $2, 36($sp) +} + +; check whether the variable double argument will be accessed from the 8-byte +; aligned location (i.e. whether the address is computed by adding 7 and +; clearing lower 3 bits) +define double @va2(i32 %a, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %b = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %b, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %b, align 8 + ret double %tmp + +; CHECK: va2: +; CHECK: addiu $sp, $sp, -40 +; CHECK: sw $7, 52($sp) +; CHECK: sw $6, 48($sp) +; CHECK: sw $5, 44($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 44 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va3(double %a, ...) nounwind { +entry: + %a.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %b = alloca i32, align 4 + store double %a, double* %a.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %b, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %b, align 4 + ret i32 %tmp + +; CHECK: va3: +; CHECK: addiu $sp, $sp, -40 +; CHECK: sw $7, 52($sp) +; CHECK: sw $6, 48($sp) +; CHECK: lw $2, 48($sp) +} + +; double +define double @va4(double %a, ...) 
nounwind { +entry: + %a.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %b = alloca double, align 8 + store double %a, double* %a.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %b, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %b, align 8 + ret double %tmp + +; CHECK: va4: +; CHECK: addiu $sp, $sp, -48 +; CHECK: sw $7, 60($sp) +; CHECK: sw $6, 56($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 56 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va5(i32 %a, i32 %b, i32 %c, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %d, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %d, align 4 + ret i32 %tmp + +; CHECK: va5: +; CHECK: addiu $sp, $sp, -40 +; CHECK: sw $7, 52($sp) +; CHECK: lw $2, 52($sp) +} + +; double +define double @va6(i32 %a, i32 %b, i32 %c, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca i32, align 4 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + store i32 %b, i32* %b.addr, align 4 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %d, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %d, align 8 + ret double %tmp + +; CHECK: va6: +; CHECK: addiu $sp, $sp, -48 +; CHECK: sw $7, 60($sp) +; CHECK: addiu $[[R0:[0-9]+]], $sp, 60 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va7(i32 %a, double %b, ...) nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %c, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %c, align 4 + ret i32 %tmp + +; CHECK: va7: +; CHECK: addiu $sp, $sp, -40 +; CHECK: lw $2, 56($sp) +} + +; double +define double @va8(i32 %a, double %b, ...) 
nounwind { +entry: + %a.addr = alloca i32, align 4 + %b.addr = alloca double, align 8 + %ap = alloca i8*, align 4 + %c = alloca double, align 8 + store i32 %a, i32* %a.addr, align 4 + store double %b, double* %b.addr, align 8 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %c, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %c, align 8 + ret double %tmp + +; CHECK: va8: +; CHECK: addiu $sp, $sp, -48 +; CHECK: addiu $[[R0:[0-9]+]], $sp, 64 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} + +; int +define i32 @va9(double %a, double %b, i32 %c, ...) nounwind { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca i32, align 4 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, i32 + store i32 %0, i32* %d, align 4 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load i32* %d, align 4 + ret i32 %tmp + +; CHECK: va9: +; CHECK: addiu $sp, $sp, -56 +; CHECK: lw $2, 76($sp) +} + +; double +define double @va10(double %a, double %b, i32 %c, ...) 
nounwind { +entry: + %a.addr = alloca double, align 8 + %b.addr = alloca double, align 8 + %c.addr = alloca i32, align 4 + %ap = alloca i8*, align 4 + %d = alloca double, align 8 + store double %a, double* %a.addr, align 8 + store double %b, double* %b.addr, align 8 + store i32 %c, i32* %c.addr, align 4 + %ap1 = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* %ap1) + %0 = va_arg i8** %ap, double + store double %0, double* %d, align 8 + %ap2 = bitcast i8** %ap to i8* + call void @llvm.va_end(i8* %ap2) + %tmp = load double* %d, align 8 + ret double %tmp + +; CHECK: va10: +; CHECK: addiu $sp, $sp, -56 +; CHECK: addiu $[[R0:[0-9]+]], $sp, 76 +; CHECK: addiu $[[R1:[0-9]+]], $[[R0]], 7 +; CHECK: addiu $[[R2:[0-9]+]], $zero, -8 +; CHECK: and $[[R3:[0-9]+]], $[[R1]], $[[R2]] +; CHECK: ldc1 $f0, 0($[[R3]]) +} diff --git a/test/CodeGen/Mips/select.ll b/test/CodeGen/Mips/select.ll new file mode 100644 index 000000000000..c83fa3ece026 --- /dev/null +++ b/test/CodeGen/Mips/select.ll @@ -0,0 +1,196 @@ +; RUN: llc < %s -march=mipsel -mcpu=4ke | FileCheck %s -check-prefix=CHECK-MIPS32R2 +; RUN: llc < %s -march=mipsel | FileCheck %s -check-prefix=CHECK-MIPS1 + +@d2 = external global double +@d3 = external global double + +define i32 @sel1(i32 %s, i32 %f0, i32 %f1) nounwind readnone { +entry: +; CHECK-MIPS32R2: movn +; CHECK-MIPS1: beq + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, i32 %f1, i32 %f0 + ret i32 %cond +} + +define float @sel2(i32 %s, float %f0, float %f1) nounwind readnone { +entry: +; CHECK-MIPS32R2: movn.s +; CHECK-MIPS1: beq + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, float %f0, float %f1 + ret float %cond +} + +define double @sel2_1(i32 %s, double %f0, double %f1) nounwind readnone { +entry: +; CHECK-MIPS32R2: movn.d +; CHECK-MIPS1: beq + %tobool = icmp ne i32 %s, 0 + %cond = select i1 %tobool, double %f0, double %f1 + ret double %cond +} + +define float @sel3(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: 
+; CHECK-MIPS32R2: c.eq.s +; CHECK-MIPS32R2: movt.s +; CHECK-MIPS1: c.eq.s +; CHECK-MIPS1: bc1f + %cmp = fcmp oeq float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define float @sel4(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt.s +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1f + %cmp = fcmp olt float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define float @sel5(float %f0, float %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.s +; CHECK-MIPS32R2: movf.s +; CHECK-MIPS1: c.ule.s +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define double @sel5_1(double %f0, double %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.s +; CHECK-MIPS32R2: movf.d +; CHECK-MIPS1: c.ule.s +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel6(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.eq.d +; CHECK-MIPS32R2: movt.d +; CHECK-MIPS1: c.eq.d +; CHECK-MIPS1: bc1f + %cmp = fcmp oeq double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel7(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.olt.d +; CHECK-MIPS32R2: movt.d +; CHECK-MIPS1: c.olt.d +; CHECK-MIPS1: bc1f + %cmp = fcmp olt double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define double @sel8(double %f0, double %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.d +; CHECK-MIPS32R2: movf.d +; CHECK-MIPS1: c.ule.d +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt double %f2, %f3 + %cond = select i1 %cmp, double %f0, double %f1 + ret double %cond +} + +define 
float @sel8_1(float %f0, float %f1, double %f2, double %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.d +; CHECK-MIPS32R2: movf.s +; CHECK-MIPS1: c.ule.d +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt double %f2, %f3 + %cond = select i1 %cmp, float %f0, float %f1 + ret float %cond +} + +define i32 @sel9(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.eq.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.eq.s +; CHECK-MIPS1: bc1f + %cmp = fcmp oeq float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel10(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.olt.s +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.olt.s +; CHECK-MIPS1: bc1f + %cmp = fcmp olt float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel11(i32 %f0, i32 %f1, float %f2, float %f3) nounwind readnone { +entry: +; CHECK-MIPS32R2: c.ule.s +; CHECK-MIPS32R2: movf +; CHECK-MIPS1: c.ule.s +; CHECK-MIPS1: bc1t + %cmp = fcmp ogt float %f2, %f3 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel12(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK-MIPS32R2: c.eq.d +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.eq.d +; CHECK-MIPS1: bc1f + %tmp = load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp oeq double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel13(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK-MIPS32R2: c.olt.d +; CHECK-MIPS32R2: movt +; CHECK-MIPS1: c.olt.d +; CHECK-MIPS1: bc1f + %tmp = load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp olt double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +define i32 @sel14(i32 %f0, i32 %f1) nounwind readonly { +entry: +; CHECK-MIPS32R2: c.ule.d +; CHECK-MIPS32R2: movf +; CHECK-MIPS1: c.ule.d +; CHECK-MIPS1: bc1t + %tmp 
= load double* @d2, align 8, !tbaa !0 + %tmp1 = load double* @d3, align 8, !tbaa !0 + %cmp = fcmp ogt double %tmp, %tmp1 + %cond = select i1 %cmp, i32 %f0, i32 %f1 + ret i32 %cond +} + +!0 = metadata !{metadata !"double", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll index 1259d03e96c9..235b00e8782f 100644 --- a/test/CodeGen/PTX/add.ll +++ b/test/CodeGen/PTX/add.ll @@ -1,15 +1,71 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -define ptx_device i32 @t1(i32 %x, i32 %y) { -; CHECK: add.s32 r0, r1, r2; +define ptx_device i16 @t1_u16(i16 %x, i16 %y) { +; CHECK: add.u16 rh0, rh1, rh2; +; CHECK-NEXT: ret; + %z = add i16 %x, %y + ret i16 %z +} + +define ptx_device i32 @t1_u32(i32 %x, i32 %y) { +; CHECK: add.u32 r0, r1, r2; +; CHECK-NEXT: ret; %z = add i32 %x, %y -; CHECK: ret; ret i32 %z } -define ptx_device i32 @t2(i32 %x) { -; CHECK: add.s32 r0, r1, 1; +define ptx_device i64 @t1_u64(i64 %x, i64 %y) { +; CHECK: add.u64 rd0, rd1, rd2; +; CHECK-NEXT: ret; + %z = add i64 %x, %y + ret i64 %z +} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: add.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fadd float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: add.f64 fd0, fd1, fd2 +; CHECK-NEXT: ret; + %z = fadd double %x, %y + ret double %z +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: add.u16 rh0, rh1, 1; +; CHECK-NEXT: ret; + %z = add i16 %x, 1 + ret i16 %z +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: add.u32 r0, r1, 1; +; CHECK-NEXT: ret; %z = add i32 %x, 1 -; CHECK: ret; ret i32 %z } + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: add.u64 rd0, rd1, 1; +; CHECK-NEXT: ret; + %z = add i64 %x, 1 + ret i64 %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: add.f32 f0, f1, 0F3F800000; +; CHECK-NEXT: ret; 
+ %z = fadd float %x, 1.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000; +; CHECK-NEXT: ret; + %z = fadd double %x, 1.0 + ret double %z +} diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll new file mode 100644 index 000000000000..dbc77e53330b --- /dev/null +++ b/test/CodeGen/PTX/bitwise.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +; preds + +define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) { +; CHECK: and.pred p0, p1, p2 + %c = and i1 %x, %y + %d = zext i1 %c to i32 + ret i32 %d +} + +define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) { +; CHECK: or.pred p0, p1, p2 + %a = or i1 %x, %y + %b = zext i1 %a to i32 + ret i32 %b +} + +define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) { +; CHECK: xor.pred p0, p1, p2 + %a = xor i1 %x, %y + %b = zext i1 %a to i32 + ret i32 %b +} diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll new file mode 100644 index 000000000000..49383eb3cf96 --- /dev/null +++ b/test/CodeGen/PTX/bra.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device void @test_bra_direct() { +; CHECK: bra $L__BB0_1; +entry: + br label %loop +loop: + br label %loop +} + +define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) { +entry: +; CHECK: setp.le.u32 p0, r1, r2 + %p = icmp ugt i32 %x, %y +; CHECK-NEXT: @p0 bra +; CHECK-NOT: bra + br i1 %p, label %clause.if, label %clause.else +clause.if: +; CHECK: mov.u32 r0, r1 + ret i32 %x +clause.else: +; CHECK: mov.u32 r0, r2 + ret i32 %y +} diff --git a/test/CodeGen/PTX/exit.ll b/test/CodeGen/PTX/exit.ll index 4071babb80ce..7816c801728f 100644 --- a/test/CodeGen/PTX/exit.ll +++ b/test/CodeGen/PTX/exit.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_kernel void @t1() { ; CHECK: exit; diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll new file mode 100644 index 
000000000000..121360ce9be3 --- /dev/null +++ b/test/CodeGen/PTX/fdiv-sm10.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: div.approx.f32 f0, f1, f2; +; CHECK-NEXT: ret; + %a = fdiv float %x, %y + ret float %a +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: div.f64 fd0, fd1, fd2; +; CHECK-NEXT: ret; + %a = fdiv double %x, %y + ret double %a +} diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll new file mode 100644 index 000000000000..0ec7bae8030e --- /dev/null +++ b/test/CodeGen/PTX/fdiv-sm13.ll @@ -0,0 +1,15 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: div.approx.f32 f0, f1, f2; +; CHECK-NEXT: ret; + %a = fdiv float %x, %y + ret float %a +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: div.rn.f64 fd0, fd1, fd2; +; CHECK-NEXT: ret; + %a = fdiv double %x, %y + ret double %a +} diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll new file mode 100644 index 000000000000..cea41827ca47 --- /dev/null +++ b/test/CodeGen/PTX/intrinsic.ll @@ -0,0 +1,281 @@ +; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s + +define ptx_device i32 @test_tid_x() { +; CHECK: mov.u32 r0, %tid.x; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.x() + ret i32 %x +} + +define ptx_device i32 @test_tid_y() { +; CHECK: mov.u32 r0, %tid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.y() + ret i32 %x +} + +define ptx_device i32 @test_tid_z() { +; CHECK: mov.u32 r0, %tid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.z() + ret i32 %x +} + +define ptx_device i32 @test_tid_w() { +; CHECK: mov.u32 r0, %tid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.tid.w() + ret i32 %x +} + +define ptx_device i32 @test_ntid_x() { +; CHECK: mov.u32 r0, %ntid.x; +; CHECK-NEXT: ret; + %x = call i32 
@llvm.ptx.read.ntid.x() + ret i32 %x +} + +define ptx_device i32 @test_ntid_y() { +; CHECK: mov.u32 r0, %ntid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ntid.y() + ret i32 %x +} + +define ptx_device i32 @test_ntid_z() { +; CHECK: mov.u32 r0, %ntid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ntid.z() + ret i32 %x +} + +define ptx_device i32 @test_ntid_w() { +; CHECK: mov.u32 r0, %ntid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ntid.w() + ret i32 %x +} + +define ptx_device i32 @test_laneid() { +; CHECK: mov.u32 r0, %laneid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.laneid() + ret i32 %x +} + +define ptx_device i32 @test_warpid() { +; CHECK: mov.u32 r0, %warpid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.warpid() + ret i32 %x +} + +define ptx_device i32 @test_nwarpid() { +; CHECK: mov.u32 r0, %nwarpid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nwarpid() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_x() { +; CHECK: mov.u32 r0, %ctaid.x; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.x() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_y() { +; CHECK: mov.u32 r0, %ctaid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.y() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_z() { +; CHECK: mov.u32 r0, %ctaid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.z() + ret i32 %x +} + +define ptx_device i32 @test_ctaid_w() { +; CHECK: mov.u32 r0, %ctaid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.ctaid.w() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_x() { +; CHECK: mov.u32 r0, %nctaid.x; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.x() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_y() { +; CHECK: mov.u32 r0, %nctaid.y; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.y() + ret i32 %x +} + +define ptx_device i32 @test_nctaid_z() { +; CHECK: mov.u32 r0, %nctaid.z; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.z() + ret 
i32 %x +} + +define ptx_device i32 @test_nctaid_w() { +; CHECK: mov.u32 r0, %nctaid.w; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nctaid.w() + ret i32 %x +} + +define ptx_device i32 @test_smid() { +; CHECK: mov.u32 r0, %smid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.smid() + ret i32 %x +} + +define ptx_device i32 @test_nsmid() { +; CHECK: mov.u32 r0, %nsmid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.nsmid() + ret i32 %x +} + +define ptx_device i32 @test_gridid() { +; CHECK: mov.u32 r0, %gridid; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.gridid() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_eq() { +; CHECK: mov.u32 r0, %lanemask_eq; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.eq() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_le() { +; CHECK: mov.u32 r0, %lanemask_le; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.le() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_lt() { +; CHECK: mov.u32 r0, %lanemask_lt; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.lt() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_ge() { +; CHECK: mov.u32 r0, %lanemask_ge; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.ge() + ret i32 %x +} + +define ptx_device i32 @test_lanemask_gt() { +; CHECK: mov.u32 r0, %lanemask_gt; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.lanemask.gt() + ret i32 %x +} + +define ptx_device i32 @test_clock() { +; CHECK: mov.u32 r0, %clock; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.clock() + ret i32 %x +} + +define ptx_device i64 @test_clock64() { +; CHECK: mov.u64 rd0, %clock64; +; CHECK-NEXT: ret; + %x = call i64 @llvm.ptx.read.clock64() + ret i64 %x +} + +define ptx_device i32 @test_pm0() { +; CHECK: mov.u32 r0, %pm0; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm0() + ret i32 %x +} + +define ptx_device i32 @test_pm1() { +; CHECK: mov.u32 r0, %pm1; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm1() + ret i32 %x 
+} + +define ptx_device i32 @test_pm2() { +; CHECK: mov.u32 r0, %pm2; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm2() + ret i32 %x +} + +define ptx_device i32 @test_pm3() { +; CHECK: mov.u32 r0, %pm3; +; CHECK-NEXT: ret; + %x = call i32 @llvm.ptx.read.pm3() + ret i32 %x +} + +define ptx_device void @test_bar_sync() { +; CHECK: bar.sync 0 +; CHECK-NEXT: ret; + call void @llvm.ptx.bar.sync(i32 0) + ret void +} + +declare i32 @llvm.ptx.read.tid.x() +declare i32 @llvm.ptx.read.tid.y() +declare i32 @llvm.ptx.read.tid.z() +declare i32 @llvm.ptx.read.tid.w() +declare i32 @llvm.ptx.read.ntid.x() +declare i32 @llvm.ptx.read.ntid.y() +declare i32 @llvm.ptx.read.ntid.z() +declare i32 @llvm.ptx.read.ntid.w() + +declare i32 @llvm.ptx.read.laneid() +declare i32 @llvm.ptx.read.warpid() +declare i32 @llvm.ptx.read.nwarpid() + +declare i32 @llvm.ptx.read.ctaid.x() +declare i32 @llvm.ptx.read.ctaid.y() +declare i32 @llvm.ptx.read.ctaid.z() +declare i32 @llvm.ptx.read.ctaid.w() +declare i32 @llvm.ptx.read.nctaid.x() +declare i32 @llvm.ptx.read.nctaid.y() +declare i32 @llvm.ptx.read.nctaid.z() +declare i32 @llvm.ptx.read.nctaid.w() + +declare i32 @llvm.ptx.read.smid() +declare i32 @llvm.ptx.read.nsmid() +declare i32 @llvm.ptx.read.gridid() + +declare i32 @llvm.ptx.read.lanemask.eq() +declare i32 @llvm.ptx.read.lanemask.le() +declare i32 @llvm.ptx.read.lanemask.lt() +declare i32 @llvm.ptx.read.lanemask.ge() +declare i32 @llvm.ptx.read.lanemask.gt() + +declare i32 @llvm.ptx.read.clock() +declare i64 @llvm.ptx.read.clock64() + +declare i32 @llvm.ptx.read.pm0() +declare i32 @llvm.ptx.read.pm1() +declare i32 @llvm.ptx.read.pm2() +declare i32 @llvm.ptx.read.pm3() + +declare void @llvm.ptx.bar.sync(i32 %i) diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll index 836c4d41045a..377a95abe3db 100644 --- a/test/CodeGen/PTX/ld.ll +++ b/test/CodeGen/PTX/ld.ll @@ -1,78 +1,447 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -;CHECK: 
.extern .global .s32 array[]; -@array = external global [10 x i32] +;CHECK: .extern .global .b8 array_i16[20]; +@array_i16 = external global [10 x i16] -;CHECK: .extern .const .s32 array_constant[]; -@array_constant = external addrspace(1) constant [10 x i32] +;CHECK: .extern .const .b8 array_constant_i16[20]; +@array_constant_i16 = external addrspace(1) constant [10 x i16] -;CHECK: .extern .local .s32 array_local[]; -@array_local = external addrspace(2) global [10 x i32] +;CHECK: .extern .local .b8 array_local_i16[20]; +@array_local_i16 = external addrspace(2) global [10 x i16] -;CHECK: .extern .shared .s32 array_shared[]; -@array_shared = external addrspace(4) global [10 x i32] +;CHECK: .extern .shared .b8 array_shared_i16[20]; +@array_shared_i16 = external addrspace(4) global [10 x i16] -define ptx_device i32 @t1(i32* %p) { +;CHECK: .extern .global .b8 array_i32[40]; +@array_i32 = external global [10 x i32] + +;CHECK: .extern .const .b8 array_constant_i32[40]; +@array_constant_i32 = external addrspace(1) constant [10 x i32] + +;CHECK: .extern .local .b8 array_local_i32[40]; +@array_local_i32 = external addrspace(2) global [10 x i32] + +;CHECK: .extern .shared .b8 array_shared_i32[40]; +@array_shared_i32 = external addrspace(4) global [10 x i32] + +;CHECK: .extern .global .b8 array_i64[80]; +@array_i64 = external global [10 x i64] + +;CHECK: .extern .const .b8 array_constant_i64[80]; +@array_constant_i64 = external addrspace(1) constant [10 x i64] + +;CHECK: .extern .local .b8 array_local_i64[80]; +@array_local_i64 = external addrspace(2) global [10 x i64] + +;CHECK: .extern .shared .b8 array_shared_i64[80]; +@array_shared_i64 = external addrspace(4) global [10 x i64] + +;CHECK: .extern .global .b8 array_float[40]; +@array_float = external global [10 x float] + +;CHECK: .extern .const .b8 array_constant_float[40]; +@array_constant_float = external addrspace(1) constant [10 x float] + +;CHECK: .extern .local .b8 array_local_float[40]; +@array_local_float = 
external addrspace(2) global [10 x float] + +;CHECK: .extern .shared .b8 array_shared_float[40]; +@array_shared_float = external addrspace(4) global [10 x float] + +;CHECK: .extern .global .b8 array_double[80]; +@array_double = external global [10 x double] + +;CHECK: .extern .const .b8 array_constant_double[80]; +@array_constant_double = external addrspace(1) constant [10 x double] + +;CHECK: .extern .local .b8 array_local_double[80]; +@array_local_double = external addrspace(2) global [10 x double] + +;CHECK: .extern .shared .b8 array_shared_double[80]; +@array_shared_double = external addrspace(4) global [10 x double] + + +define ptx_device i16 @t1_u16(i16* %p) { entry: -;CHECK: ld.global.s32 r0, [r1]; +;CHECK: ld.global.u16 rh0, [r1]; +;CHECK-NEXT: ret; + %x = load i16* %p + ret i16 %x +} + +define ptx_device i32 @t1_u32(i32* %p) { +entry: +;CHECK: ld.global.u32 r0, [r1]; +;CHECK-NEXT: ret; %x = load i32* %p ret i32 %x } -define ptx_device i32 @t2(i32* %p) { +define ptx_device i64 @t1_u64(i64* %p) { entry: -;CHECK: ld.global.s32 r0, [r1+4]; +;CHECK: ld.global.u64 rd0, [r1]; +;CHECK-NEXT: ret; + %x = load i64* %p + ret i64 %x +} + +define ptx_device float @t1_f32(float* %p) { +entry: +;CHECK: ld.global.f32 f0, [r1]; +;CHECK-NEXT: ret; + %x = load float* %p + ret float %x +} + +define ptx_device double @t1_f64(double* %p) { +entry: +;CHECK: ld.global.f64 fd0, [r1]; +;CHECK-NEXT: ret; + %x = load double* %p + ret double %x +} + +define ptx_device i16 @t2_u16(i16* %p) { +entry: +;CHECK: ld.global.u16 rh0, [r1+2]; +;CHECK-NEXT: ret; + %i = getelementptr i16* %p, i32 1 + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t2_u32(i32* %p) { +entry: +;CHECK: ld.global.u32 r0, [r1+4]; +;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 %x = load i32* %i ret i32 %x } -define ptx_device i32 @t3(i32* %p, i32 %q) { +define ptx_device i64 @t2_u64(i64* %p) { +entry: +;CHECK: ld.global.u64 rd0, [r1+8]; +;CHECK-NEXT: ret; + %i = getelementptr i64* %p, i32 1 + %x = load 
i64* %i + ret i64 %x +} + +define ptx_device float @t2_f32(float* %p) { +entry: +;CHECK: ld.global.f32 f0, [r1+4]; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 1 + %x = load float* %i + ret float %x +} + +define ptx_device double @t2_f64(double* %p) { +entry: +;CHECK: ld.global.f64 fd0, [r1+8]; +;CHECK-NEXT: ret; + %i = getelementptr double* %p, i32 1 + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t3_u16(i16* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 1; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.u16 rh0, [r0]; + %i = getelementptr i16* %p, i32 %q + %x = load i16* %i + ret i16 %x +} + +define ptx_device i32 @t3_u32(i32* %p, i32 %q) { entry: ;CHECK: shl.b32 r0, r2, 2; -;CHECK: add.s32 r0, r1, r0; -;CHECK: ld.global.s32 r0, [r0]; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.u32 r0, [r0]; %i = getelementptr i32* %p, i32 %q %x = load i32* %i ret i32 %x } -define ptx_device i32 @t4_global() { +define ptx_device i64 @t3_u64(i64* %p, i32 %q) { entry: -;CHECK: ld.global.s32 r0, [array]; - %i = getelementptr [10 x i32]* @array, i32 0, i32 0 +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.u64 rd0, [r0]; + %i = getelementptr i64* %p, i32 %q + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t3_f32(float* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 2; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.f32 f0, [r0]; + %i = getelementptr float* %p, i32 %q + %x = load float* %i + ret float %x +} + +define ptx_device double @t3_f64(double* %p, i32 %q) { +entry: +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: ld.global.f64 fd0, [r0]; + %i = getelementptr double* %p, i32 %q + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t4_global_u16() { +entry: +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: ld.global.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0 + %x = load i16* %i + 
ret i16 %x +} + +define ptx_device i32 @t4_global_u32() { +entry: +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: ld.global.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 %x = load i32* %i ret i32 %x } -define ptx_device i32 @t4_const() { +define ptx_device i64 @t4_global_u64() { entry: -;CHECK: ld.const.s32 r0, [array_constant]; - %i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0 +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: ld.global.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t4_global_f32() { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: ld.global.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 + %x = load float* %i + ret float %x +} + +define ptx_device double @t4_global_f64() { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: ld.global.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 + %x = load double* %i + ret double %x +} + +define ptx_device i16 @t4_const_u16() { +entry: +;CHECK: mov.u32 r0, array_constant_i16; +;CHECK-NEXT: ld.const.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0 + %x = load i16 addrspace(1)* %i + ret i16 %x +} + +define ptx_device i32 @t4_const_u32() { +entry: +;CHECK: mov.u32 r0, array_constant_i32; +;CHECK-NEXT: ld.const.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0 %x = load i32 addrspace(1)* %i ret i32 %x } -define ptx_device i32 @t4_local() { +define ptx_device i64 @t4_const_u64() { entry: -;CHECK: ld.local.s32 r0, [array_local]; - %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0 +;CHECK: mov.u32 r0, array_constant_i64; +;CHECK-NEXT: ld.const.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = 
getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0 + %x = load i64 addrspace(1)* %i + ret i64 %x +} + +define ptx_device float @t4_const_f32() { +entry: +;CHECK: mov.u32 r0, array_constant_float; +;CHECK-NEXT: ld.const.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0 + %x = load float addrspace(1)* %i + ret float %x +} + +define ptx_device double @t4_const_f64() { +entry: +;CHECK: mov.u32 r0, array_constant_double; +;CHECK-NEXT: ld.const.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0 + %x = load double addrspace(1)* %i + ret double %x +} + +define ptx_device i16 @t4_local_u16() { +entry: +;CHECK: mov.u32 r0, array_local_i16; +;CHECK-NEXT: ld.local.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 + %x = load i16 addrspace(2)* %i + ret i16 %x +} + +define ptx_device i32 @t4_local_u32() { +entry: +;CHECK: mov.u32 r0, array_local_i32; +;CHECK-NEXT: ld.local.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 %x = load i32 addrspace(2)* %i ret i32 %x } -define ptx_device i32 @t4_shared() { +define ptx_device i64 @t4_local_u64() { entry: -;CHECK: ld.shared.s32 r0, [array_shared]; - %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0 +;CHECK: mov.u32 r0, array_local_i64; +;CHECK-NEXT: ld.local.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 + %x = load i64 addrspace(2)* %i + ret i64 %x +} + +define ptx_device float @t4_local_f32() { +entry: +;CHECK: mov.u32 r0, array_local_float; +;CHECK-NEXT: ld.local.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 + %x = load float addrspace(2)* %i + ret float %x +} + +define ptx_device double @t4_local_f64() { 
+entry: +;CHECK: mov.u32 r0, array_local_double; +;CHECK-NEXT: ld.local.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 + %x = load double addrspace(2)* %i + ret double %x +} + +define ptx_device i16 @t4_shared_u16() { +entry: +;CHECK: mov.u32 r0, array_shared_i16; +;CHECK-NEXT: ld.shared.u16 rh0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 + %x = load i16 addrspace(4)* %i + ret i16 %x +} + +define ptx_device i32 @t4_shared_u32() { +entry: +;CHECK: mov.u32 r0, array_shared_i32; +;CHECK-NEXT: ld.shared.u32 r0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 %x = load i32 addrspace(4)* %i ret i32 %x } -define ptx_device i32 @t5() { +define ptx_device i64 @t4_shared_u64() { entry: -;CHECK: ld.global.s32 r0, [array+4]; - %i = getelementptr [10 x i32]* @array, i32 0, i32 1 +;CHECK: mov.u32 r0, array_shared_i64; +;CHECK-NEXT: ld.shared.u64 rd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 + %x = load i64 addrspace(4)* %i + ret i64 %x +} + +define ptx_device float @t4_shared_f32() { +entry: +;CHECK: mov.u32 r0, array_shared_float; +;CHECK-NEXT: ld.shared.f32 f0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 + %x = load float addrspace(4)* %i + ret float %x +} + +define ptx_device double @t4_shared_f64() { +entry: +;CHECK: mov.u32 r0, array_shared_double; +;CHECK-NEXT: ld.shared.f64 fd0, [r0]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 + %x = load double addrspace(4)* %i + ret double %x +} + +define ptx_device i16 @t5_u16() { +entry: +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: ld.global.u16 rh0, [r0+2]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 + %x = load i16* %i + ret i16 %x +} + 
+define ptx_device i32 @t5_u32() { +entry: +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: ld.global.u32 r0, [r0+4]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 %x = load i32* %i ret i32 %x } + +define ptx_device i64 @t5_u64() { +entry: +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: ld.global.u64 rd0, [r0+8]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 + %x = load i64* %i + ret i64 %x +} + +define ptx_device float @t5_f32() { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: ld.global.f32 f0, [r0+4]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 + %x = load float* %i + ret float %x +} + +define ptx_device double @t5_f64() { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: ld.global.f64 fd0, [r0+8]; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 + %x = load double* %i + ret double %x +} diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll new file mode 100644 index 000000000000..1e265f5b7b3a --- /dev/null +++ b/test/CodeGen/PTX/llvm-intrinsic.ll @@ -0,0 +1,56 @@ +; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s + +define ptx_device float @test_sqrt_f32(float %x) { +entry: +; CHECK: sqrt.rn.f32 f0, f1; +; CHECK-NEXT: ret; + %y = call float @llvm.sqrt.f32(float %x) + ret float %y +} + +define ptx_device double @test_sqrt_f64(double %x) { +entry: +; CHECK: sqrt.rn.f64 fd0, fd1; +; CHECK-NEXT: ret; + %y = call double @llvm.sqrt.f64(double %x) + ret double %y +} + +define ptx_device float @test_sin_f32(float %x) { +entry: +; CHECK: sin.approx.f32 f0, f1; +; CHECK-NEXT: ret; + %y = call float @llvm.sin.f32(float %x) + ret float %y +} + +define ptx_device double @test_sin_f64(double %x) { +entry: +; CHECK: sin.approx.f64 fd0, fd1; +; CHECK-NEXT: ret; + %y = call double @llvm.sin.f64(double %x) + ret double %y +} + +define ptx_device float @test_cos_f32(float %x) { +entry: +; CHECK: 
cos.approx.f32 f0, f1; +; CHECK-NEXT: ret; + %y = call float @llvm.cos.f32(float %x) + ret float %y +} + +define ptx_device double @test_cos_f64(double %x) { +entry: +; CHECK: cos.approx.f64 fd0, fd1; +; CHECK-NEXT: ret; + %y = call double @llvm.cos.f64(double %x) + ret double %y +} + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) +declare float @llvm.sin.f32(float) +declare double @llvm.sin.f64(double) +declare float @llvm.cos.f32(float) +declare double @llvm.cos.f64(double) diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll new file mode 100644 index 000000000000..0c25f2c0030a --- /dev/null +++ b/test/CodeGen/PTX/mad.ll @@ -0,0 +1,17 @@ +; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s + +define ptx_device float @t1_f32(float %x, float %y, float %z) { +; CHECK: mad.rn.f32 f0, f1, f2, f3; +; CHECK-NEXT: ret; + %a = fmul float %x, %y + %b = fadd float %a, %z + ret float %b +} + +define ptx_device double @t1_f64(double %x, double %y, double %z) { +; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3; +; CHECK-NEXT: ret; + %a = fmul double %x, %y + %b = fadd double %a, %z + ret double %b +} diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll index c365e9beb897..120572a0e868 100644 --- a/test/CodeGen/PTX/mov.ll +++ b/test/CodeGen/PTX/mov.ll @@ -1,13 +1,62 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -define ptx_device i32 @t1() { -; CHECK: mov.s32 r0, 0; +define ptx_device i16 @t1_u16() { +; CHECK: mov.u16 rh0, 0; +; CHECK: ret; + ret i16 0 +} + +define ptx_device i32 @t1_u32() { +; CHECK: mov.u32 r0, 0; ; CHECK: ret; ret i32 0 } -define ptx_device i32 @t2(i32 %x) { -; CHECK: mov.s32 r0, r1; +define ptx_device i64 @t1_u64() { +; CHECK: mov.u64 rd0, 0; +; CHECK: ret; + ret i64 0 +} + +define ptx_device float @t1_f32() { +; CHECK: mov.f32 f0, 0F00000000; +; CHECK: ret; + ret float 0.0 +} + +define ptx_device double @t1_f64() { +; CHECK: mov.f64 fd0, 0D0000000000000000; +; CHECK: 
ret; + ret double 0.0 +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: mov.u16 rh0, rh1; +; CHECK: ret; + ret i16 %x +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: mov.u32 r0, r1; ; CHECK: ret; ret i32 %x } + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: mov.u64 rd0, rd1; +; CHECK: ret; + ret i64 %x +} + +define ptx_device float @t3_f32(float %x) { +; CHECK: mov.f32 f0, f1; +; CHECK-NEXT: ret; + ret float %x +} + +define ptx_device double @t3_f64(double %x) { +; CHECK: mov.f64 fd0, fd1; +; CHECK-NEXT: ret; + ret double %x +} + diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll new file mode 100644 index 000000000000..5ce042675dc8 --- /dev/null +++ b/test/CodeGen/PTX/mul.ll @@ -0,0 +1,39 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +;define ptx_device i32 @t1(i32 %x, i32 %y) { +; %z = mul i32 %x, %y +; ret i32 %z +;} + +;define ptx_device i32 @t2(i32 %x) { +; %z = mul i32 %x, 1 +; ret i32 %z +;} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: mul.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fmul float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: mul.f64 fd0, fd1, fd2 +; CHECK-NEXT: ret; + %z = fmul double %x, %y + ret double %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: mul.f32 f0, f1, 0F40A00000; +; CHECK-NEXT: ret; + %z = fmul float %x, 5.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: mul.f64 fd0, fd1, 0D4014000000000000; +; CHECK-NEXT: ret; + %z = fmul double %x, 5.0 + ret double %z +} diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll index a14d5c9c27ba..ac33fef0d6e3 100644 --- a/test/CodeGen/PTX/options.ll +++ b/test/CodeGen/PTX/options.ll @@ -1,5 +1,9 @@ -; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0" -; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20" +; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0" +; RUN: llc < %s -march=ptx32 
-mattr=ptx21 | grep ".version 2.1" +; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2" +; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10" +; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13" +; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20" define ptx_device void @t1() { ret void diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll new file mode 100644 index 000000000000..95d4a328149c --- /dev/null +++ b/test/CodeGen/PTX/parameter-order.ll @@ -0,0 +1,8 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +; CHECK: .func (.reg .u32 r0) test_parameter_order (.reg .f32 f1, .reg .u32 r1, .reg .u32 r2, .reg .f32 f2) +define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) { +; CHECK: sub.u32 r0, r1, r2 + %result = sub i32 %b, %c + ret i32 %result +} diff --git a/test/CodeGen/PTX/ret.ll b/test/CodeGen/PTX/ret.ll index d5037f25fd36..ba0523f6424a 100644 --- a/test/CodeGen/PTX/ret.ll +++ b/test/CodeGen/PTX/ret.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device void @t1() { ; CHECK: ret; diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll new file mode 100644 index 000000000000..5836122049e6 --- /dev/null +++ b/test/CodeGen/PTX/setp.ll @@ -0,0 +1,134 @@ +; RUN: llc < %s -march=ptx32 | FileCheck %s + +define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.eq.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp eq i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ne.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ne i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.lt.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; 
+ %p = icmp ult i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.le.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ule i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.gt.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ugt i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) { +; CHECK: setp.ge.u32 p0, r1, r2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp uge i32 %x, %y + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp eq i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) { +; CHECK: setp.ne.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ne i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) { +; CHECK: setp.eq.u32 p0, r1, 0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ult i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_le_u32_ri(i32 %x) { +; CHECK: setp.lt.u32 p0, r1, 2; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ule i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) { +; CHECK: setp.gt.u32 p0, r1, 1; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp ugt i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) { +; CHECK: setp.ne.u32 p0, r1, 0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %p = icmp uge i32 %x, 1 + %z = zext i1 %p to i32 + ret i32 
%z +} + +define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) { +; CHECK: setp.gt.u32 p0, r3, r4; +; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %c = icmp eq i32 %x, %y + %d = icmp ugt i32 %u, %v + %e = and i1 %c, %d + %z = zext i1 %e to i32 + ret i32 %z +} + +define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) { +; CHECK: cvt.pred.u32 p0, r3; +; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0; +; CHECK-NEXT: cvt.u32.pred r0, p0; +; CHECK-NEXT: ret; + %c = trunc i32 %w to i1 + %d = icmp eq i32 %x, %y + %e = xor i1 %c, 1 + %f = and i1 %d, %e + %z = zext i1 %f to i32 + ret i32 %z +} diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll index b564b43ab932..6e72c9221325 100644 --- a/test/CodeGen/PTX/shl.ll +++ b/test/CodeGen/PTX/shl.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { ; CHECK: shl.b32 r0, r1, r2 diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll index 3f8ade862b75..8693e0ecf49a 100644 --- a/test/CodeGen/PTX/shr.ll +++ b/test/CodeGen/PTX/shr.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s define ptx_device i32 @t1(i32 %x, i32 %y) { ; CHECK: shr.u32 r0, r1, r2 diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll index 2cbacb9ee59c..dee5c61abe66 100644 --- a/test/CodeGen/PTX/st.ll +++ b/test/CodeGen/PTX/st.ll @@ -1,71 +1,402 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -;CHECK: .extern .global .s32 array[]; -@array = external global [10 x i32] +;CHECK: .extern .global .b8 array_i16[20]; +@array_i16 = external global [10 x i16] -;CHECK: .extern .const .s32 array_constant[]; -@array_constant = external addrspace(1) constant [10 x i32] +;CHECK: .extern .const .b8 array_constant_i16[20]; +@array_constant_i16 = external addrspace(1) 
constant [10 x i16] -;CHECK: .extern .local .s32 array_local[]; -@array_local = external addrspace(2) global [10 x i32] +;CHECK: .extern .local .b8 array_local_i16[20]; +@array_local_i16 = external addrspace(2) global [10 x i16] -;CHECK: .extern .shared .s32 array_shared[]; -@array_shared = external addrspace(4) global [10 x i32] +;CHECK: .extern .shared .b8 array_shared_i16[20]; +@array_shared_i16 = external addrspace(4) global [10 x i16] -define ptx_device void @t1(i32* %p, i32 %x) { +;CHECK: .extern .global .b8 array_i32[40]; +@array_i32 = external global [10 x i32] + +;CHECK: .extern .const .b8 array_constant_i32[40]; +@array_constant_i32 = external addrspace(1) constant [10 x i32] + +;CHECK: .extern .local .b8 array_local_i32[40]; +@array_local_i32 = external addrspace(2) global [10 x i32] + +;CHECK: .extern .shared .b8 array_shared_i32[40]; +@array_shared_i32 = external addrspace(4) global [10 x i32] + +;CHECK: .extern .global .b8 array_i64[80]; +@array_i64 = external global [10 x i64] + +;CHECK: .extern .const .b8 array_constant_i64[80]; +@array_constant_i64 = external addrspace(1) constant [10 x i64] + +;CHECK: .extern .local .b8 array_local_i64[80]; +@array_local_i64 = external addrspace(2) global [10 x i64] + +;CHECK: .extern .shared .b8 array_shared_i64[80]; +@array_shared_i64 = external addrspace(4) global [10 x i64] + +;CHECK: .extern .global .b8 array_float[40]; +@array_float = external global [10 x float] + +;CHECK: .extern .const .b8 array_constant_float[40]; +@array_constant_float = external addrspace(1) constant [10 x float] + +;CHECK: .extern .local .b8 array_local_float[40]; +@array_local_float = external addrspace(2) global [10 x float] + +;CHECK: .extern .shared .b8 array_shared_float[40]; +@array_shared_float = external addrspace(4) global [10 x float] + +;CHECK: .extern .global .b8 array_double[80]; +@array_double = external global [10 x double] + +;CHECK: .extern .const .b8 array_constant_double[80]; +@array_constant_double = external 
addrspace(1) constant [10 x double] + +;CHECK: .extern .local .b8 array_local_double[80]; +@array_local_double = external addrspace(2) global [10 x double] + +;CHECK: .extern .shared .b8 array_shared_double[80]; +@array_shared_double = external addrspace(4) global [10 x double] + + +define ptx_device void @t1_u16(i16* %p, i16 %x) { entry: -;CHECK: st.global.s32 [r1], r2; +;CHECK: st.global.u16 [r1], rh1; +;CHECK-NEXT: ret; + store i16 %x, i16* %p + ret void +} + +define ptx_device void @t1_u32(i32* %p, i32 %x) { +entry: +;CHECK: st.global.u32 [r1], r2; +;CHECK-NEXT: ret; store i32 %x, i32* %p ret void } -define ptx_device void @t2(i32* %p, i32 %x) { +define ptx_device void @t1_u64(i64* %p, i64 %x) { entry: -;CHECK: st.global.s32 [r1+4], r2; +;CHECK: st.global.u64 [r1], rd1; +;CHECK-NEXT: ret; + store i64 %x, i64* %p + ret void +} + +define ptx_device void @t1_f32(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [r1], f1; +;CHECK-NEXT: ret; + store float %x, float* %p + ret void +} + +define ptx_device void @t1_f64(double* %p, double %x) { +entry: +;CHECK: st.global.f64 [r1], fd1; +;CHECK-NEXT: ret; + store double %x, double* %p + ret void +} + +define ptx_device void @t2_u16(i16* %p, i16 %x) { +entry: +;CHECK: st.global.u16 [r1+2], rh1; +;CHECK-NEXT: ret; + %i = getelementptr i16* %p, i32 1 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t2_u32(i32* %p, i32 %x) { +entry: +;CHECK: st.global.u32 [r1+4], r2; +;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 1 store i32 %x, i32* %i ret void } -define ptx_device void @t3(i32* %p, i32 %q, i32 %x) { -;CHECK: .reg .s32 r0; +define ptx_device void @t2_u64(i64* %p, i64 %x) { +entry: +;CHECK: st.global.u64 [r1+8], rd1; +;CHECK-NEXT: ret; + %i = getelementptr i64* %p, i32 1 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t2_f32(float* %p, float %x) { +entry: +;CHECK: st.global.f32 [r1+4], f1; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 1 + store float %x, float* %i + ret void 
+} + +define ptx_device void @t2_f64(double* %p, double %x) { +entry: +;CHECK: st.global.f64 [r1+8], fd1; +;CHECK-NEXT: ret; + %i = getelementptr double* %p, i32 1 + store double %x, double* %i + ret void +} + +define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) { +entry: +;CHECK: shl.b32 r0, r2, 1; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr i16* %p, i32 %q + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) { entry: ;CHECK: shl.b32 r0, r2, 2; -;CHECK: add.s32 r0, r1, r0; -;CHECK: st.global.s32 [r0], r3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.u32 [r0], r3; +;CHECK-NEXT: ret; %i = getelementptr i32* %p, i32 %q store i32 %x, i32* %i ret void } -define ptx_device void @t4_global(i32 %x) { +define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) { entry: -;CHECK: st.global.s32 [array], r1; - %i = getelementptr [10 x i32]* @array, i32 0, i32 0 +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK-NEXT: ret; + %i = getelementptr i64* %p, i32 %q + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t3_f32(float* %p, i32 %q, float %x) { +entry: +;CHECK: shl.b32 r0, r2, 2; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr float* %p, i32 %q + store float %x, float* %i + ret void +} + +define ptx_device void @t3_f64(double* %p, i32 %q, double %x) { +entry: +;CHECK: shl.b32 r0, r2, 3; +;CHECK-NEXT: add.u32 r0, r1, r0; +;CHECK-NEXT: st.global.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr double* %p, i32 %q + store double %x, double* %i + ret void +} + +define ptx_device void @t4_global_u16(i16 %x) { +entry: +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: st.global.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0 + store i16 %x, i16* %i + ret void +} + +define 
ptx_device void @t4_global_u32(i32 %x) { +entry: +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: st.global.u32 [r0], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0 store i32 %x, i32* %i ret void } -define ptx_device void @t4_local(i32 %x) { +define ptx_device void @t4_global_u64(i64 %x) { entry: -;CHECK: st.local.s32 [array_local], r1; - %i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0 +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: st.global.u64 [r0], rd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t4_global_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: st.global.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 0 + store float %x, float* %i + ret void +} + +define ptx_device void @t4_global_f64(double %x) { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: st.global.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 0 + store double %x, double* %i + ret void +} + +define ptx_device void @t4_local_u16(i16 %x) { +entry: +;CHECK: mov.u32 r0, array_local_i16; +;CHECK-NEXT: st.local.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0 + store i16 %x, i16 addrspace(2)* %i + ret void +} + +define ptx_device void @t4_local_u32(i32 %x) { +entry: +;CHECK: mov.u32 r0, array_local_i32; +;CHECK-NEXT: st.local.u32 [r0], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0 store i32 %x, i32 addrspace(2)* %i ret void } -define ptx_device void @t4_shared(i32 %x) { +define ptx_device void @t4_local_u64(i64 %x) { entry: -;CHECK: st.shared.s32 [array_shared], r1; - %i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0 +;CHECK: mov.u32 r0, array_local_i64; +;CHECK-NEXT: st.local.u64 [r0], rd1; 
+;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0 + store i64 %x, i64 addrspace(2)* %i + ret void +} + +define ptx_device void @t4_local_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_local_float; +;CHECK-NEXT: st.local.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0 + store float %x, float addrspace(2)* %i + ret void +} + +define ptx_device void @t4_local_f64(double %x) { +entry: +;CHECK: mov.u32 r0, array_local_double; +;CHECK-NEXT: st.local.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0 + store double %x, double addrspace(2)* %i + ret void +} + +define ptx_device void @t4_shared_u16(i16 %x) { +entry: +;CHECK: mov.u32 r0, array_shared_i16; +;CHECK-NEXT: st.shared.u16 [r0], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0 + store i16 %x, i16 addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_u32(i32 %x) { +entry: +;CHECK: mov.u32 r0, array_shared_i32; +;CHECK-NEXT: st.shared.u32 [r0], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0 store i32 %x, i32 addrspace(4)* %i ret void } -define ptx_device void @t5(i32 %x) { +define ptx_device void @t4_shared_u64(i64 %x) { entry: -;CHECK: st.global.s32 [array+4], r1; - %i = getelementptr [10 x i32]* @array, i32 0, i32 1 +;CHECK: mov.u32 r0, array_shared_i64; +;CHECK-NEXT: st.shared.u64 [r0], rd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0 + store i64 %x, i64 addrspace(4)* %i + ret void +} + +define ptx_device void @t4_shared_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_shared_float; +;CHECK-NEXT: st.shared.f32 [r0], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0 + store float %x, float addrspace(4)* %i + 
ret void +} + +define ptx_device void @t4_shared_f64(double %x) { +entry: +;CHECK: mov.u32 r0, array_shared_double; +;CHECK-NEXT: st.shared.f64 [r0], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0 + store double %x, double addrspace(4)* %i + ret void +} + +define ptx_device void @t5_u16(i16 %x) { +entry: +;CHECK: mov.u32 r0, array_i16; +;CHECK-NEXT: st.global.u16 [r0+2], rh1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1 + store i16 %x, i16* %i + ret void +} + +define ptx_device void @t5_u32(i32 %x) { +entry: +;CHECK: mov.u32 r0, array_i32; +;CHECK-NEXT: st.global.u32 [r0+4], r1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1 store i32 %x, i32* %i ret void } + +define ptx_device void @t5_u64(i64 %x) { +entry: +;CHECK: mov.u32 r0, array_i64; +;CHECK-NEXT: st.global.u64 [r0+8], rd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1 + store i64 %x, i64* %i + ret void +} + +define ptx_device void @t5_f32(float %x) { +entry: +;CHECK: mov.u32 r0, array_float; +;CHECK-NEXT: st.global.f32 [r0+4], f1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x float]* @array_float, i32 0, i32 1 + store float %x, float* %i + ret void +} + +define ptx_device void @t5_f64(double %x) { +entry: +;CHECK: mov.u32 r0, array_double; +;CHECK-NEXT: st.global.f64 [r0+8], fd1; +;CHECK-NEXT: ret; + %i = getelementptr [10 x double]* @array_double, i32 0, i32 1 + store double %x, double* %i + ret void +} diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll index aab3fdadad13..7dd2c6f6ac79 100644 --- a/test/CodeGen/PTX/sub.ll +++ b/test/CodeGen/PTX/sub.ll @@ -1,15 +1,71 @@ -; RUN: llc < %s -march=ptx | FileCheck %s +; RUN: llc < %s -march=ptx32 | FileCheck %s -define ptx_device i32 @t1(i32 %x, i32 %y) { -;CHECK: sub.s32 r0, r1, r2; +define ptx_device i16 @t1_u16(i16 %x, i16 %y) { +; CHECK: sub.u16 rh0, rh1, rh2; +; CHECK-NEXT: ret; + %z = sub i16 
%x, %y + ret i16 %z +} + +define ptx_device i32 @t1_u32(i32 %x, i32 %y) { +; CHECK: sub.u32 r0, r1, r2; +; CHECK-NEXT: ret; %z = sub i32 %x, %y -;CHECK: ret; ret i32 %z } -define ptx_device i32 @t2(i32 %x) { -;CHECK: add.s32 r0, r1, -1; +define ptx_device i64 @t1_u64(i64 %x, i64 %y) { +; CHECK: sub.u64 rd0, rd1, rd2; +; CHECK-NEXT: ret; + %z = sub i64 %x, %y + ret i64 %z +} + +define ptx_device float @t1_f32(float %x, float %y) { +; CHECK: sub.f32 f0, f1, f2 +; CHECK-NEXT: ret; + %z = fsub float %x, %y + ret float %z +} + +define ptx_device double @t1_f64(double %x, double %y) { +; CHECK: sub.f64 fd0, fd1, fd2 +; CHECK-NEXT: ret; + %z = fsub double %x, %y + ret double %z +} + +define ptx_device i16 @t2_u16(i16 %x) { +; CHECK: add.u16 rh0, rh1, -1; +; CHECK-NEXT: ret; + %z = sub i16 %x, 1 + ret i16 %z +} + +define ptx_device i32 @t2_u32(i32 %x) { +; CHECK: add.u32 r0, r1, -1; +; CHECK-NEXT: ret; %z = sub i32 %x, 1 -;CHECK: ret; ret i32 %z } + +define ptx_device i64 @t2_u64(i64 %x) { +; CHECK: add.u64 rd0, rd1, -1; +; CHECK-NEXT: ret; + %z = sub i64 %x, 1 + ret i64 %z +} + +define ptx_device float @t2_f32(float %x) { +; CHECK: add.f32 f0, f1, 0FBF800000; +; CHECK-NEXT: ret; + %z = fsub float %x, 1.0 + ret float %z +} + +define ptx_device double @t2_f64(double %x) { +; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000; +; CHECK-NEXT: ret; + %z = fsub double %x, 1.0 + ret double %z +} diff --git a/test/CodeGen/PowerPC/2008-12-12-EH.ll b/test/CodeGen/PowerPC/2008-12-12-EH.ll index 2315e36ff465..a2a5e9e39641 100644 --- a/test/CodeGen/PowerPC/2008-12-12-EH.ll +++ b/test/CodeGen/PowerPC/2008-12-12-EH.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh +; RUN: llc < %s -disable-cfi -march=ppc32 -mtriple=powerpc-apple-darwin9 | grep ^__Z1fv.eh define void @_Z1fv() { entry: diff --git a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll index b10920a6c10d..72ae9d6c73b3 100644 --- 
a/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll +++ b/test/CodeGen/PowerPC/2010-05-03-retaddr1.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 | FileCheck %s +; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin -mcpu=g5 -regalloc=basic | FileCheck %s declare i8* @llvm.frameaddress(i32) nounwind readnone diff --git a/test/CodeGen/PowerPC/Atomics-64.ll b/test/CodeGen/PowerPC/Atomics-64.ll index 1dc4310761c3..cfc1eb98e064 100644 --- a/test/CodeGen/PowerPC/Atomics-64.ll +++ b/test/CodeGen/PowerPC/Atomics-64.ll @@ -1,5 +1,11 @@ -; RUN: llc < %s -march=ppc64 -; ModuleID = 'Atomics.c' +; RUN: llc < %s -march=ppc64 -verify-machineinstrs +; +; This test is disabled until PPCISelLowering learns to insert proper 64-bit +; code for ATOMIC_CMP_SWAP. Currently, it is inserting 32-bit instructions with +; 64-bit operands which causes the machine code verifier to throw a tantrum. +; +; XFAIL: * + target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128" target triple = "powerpc64-apple-darwin9" @sc = common global i8 0 ; [#uses=52] diff --git a/test/CodeGen/PowerPC/Frames-small.ll b/test/CodeGen/PowerPC/Frames-small.ll index 404fdd01966c..ecd5ecd2eca1 100644 --- a/test/CodeGen/PowerPC/Frames-small.ll +++ b/test/CodeGen/PowerPC/Frames-small.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=ppc32 -mtriple=powerpc-apple-darwin8 -o %t1 -; RUN not grep {stw r31, -4(r1)} %t1 +; RUN: not grep {stw r31, -4(r1)} %t1 ; RUN: grep {stwu r1, -16448(r1)} %t1 ; RUN: grep {addi r1, r1, 16448} %t1 ; RUN: llc < %s -march=ppc32 | \ diff --git a/test/CodeGen/PowerPC/indirectbr.ll b/test/CodeGen/PowerPC/indirectbr.ll index 5122ab39d232..ac5662534d0b 100644 --- a/test/CodeGen/PowerPC/indirectbr.ll +++ b/test/CodeGen/PowerPC/indirectbr.ll @@ -43,13 +43,13 @@ L2: ; preds = %L3, %bb2 L1: ; preds = %L2, %bb2 %res.3 = phi i32 [ %phitmp, %L2 ], [ 2, %bb2 ] ; [#uses=1] -; PIC: addis r4, r4, 
ha16(Ltmp0-L0$pb) -; PIC: li r6, lo16(Ltmp0-L0$pb) -; PIC: add r4, r4, r6 -; PIC: stw r4 -; STATIC: li r5, lo16(Ltmp0) -; STATIC: addis r5, r5, ha16(Ltmp0) -; STATIC: stw r5 +; PIC: addis r[[R0:[0-9]+]], r{{[0-9]+}}, ha16(Ltmp0-L0$pb) +; PIC: li r[[R1:[0-9]+]], lo16(Ltmp0-L0$pb) +; PIC: add r[[R2:[0-9]+]], r[[R0]], r[[R1]] +; PIC: stw r[[R2]] +; STATIC: li r[[R0:[0-9]+]], lo16(Ltmp0) +; STATIC: addis r[[R0]], r[[R0]], ha16(Ltmp0) +; STATIC: stw r[[R0]] store i8* blockaddress(@foo, %L5), i8** @nextaddr, align 4 ret i32 %res.3 } diff --git a/test/CodeGen/PowerPC/mulhs.ll b/test/CodeGen/PowerPC/mulhs.ll index 9ab8d997c0d0..5b02e187ae88 100644 --- a/test/CodeGen/PowerPC/mulhs.ll +++ b/test/CodeGen/PowerPC/mulhs.ll @@ -5,7 +5,7 @@ ; RUN: not grep add %t ; RUN: grep mulhw %t | count 1 -define i32 @mulhs(i32 %a, i32 %b) { +define i32 @mulhs(i32 %a, i32 %b) nounwind { entry: %tmp.1 = sext i32 %a to i64 ; [#uses=1] %tmp.3 = sext i32 %b to i64 ; [#uses=1] diff --git a/test/CodeGen/PowerPC/ppc-prologue.ll b/test/CodeGen/PowerPC/ppc-prologue.ll index 2ebfd3c319fc..553837121a36 100644 --- a/test/CodeGen/PowerPC/ppc-prologue.ll +++ b/test/CodeGen/PowerPC/ppc-prologue.ll @@ -5,9 +5,7 @@ define i32 @_Z4funci(i32 %a) ssp { ; CHECK-NEXT: stw r31, -4(r1) ; CHECK-NEXT: stw r0, 8(r1) ; CHECK-NEXT: stwu r1, -80(r1) -; CHECK-NEXT: Ltmp0: -; CHECK-NEXT: mr r31, r1 -; CHECK-NEXT: Ltmp1: +; CHECK: mr r31, r1 entry: %a_addr = alloca i32 ; [#uses=2] %retval = alloca i32 ; [#uses=2] diff --git a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll index fbf724270566..9e6583ca2ce1 100644 --- a/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll +++ b/test/CodeGen/SPARC/2011-01-11-FrameAddr.ll @@ -1,5 +1,7 @@ ;RUN: llc -march=sparc < %s | FileCheck %s -check-prefix=V8 ;RUN: llc -march=sparc -mattr=v9 < %s | FileCheck %s -check-prefix=V9 +;RUN: llc -march=sparc -regalloc=basic < %s | FileCheck %s -check-prefix=V8 +;RUN: llc -march=sparc -regalloc=basic -mattr=v9 < %s | 
FileCheck %s -check-prefix=V9 define i8* @frameaddr() nounwind readnone { entry: diff --git a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll index 98feb83231dc..92f54675b72b 100644 --- a/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll +++ b/test/CodeGen/SystemZ/2009-07-10-BadIncomingArgOffset.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s | FileCheck %s +; RUN: llc < %s -regalloc=basic | FileCheck %s target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-a0:16:16" target triple = "s390x-ibm-linux" @@ -8,7 +9,7 @@ declare void @rdft(i32 signext, i32 signext, double*, i32* nocapture, double*) n declare double @mp_mul_d2i_test(i32 signext, i32 signext, double* nocapture) nounwind define void @mp_mul_radix_test_bb3(i32 %radix, i32 %nfft, double* %tmpfft, i32* %ip, double* %w, double* %arrayidx44.reload, double* %call.out) nounwind { -; CHECK: lg %r11, 328(%r15) +; CHECK: lg %r{{[0-9]+}}, 328(%r15) newFuncRoot: br label %bb3 diff --git a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll index 39612c00e4f6..d6ca0d793351 100644 --- a/test/CodeGen/Thumb/2009-08-20-ISelBug.ll +++ b/test/CodeGen/Thumb/2009-08-20-ISelBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv6-apple-darwin -relocation-model=pic -disable-fp-elim -mattr=+v6 -verify-machineinstrs | FileCheck %s ; rdar://7157006 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } diff --git a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll b/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll deleted file mode 100644 index fad26693e768..000000000000 --- 
a/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll +++ /dev/null @@ -1,20 +0,0 @@ -; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s - -target triple = "thumbv6-apple-darwin10" - -@fred = internal global i32 0 ; [#uses=1] - -define void @foo() nounwind { -entry: -; CHECK: str r0, [sp - %0 = call i32 (...)* @bar() nounwind ; [#uses=1] -; CHECK: blx _bar -; CHECK: ldr r1, [sp - store i32 %0, i32* @fred, align 4 - br label %return - -return: ; preds = %entry - ret void -} - -declare i32 @bar(...) diff --git a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll index 06c0dfec5bab..9f5a677ed356 100644 --- a/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll +++ b/test/CodeGen/Thumb/2010-07-15-debugOrdering.ll @@ -10,7 +10,7 @@ define void @_Z19getClosestDiagonal3ii(%0* noalias sret, i32, i32) nounwind { ; CHECK: blx ___muldf3 ; CHECK: blx ___muldf3 -; CHECK: beq LBB0_7 +; CHECK: beq LBB0 ; CHECK: blx ___muldf3 ;